59 #define PROG_NAME "delve"
60 #define PROG_DESC "Inspect the contents of a Xapian database"
63 cout <<
"Usage: " PROG_NAME " [OPTIONS] DATABASE...\n\n"
65 " -a show all terms in the database\n"
66 " -A <prefix> show all terms in the database with given prefix\n"
67 " -r <recno> for term list(s)\n"
68 " -t <term> for posting list(s)\n"
69 " -t <term> -r <recno> for position list(s)\n"
70 " -s, --stemmer=LANG set the stemming language, the default is 'none'\n"
71 " -1 output one list entry per line\n"
72 " -V[<type>]<valueno> output value valueno for each document referred to\n"
73 " (or each document in the database if no -r options).\n"
75 " E: escape in a C-like way (default)\n"
76 " I: decode as a packed integer\n"
77 " R: show the raw value (which may contain binary data,\n"
78 " newlines, invalid UTF-8, etc)\n"
79 " S: decode using Xapian::sortable_unserialise()\n"
80 " -V[<type>] output all values for each document referred to.\n"
81 " <type> is as above.\n"
82 " -d output document data for each document referred to\n"
83 " -z for db, count documents with length 0\n"
84 " -v extra info (wdf and len for postlist;\n"
85 " wdf and termfreq for termlist; number of terms for db;\n"
86 " termfreq when showing all terms; value bounds and freq\n"
87 " when showing all values in a slot)\n"
88 " -vv even more info (also show collection freq and wdf\n"
89 " upper bound for terms)\n"
90 " --help display this help and exit\n"
91 " --version output version information and exit\n";
98 cout <<
"UUID = " << db.
get_uuid() <<
'\n';
99 cout <<
"number of documents = " << db.
get_doccount() <<
'\n';
100 cout <<
"average document length = " << db.
get_avlength() <<
'\n';
105 cout <<
"highest document id ever used = " << db.
get_lastdocid() <<
'\n';
107 cout <<
"has positional information = " << db.
has_positions() <<
'\n';
108 cout <<
"revision = ";
110 cout <<
"N/A (sharded DB)\n";
117 cout <<
"N/A (" << e.
get_msg() <<
")\n";
120 cout <<
"currently open for writing = ";
122 cout << db.
locked() <<
'\n';
140 cout <<
"number of zero-length documents = " << empty_docs <<
'\n';
153 cout <<
"number of distinct terms = " << terms <<
'\n';
171 unsigned long long i = 0;
172 for (
unsigned char ch : value) {
198 vector<docid>::const_iterator i,
199 vector<docid>::const_iterator end)
202 cout <<
"Values for record #" << *i <<
':';
211 vector<docid>::const_iterator i,
212 vector<docid>::const_iterator end,
217 cout <<
"Value " << slot <<
" for record #" << did <<
": ";
232 vector<docid>::const_iterator i,
233 vector<docid>::const_iterator end)
236 cout <<
"Data for record #" << *i <<
":\n";
244 const char * all_pfx = NULL)
250 cout <<
"All terms in database";
252 cout <<
" with prefix \"" << all_pfx <<
"\"";
256 cout <<
"Term List for record #" << did;
264 cout <<
", collection freq, wdf upper bound";
270 const string &
term = *t;
288 vector<docid>::const_iterator i,
289 vector<docid>::const_iterator end)
299 main(
int argc,
char **argv)
try {
300 if (argc > 1 && argv[1][0] ==
'-') {
301 if (strcmp(argv[1],
"--help") == 0) {
306 if (strcmp(argv[1],
"--version") == 0) {
312 const char * all_terms = NULL;
313 vector<docid> recnos;
314 vector<string> terms;
319 bool slot_set =
false;
322 while ((c =
gnu_getopt(argc, argv,
"aA:r:t:s:1vV::dz")) != -1) {
333 unsigned long n = strtoul(
optarg, &end, 10);
334 if (
optarg == end || *end) {
335 cout <<
"Non-numeric document id: " <<
optarg <<
'\n';
339 if (errno == ERANGE || n == 0 || did != n) {
340 cout <<
"Document id out of range: " <<
optarg <<
'\n';
343 recnos.push_back(did);
377 unsigned long n = strtoul(
optarg, &end, 10);
378 if (
optarg == end || *end) {
379 cout <<
"Non-numeric value slot: " <<
optarg <<
'\n';
383 if (errno == ERANGE || slot != n) {
384 cout <<
"Value slot out of range: " <<
optarg <<
'\n';
418 vector<string>::const_iterator i;
419 for (i = dbs.begin(); i != dbs.end(); ++i) {
422 }
catch (
const Error &e) {
423 cerr <<
"Error opening database '" << *i <<
"': ";
430 if (!all_terms && terms.empty() && recnos.empty() && !slot_set) {
440 if (!recnos.empty()) {
443 }
else if (slot_set) {
444 show_value(db, recnos.begin(), recnos.end(), slot);
452 cout <<
"Value " << slot;
454 cout <<
" (lower bound=";
456 cout <<
" upper bound=";
460 cout <<
" for each document:";
476 vector<string>::const_iterator i;
477 for (i = terms.begin(); i != terms.end(); ++i) {
482 cout <<
"term '" <<
term <<
"' not in database\n";
485 if (recnos.empty()) {
487 cout <<
"Posting List for term '" <<
term <<
"' (termfreq "
494 cout <<
' ' <<
p.get_wdf() <<
' ' <<
p.get_doclength();
503 vector<docid>::const_iterator j;
504 for (j = recnos.begin(); j != recnos.end(); ++j) {
506 if (
p == pend || *
p != *j) {
507 cout <<
"term '" <<
term <<
508 "' doesn't index document #" << *j <<
'\n';
510 cout <<
"Position List for term '" <<
term
511 <<
"', record #" << *j <<
':';
514 while (
pos !=
p.positionlist_end()) {
519 }
catch (
const Error &e) {
526 }
catch (
const Error &e) {
An indexed database of documents.
Xapian::rev get_revision() const
Get the revision of the database.
ValueIterator valuestream_begin(Xapian::valueno slot) const
Return an iterator over the value in slot slot for each document.
Xapian::doccount get_termfreq(std::string_view term) const
Get the number of documents indexed by a specified term.
Xapian::totallength get_total_length() const
Get the total length of all the documents in the database.
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
PostingIterator postlist_begin(std::string_view term) const
Start iterating the postings of a term.
bool locked() const
Test if this database is currently locked for writing.
TermIterator termlist_begin(Xapian::docid did) const
Start iterating the terms in a document.
double get_avlength() const
Old name for get_average_length() for backward compatibility.
Xapian::termcount get_wdf_upper_bound(std::string_view term) const
Get an upper bound on the wdf of term term.
size_t size() const
Return number of shards in this Database object.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
void add_database(const Database &other)
Add shards from another Database.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
TermIterator allterms_end(std::string_view={}) const noexcept
End iterator corresponding to allterms_begin(prefix).
bool has_positions() const
Does this database have any positional information?
Xapian::termcount get_collection_freq(std::string_view term) const
Get the total number of occurrences of a specified term.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
PostingIterator postlist_end(std::string_view) const noexcept
End iterator corresponding to postlist_begin().
TermIterator termlist_end(Xapian::docid) const noexcept
End iterator corresponding to termlist_begin().
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
TermIterator allterms_begin(std::string_view prefix={}) const
Start iterating all terms in the database with a given prefix.
ValueIterator valuestream_end(Xapian::valueno) const noexcept
Return end iterator corresponding to valuestream_begin().
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
std::string get_uuid() const
Get the UUID for the database.
Class representing a document.
std::string get_data() const
Get the document data.
ValueIterator values_begin() const
Start iterating the values in this document.
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
ValueIterator values_end() const noexcept
End iterator corresponding to values_begin().
All exceptions thrown by Xapian are subclasses of Xapian::Error.
const std::string & get_msg() const noexcept
Message giving details of the error, intended for human consumption.
std::string get_description() const
Return a string describing this object.
InvalidOperationError indicates the API was used in an invalid way.
virtual bool skip_to(Xapian::termpos termpos)=0
Skip forward to the specified position.
Class for iterating over term positions.
Class for iterating over a list of terms.
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Class representing a stemming algorithm.
Class for iterating over a list of terms.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
UnimplementedError indicates an attempt to use an unimplemented feature.
Class for iterating over document values.
Xapian::docid get_docid() const
Return the docid at the current position.
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
Append a string to an object description, escaping invalid UTF-8.
Append a string to an object description, escaping invalid UTF-8.
Wrappers to allow GNU getopt to be used cleanly from C++ code.
int gnu_getopt(int argc_, char *const *argv_, const char *shortopts_)
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
double sortable_unserialise(std::string_view serialised) noexcept
Convert a string encoded using sortable_serialise back to a floating point number.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
static Xapian::Stem stemmer
void description_append(std::string &desc, std::string_view s)
static void show_db_stats(Database &db)
int main(int argc, char **argv)
static enum @9 value_decode
static void decode_and_show_value(const string &value)
static void show_value(Database &db, vector< docid >::const_iterator i, vector< docid >::const_iterator end, Xapian::valueno slot)
@ VALUE_SORTABLE_SERIALISE
static void show_termlist(const Database &db, Xapian::docid did, const char *all_pfx=NULL)
static void show_values(Database &db, docid docid, char sep)
static void show_docdata(Database &db, docid docid, char sep)
static bool count_zero_length_docs
static void show_termlists(Database &db, vector< docid >::const_iterator i, vector< docid >::const_iterator end)
Public interfaces for the Xapian library.