60 #define PROG_NAME "delve" 61 #define PROG_DESC "Inspect the contents of a Xapian database" 64 cout <<
"Usage: " PROG_NAME " [OPTIONS] DATABASE...\n\n" 66 " -a show all terms in the database\n" 67 " -A <prefix> show all terms in the database with given prefix\n" 68 " -r <recno> for term list(s)\n" 69 " -t <term> for posting list(s)\n" 70 " -t <term> -r <recno> for position list(s)\n" 71 " -s, --stemmer=LANG set the stemming language, the default is 'none'\n" 72 " -1 output one list entry per line\n" 73 " -V[<type>]<valueno> output value valueno for each document referred to\n" 74 " (or each document in the database if no -r options).\n" 76 " E: escape in a C-like way (default)\n" 77 " I: decode as a packed integer\n" 78 " R: show the raw value (which may contain binary data,\n" 79 " newlines, invalid UTF-8, etc)\n" 80 " S: decode using Xapian::sortable_unserialise()\n" 81 " -V[<type>] output all values for each document referred to.\n" 82 " <type> is as above.\n" 83 " -d output document data for each document referred to\n" 84 " -z for db, count documents with length 0\n" 85 " -v extra info (wdf and len for postlist;\n" 86 " wdf and termfreq for termlist; number of terms for db;\n" 87 " termfreq when showing all terms; value bounds and freq\n" 88 " when showing all values in a slot)\n" 89 " -vv even more info (also show collection freq and wdf\n" 90 " upper bound for terms)\n" 91 " --help display this help and exit\n" 92 " --version output version information and exit\n";
99 cout <<
"UUID = " << db.
get_uuid() <<
'\n';
100 cout <<
"number of documents = " << db.
get_doccount() <<
'\n';
101 cout <<
"average document length = " << db.
get_avlength() <<
'\n';
106 cout <<
"highest document id ever used = " << db.
get_lastdocid() <<
'\n';
108 cout <<
"has positional information = " << db.
has_positions() <<
'\n';
109 cout <<
"revision = ";
111 cout <<
"N/A (sharded DB)\n";
118 cout <<
"N/A (" << e.
get_msg() <<
")\n";
121 cout <<
"currently open for writing = ";
123 cout << db.
locked() <<
'\n';
141 cout <<
"number of zero-length documents = " << empty_docs <<
'\n';
154 cout <<
"number of distinct terms = " << terms <<
'\n';
172 unsigned long long i = 0;
173 for (
unsigned char ch : value) {
199 vector<docid>::const_iterator i,
200 vector<docid>::const_iterator end)
203 cout <<
"Values for record #" << *i <<
':';
212 vector<docid>::const_iterator i,
213 vector<docid>::const_iterator end,
218 cout <<
"Value " << slot <<
" for record #" << did <<
": ";
233 vector<docid>::const_iterator i,
234 vector<docid>::const_iterator end)
237 cout <<
"Data for record #" << *i <<
":\n";
245 const char * all_pfx = NULL)
251 cout <<
"All terms in database";
253 cout <<
" with prefix \"" << all_pfx <<
"\"";
257 cout <<
"Term List for record #" << did;
265 cout <<
", collection freq, wdf upper bound";
271 const string & term = *t;
289 vector<docid>::const_iterator i,
290 vector<docid>::const_iterator end)
300 main(
int argc,
char **argv)
try {
301 if (argc > 1 && argv[1][0] ==
'-') {
302 if (strcmp(argv[1],
"--help") == 0) {
307 if (strcmp(argv[1],
"--version") == 0) {
313 const char * all_terms = NULL;
314 vector<docid> recnos;
315 vector<string> terms;
320 bool slot_set =
false;
323 while ((c =
gnu_getopt(argc, argv,
"aA:r:t:s:1vV::dz")) != -1) {
334 unsigned long n = strtoul(
optarg, &end, 10);
335 if (
optarg == end || *end) {
336 cout <<
"Non-numeric document id: " <<
optarg <<
'\n';
340 if (errno == ERANGE || n == 0 || did != n) {
341 cout <<
"Document id out of range: " <<
optarg <<
'\n';
344 recnos.push_back(did);
378 unsigned long n = strtoul(
optarg, &end, 10);
379 if (
optarg == end || *end) {
380 cout <<
"Non-numeric value slot: " <<
optarg <<
'\n';
384 if (errno == ERANGE || slot != n) {
385 cout <<
"Value slot out of range: " <<
optarg <<
'\n';
408 while (argv[
optind]) dbs.push_back(argv[optind++]);
415 std::sort(recnos.begin(), recnos.end());
419 vector<string>::const_iterator i;
420 for (i = dbs.begin(); i != dbs.end(); ++i) {
423 }
catch (
const Error &e) {
424 cerr <<
"Error opening database '" << *i <<
"': ";
431 if (!all_terms && terms.empty() && recnos.empty() && !slot_set) {
441 if (!recnos.empty()) {
444 }
else if (slot_set) {
445 show_value(db, recnos.begin(), recnos.end(), slot);
453 cout <<
"Value " << slot;
455 cout <<
" (lower bound=";
457 cout <<
" upper bound=";
459 cout <<
" freq=" << db.get_value_freq(slot) <<
")";
461 cout <<
" for each document:";
463 while (it != db.valuestream_end(slot)) {
477 vector<string>::const_iterator i;
478 for (i = terms.begin(); i != terms.end(); ++i) {
483 cout <<
"term '" << term <<
"' not in database\n";
486 if (recnos.empty()) {
488 cout <<
"Posting List for term '" << term <<
"' (termfreq " 489 << db.get_termfreq(term) <<
", collfreq " 490 << db.get_collection_freq(term) <<
", wdf_max " 491 << db.get_wdf_upper_bound(term) <<
"):";
495 cout <<
' ' << p.
get_wdf() <<
' ' << p.get_doclength();
504 vector<docid>::const_iterator j;
505 for (j = recnos.begin(); j != recnos.end(); ++j) {
507 if (p == pend || *p != *j) {
508 cout <<
"term '" << term <<
509 "' doesn't index document #" << *j <<
'\n';
511 cout <<
"Position List for term '" << term
512 <<
"', record #" << *j <<
':';
520 }
catch (
const Error &e) {
527 }
catch (
const Error &e) {
The Xapian namespace contains public interfaces for the Xapian library.
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
int gnu_getopt(int argc_, char *const *argv_, const char *shortopts_)
Wrappers to allow GNU getopt to be used cleanly from C++ code.
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
static enum @5 value_decode
This class is used to access a database, or a group of databases.
static bool count_zero_length_docs
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
InvalidOperationError indicates the API was used in an invalid way.
Class representing a stemming algorithm.
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
bool has_positions() const
Does this database have any positional information?
ValueIterator values_begin() const
Iterator for the values in this document.
TermIterator allterms_end(const std::string &=std::string()) const
Corresponding end iterator to allterms_begin(prefix).
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
PositionIterator positionlist_end() const
Return an end PositionIterator for the current document.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Class for iterating over document values.
Xapian::rev get_revision() const
Get the revision of the database.
static Xapian::Stem stemmer
Xapian::doccount get_doccount() const
Get the number of documents in the database.
static void show_termlists(Database &db, vector< docid >::const_iterator i, vector< docid >::const_iterator end)
Xapian::totallength get_total_length() const
Get the total length of all the documents in the database.
static void show_values(Database &db, docid docid, char sep)
Xapian::doclength get_avlength() const
Get the average length of the documents in the database.
Class for iterating over a list of terms.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Class for iterating over a list of terms.
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
static void show_docdata(Database &db, docid docid, char sep)
void description_append(std::string &desc, const std::string &s)
static void decode_and_show_value(const string &value)
Public interfaces for the Xapian library.
bool locked() const
Test if this database is currently locked for writing.
Class for iterating over term positions.
int main(int argc, char **argv)
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
ValueIterator values_end() const
Equivalent end iterator for values_begin().
TermIterator allterms_begin(const std::string &prefix=std::string()) const
An iterator which runs across all terms with a given prefix.
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Append a string to an object description, escaping invalid UTF-8.
size_t size() const
Return number of shards in this Database object.
double sortable_unserialise(const std::string &serialised)
Convert a string encoded using sortable_serialise back to a floating point number.
std::string get_description() const
Return a string describing this object.
Append a string to an object description, escaping invalid UTF-8.
static void show_termlist(const Database &db, Xapian::docid did, const char *all_pfx=NULL)
static void show_db_stats(Database &db)
PositionIterator positionlist_begin() const
Return a PositionIterator for the current document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
All exceptions thrown by Xapian are subclasses of Xapian::Error.
void skip_to(Xapian::docid did)
Advance the iterator to document did.
unsigned valueno
The number for a value slot in a document.
Xapian::docid get_docid() const
Return the docid at the current position.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
std::string get_data() const
Get data stored in the document.
static void show_value(Database &db, vector< docid >::const_iterator i, vector< docid >::const_iterator end, Xapian::valueno slot)
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
std::string get_value(Xapian::valueno slot) const
Get value by number.
A handle representing a document in a Xapian database.
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
std::string get_uuid() const
Get a UUID for the database.
UnimplementedError indicates an attempt to use an unimplemented feature.
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Xapian::termcount get_wdf_upper_bound(const std::string &term) const
Get an upper bound on the wdf of the given term.