xapian-core  1.4.25
xapian-inspect.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2017,2018,2023 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include <ios>
24 #include <iostream>
25 #include <string>
26 #include <cstdio> // For sprintf().
27 
28 #include "glass_cursor.h"
29 #include "glass_table.h"
30 #include "glass_version.h"
31 #include "filetests.h"
32 #include "stringutils.h"
33 
34 #include <xapian.h>
35 
36 #include "gnu_getopt.h"
37 
38 using namespace std;
39 
// Program name and one-line description, spliced into the usage, help and
// version output by string-literal concatenation.
#define PROG_NAME "xapian-inspect"
#define PROG_DESC "Inspect the contents of a glass table for development or debugging"

// Values returned by gnu_getopt_long() for the long options which have no
// short-option equivalent.
#define OPT_HELP 1
#define OPT_VERSION 2

// Display toggles, flipped by the interactive "keys" and "tags" commands and
// consulted when an entry is shown.
static bool keys = true, tags = true;
47 
48 static void show_usage() {
49  cout << "Usage: " PROG_NAME " [OPTIONS] TABLE\n"
50  " " PROG_NAME " [OPTIONS] -t TABLE DB\n\n"
51 "Options:\n"
52 " -t, --table=TABLE which table to inspect\n"
53 " --help display this help and exit\n"
54 " --version output version information and exit\n";
55 }
56 
/** Write @a data to stdout with unprintable bytes escaped.
 *
 *  Newline, carriage return and tab are written as \n, \r and \t.  Any other
 *  byte below 32 or at/above 127 is written as a two-digit lowercase hex
 *  escape (\xNN).  A backslash is doubled so the output is unambiguous, and
 *  every other byte is written unchanged.
 *
 *  @param data  Raw bytes to display; may contain embedded zero bytes.
 */
static void
display_nicely(const std::string& data)
{
    for (unsigned char ch : data) {
        switch (ch) {
            case '\n': std::cout << "\\n"; continue;
            case '\r': std::cout << "\\r"; continue;
            case '\t': std::cout << "\\t"; continue;
            case '\\': std::cout << "\\\\"; continue;
            default: break;
        }
        if (ch < 32 || ch >= 127) {
            // "\x" + two hex digits + terminating NUL needs 5 bytes; use the
            // bounded formatter so the buffer can never be overrun.
            char buf[8];
            std::snprintf(buf, sizeof(buf), "\\x%02x",
                          static_cast<unsigned>(ch));
            std::cout << buf;
        } else {
            std::cout << ch;
        }
    }
}
79 
80 // Reverse display_nicely() encoding.
81 static string
82 unescape(const string& s)
83 {
84  auto bslash = s.find('\\');
85  if (bslash == string::npos)
86  return s;
87  string r(s, 0, bslash);
88  for (auto i = s.begin() + bslash; i != s.end(); ++i) {
89  char ch = *i;
90  if (ch == '\\') {
91  if (++i == s.end())
92  goto bad_escaping;
93  ch = *i;
94  switch (ch) {
95  case '\\':
96  break;
97  case '0':
98  // \0 is not output by display_nicely(), but would
99  // reasonably be expected to work.
100  ch = '\0';
101  break;
102  case 'n':
103  ch = '\n';
104  break;
105  case 'r':
106  ch = '\r';
107  break;
108  case 't':
109  ch = '\t';
110  break;
111  case 'x': {
112  if (++i == s.end())
113  goto bad_escaping;
114  char ch1 = *i;
115  if (++i == s.end())
116  goto bad_escaping;
117  char ch2 = *i;
118  if (!C_isxdigit(ch1) || !C_isxdigit(ch2))
119  goto bad_escaping;
120  ch = hex_decode(ch1, ch2);
121  break;
122  }
123  default:
124  goto bad_escaping;
125  }
126  }
127  r += ch;
128  }
129  return r;
130 
131 bad_escaping:
132  cout << "Bad escaping in specified key value, assuming literal\n";
133  return s;
134 }
135 
136 static void
138 {
139  cout << "Commands:\n"
140  "next : Next entry (alias 'n' or '')\n"
141  "prev : Previous entry (alias 'p')\n"
142  "first : First entry (alias 'f')\n"
143  "last : Last entry (alias 'l')\n"
144  "goto K : Goto first entry with key >= K (alias 'g')\n"
145  "until K: Display entries until key >= K (alias 'u')\n"
146  "until : Display entries until end (alias 'u')\n"
147  "count K: Count entries until key >= K (alias 'c')\n"
148  "count : Count entries until end (alias 'c')\n"
149  "open T : Open table T instead (alias 'o') - e.g. open postlist\n"
150  "keys : Toggle showing keys (default: true) (alias 'k')\n"
151  "tags : Toggle showing tags (default: true) (alias 't')\n"
152  "help : Show this (alias 'h' or '?')\n"
153  "quit : Quit this utility (alias 'q')\n";
154 }
155 
156 static void
158 {
159  if (cursor.after_end()) {
160  cout << "After end\n";
161  return;
162  }
163  if (cursor.current_key.empty()) {
164  cout << "Before start\n";
165  return;
166  }
167  if (keys) {
168  cout << "Key: ";
169  display_nicely(cursor.current_key);
170  cout << '\n';
171  }
172  if (tags) {
173  cout << "Tag: ";
174  cursor.read_tag();
175  display_nicely(cursor.current_tag);
176  cout << '\n';
177  }
178 }
179 
180 static void
181 do_until(GlassCursor& cursor, const string& target, bool show)
182 {
183  if (cursor.after_end()) {
184  cout << "At end already.\n";
185  return;
186  }
187 
188  if (!target.empty()) {
189  int cmp = target.compare(cursor.current_key);
190  if (cmp <= 0) {
191  if (cmp)
192  cout << "Already after specified key.\n";
193  else
194  cout << "Already at specified key.\n";
195  return;
196  }
197  }
198 
199  size_t count = 0;
200  while (cursor.next()) {
201  ++count;
202  if (show) show_entry(cursor);
203 
204  if (target.empty())
205  continue;
206 
207  int cmp = target.compare(cursor.current_key);
208  if (cmp < 0) {
209  cout << "No exact match, stopping at entry after, "
210  "having advanced by " << count << " entries.\n";
211  return;
212  }
213  if (cmp == 0) {
214  cout << "Advanced by " << count << " entries.\n";
215  return;
216  }
217  }
218 
219  cout << "Reached end, having advanced by " << count << " entries.\n";
220 }
221 
222 static void
224 {
225  // To position on the last key we just do a < search for a key greater than
226  // any possible key - one longer than the longest possible length and
227  // consisting entirely of the highest sorting byte value.
228  cursor.find_entry_lt(string(GLASS_BTREE_MAX_KEY_LEN + 1, '\xff'));
229 }
230 
231 int
232 main(int argc, char** argv)
233 {
234  static const struct option long_opts[] = {
235  {"table", required_argument, 0, 't'},
236  {"help", no_argument, 0, OPT_HELP},
237  {"version", no_argument, 0, OPT_VERSION},
238  {NULL, 0, 0, 0}
239  };
240 
241  string table_name;
242 
243  int c;
244  while ((c = gnu_getopt_long(argc, argv, "t:", long_opts, 0)) != -1) {
245  switch (c) {
246  case 't':
247  table_name = optarg;
248  break;
249  case OPT_HELP:
250  cout << PROG_NAME " - " PROG_DESC "\n\n";
251  show_usage();
252  exit(0);
253  case OPT_VERSION:
254  cout << PROG_NAME " - " PACKAGE_STRING "\n";
255  exit(0);
256  default:
257  show_usage();
258  exit(1);
259  }
260  }
261 
262  if (argc - optind != 1) {
263  show_usage();
264  exit(1);
265  }
266 
267  // Path to the DB to inspect (possibly with a table name appended).
268  string db_path(argv[optind]);
269  bool arg_is_directory = dir_exists(db_path);
270  if (arg_is_directory && table_name.empty()) {
271  cerr << argv[0]
272  << ": You need to specify a table name to inspect with "
273  "--table.\n";
274  exit(1);
275  }
276  int single_file_fd = -1;
277  if (table_name.empty()) {
278  // db_path should be a path to a table, possibly without the extension
279  // or with just a trailing '.' (supported mostly for historical
280  // reasons). First normalise away any extension or trailing '.'.
281  if (endswith(db_path, "." GLASS_TABLE_EXTENSION)) {
282  db_path.resize(db_path.size() -
284  } else if (endswith(db_path, '.')) {
285  db_path.resize(db_path.size() - 1);
286  }
287  size_t slash = db_path.find_last_of(DIR_SEPS);
288  // If slash is std::string::npos, this assigns the whole of db_path to
289  // table_name, which is what we want.
290  table_name.assign(db_path, slash + 1, string::npos);
291  if (slash != string::npos) {
292  db_path.resize(slash);
293  } else {
294  db_path.resize(0);
295  }
296  } else if (!arg_is_directory) {
297  single_file_fd = open(db_path.c_str(), O_RDONLY | O_BINARY);
298  if (single_file_fd < 0) {
299  cerr << argv[0] << ": Couldn't open file '" << db_path << "'"
300  << endl;
301  exit(1);
302  }
303  }
304 
305  GlassVersion* version_file_ptr;
306  if (single_file_fd < 0) {
307  version_file_ptr = new GlassVersion(db_path);
308  } else {
309  version_file_ptr = new GlassVersion(single_file_fd);
310  }
311  GlassVersion& version_file = *version_file_ptr;
312 
313  version_file.read();
314  glass_revision_number_t rev = version_file.get_revision();
315 
316  show_help();
317  cout << '\n';
318 
319 open_different_table:
320  try {
321  Glass::table_type table_code;
322  if (table_name == "docdata") {
323  table_code = Glass::DOCDATA;
324  } else if (table_name == "spelling") {
325  table_code = Glass::SPELLING;
326  } else if (table_name == "synonym") {
327  table_code = Glass::SYNONYM;
328  } else if (table_name == "termlist") {
329  table_code = Glass::TERMLIST;
330  } else if (table_name == "position") {
331  table_code = Glass::POSITION;
332  } else if (table_name == "postlist") {
333  table_code = Glass::POSTLIST;
334  } else {
335  cerr << "Unknown table: '" << table_name << "'\n";
336  exit(1);
337  }
338 
339  GlassTable* table_ptr;
340  if (single_file_fd < 0) {
341  string table_path = db_path;
342  table_path += '/';
343  table_path += table_name;
344  table_path += '.';
345  table_ptr = new GlassTable("", table_path, true);
346  } else {
347  auto offset = version_file.get_offset();
348  table_ptr = new GlassTable("", single_file_fd, offset, true);
349  }
350  GlassTable& table = *table_ptr;
351 
352  table.open(0, version_file.get_root(table_code), rev);
353  if (table.empty()) {
354  cout << "No entries!\n";
355  exit(0);
356  }
357  cout << "Table has " << table.get_entry_count() << " entries\n";
358 
359  GlassCursor cursor(&table);
360  cursor.find_entry_ge(string());
361  cursor.next();
362 
363  while (!cin.eof()) {
364  show_entry(cursor);
365 wait_for_input:
366  cout << "? " << flush;
367 
368  string input;
369  getline(cin, input);
370  if (cin.eof()) break;
371 
372  if (endswith(input, '\r'))
373  input.resize(input.size() - 1);
374 
375  if (input.empty() || input == "n" || input == "next") {
376  if (cursor.after_end()) {
377  cout << "At end already.\n";
378  goto wait_for_input;
379  }
380  (void)cursor.next();
381  continue;
382  } else if (input == "p" || input == "prev") {
383  if (cursor.current_key.empty()) {
384  cout << "Before start already.\n";
385  goto wait_for_input;
386  }
387  // If the cursor has fallen off the end, point it back at the
388  // last entry.
389  if (cursor.after_end()) {
390  goto_last(cursor);
391  continue;
392  }
393  cursor.find_entry_lt(cursor.current_key);
394  continue;
395  } else if (startswith(input, "u ")) {
396  do_until(cursor, unescape(input.substr(2)), true);
397  goto wait_for_input;
398  } else if (startswith(input, "until ")) {
399  do_until(cursor, unescape(input.substr(6)), true);
400  goto wait_for_input;
401  } else if (input == "u" || input == "until") {
402  do_until(cursor, string(), true);
403  goto wait_for_input;
404  } else if (startswith(input, "c ")) {
405  do_until(cursor, unescape(input.substr(2)), false);
406  goto wait_for_input;
407  } else if (startswith(input, "count ")) {
408  do_until(cursor, unescape(input.substr(6)), false);
409  goto wait_for_input;
410  } else if (input == "c" || input == "count") {
411  do_until(cursor, string(), false);
412  goto wait_for_input;
413  } else if (input == "f" || input == "first") {
414  cursor.find_entry_ge(string());
415  cursor.next();
416  continue;
417  } else if (input == "l" || input == "last") {
418  goto_last(cursor);
419  continue;
420  } else if (startswith(input, "g ")) {
421  if (!cursor.find_entry_ge(unescape(input.substr(2)))) {
422  cout << "No exact match, going to entry after.\n";
423  }
424  continue;
425  } else if (startswith(input, "goto ")) {
426  if (!cursor.find_entry_ge(unescape(input.substr(5)))) {
427  cout << "No exact match, going to entry after.\n";
428  }
429  continue;
430  } else if (startswith(input, "o ") || startswith(input, "open ")) {
431  size_t trim = (input[1] == ' ' ? 2 : 5);
432  table_name.assign(input, trim, string::npos);
433  if (endswith(table_name, "." GLASS_TABLE_EXTENSION))
434  table_name.resize(table_name.size() -
436  else if (endswith(table_name, '.'))
437  table_name.resize(table_name.size() - 1);
438  goto open_different_table;
439  } else if (input == "t" || input == "tags") {
440  tags = !tags;
441  cout << "Showing tags: " << boolalpha << tags << '\n';
442  } else if (input == "k" || input == "keys") {
443  keys = !keys;
444  cout << "Showing keys: " << boolalpha << keys << '\n';
445  } else if (input == "q" || input == "quit") {
446  break;
447  } else if (input == "h" || input == "help" || input == "?") {
448  show_help();
449  goto wait_for_input;
450  } else {
451  cout << "Unknown command.\n";
452  goto wait_for_input;
453  }
454  }
455  } catch (const Xapian::Error& error) {
456  cerr << argv[0] << ": " << error.get_description() << '\n';
457  exit(1);
458  }
459 }
const RootInfo & get_root(Glass::table_type tbl) const
static void do_until(GlassCursor &cursor, const string &target, bool show)
bool endswith(const std::string &s, char sfx)
Definition: stringutils.h:75
Wrappers to allow GNU getopt to be used cleanly from C++ code.
GlassVersion class.
static void show_usage()
int optind
Definition: getopt.cc:94
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
table_type
Definition: glass_defs.h:53
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
Definition: gnu_getopt.h:97
#define GLASS_BTREE_MAX_KEY_LEN
The largest possible value of a key_len.
Definition: glass_table.h:57
bool empty() const
Return true if there are no entries in the table.
Definition: glass_table.h:681
#define OPT_HELP
Class managing a Btree table in a Glass database.
Definition: glass_table.h:425
void find_entry_lt(const string &key)
Position the cursor on the highest entry with key < key.
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
The GlassVersion class manages the revision files.
Definition: glass_version.h:94
Definition: header.h:63
#define DIR_SEPS
Definition: config.h:8
#define O_BINARY
Definition: safefcntl.h:81
WritableDatabase open()
Construct a WritableDatabase object for a new, empty InMemory database.
Definition: dbfactory.h:104
bool next()
Advance to the next key.
static bool tags
STL namespace.
off_t get_offset() const
bool after_end() const
Determine whether cursor is off the end of table.
Definition: glass_cursor.h:329
Utility functions for testing files.
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27
bool read_tag(bool keep_compressed=false)
Read the tag from the table and store it in current_tag.
#define no_argument
Definition: gnu_getopt.h:79
string current_key
Current key pointed to by cursor.
Definition: glass_cursor.h:239
static bool keys
Public interfaces for the Xapian library.
string current_tag
Current tag pointed to by cursor.
Definition: glass_cursor.h:244
char * optarg
Definition: getopt.cc:79
static void show_help()
#define CONST_STRLEN(S)
Returns the length of a string constant.
Definition: stringutils.h:43
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
#define required_argument
Definition: gnu_getopt.h:80
Btree implementation.
void read()
Read the version file and check it's a version we understand.
#define PROG_DESC
bool dir_exists(const char *path)
Test if a directory exists.
Definition: filetests.h:136
bool find_entry_ge(const string &key)
Position the cursor on the lowest entry with key >= key.
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:147
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
glass_tablesize_t get_entry_count() const
Return a count of the number of entries in the table.
Definition: glass_table.h:676
#define PROG_NAME
Interface to Btree cursors.
static string unescape(const string &s)
static void goto_last(GlassCursor &cursor)
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:43
char hex_decode(char ch1, char ch2)
Decode a pair of ASCII hex digits.
Definition: stringutils.h:243
Various handy helpers which std::string really should provide.
static void display_nicely(const string &data)
int main(int argc, char **argv)
#define PACKAGE_STRING
Definition: config.h:337
#define OPT_VERSION
static void show_entry(GlassCursor &cursor)
bool C_isxdigit(char ch)
Definition: stringutils.h:182
glass_revision_number_t get_revision() const