xapian-core  2.0.0
xapian-inspect-honey.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2017,2018,2023 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include <ios>
24 #include <iostream>
25 #include <string>
26 
27 #include "honey_cursor.h"
28 #include "honey_defs.h"
29 #include "honey_table.h"
30 #include "honey_version.h"
31 #include "filetests.h"
32 #include "stringutils.h"
33 
34 #include <xapian.h>
35 
36 #include "gnu_getopt.h"
37 
38 using namespace std;
39 
// Program name, used in the usage, --help and --version output.
#define PROG_NAME "xapian-inspect-honey"
// One-line description of the program, printed by --help.
#define PROG_DESC "Inspect a honey table for development or debugging"

// Values returned by gnu_getopt_long() for the long-only options --help and
// --version (see long_opts in main()).
#define OPT_HELP 1
#define OPT_VERSION 2

// Whether show_entry() prints each entry's key / tag.  Both start enabled and
// are toggled by the interactive "keys" and "tags" commands.
static bool keys = true, tags = true;
47 
48 static void show_usage() {
49  cout << "Usage: " PROG_NAME " [OPTIONS] TABLE\n"
50  " " PROG_NAME " [OPTIONS] -t TABLE DB\n\n"
51 "Options:\n"
52 " -t, --table=TABLE which table to inspect\n"
53 " --help display this help and exit\n"
54 " --version output version information and exit\n";
55 }
56 
/** Write @a data to stdout in a printable, reversible form.
 *
 *  Bytes in the range 32..126 are written unchanged, except that a backslash
 *  is doubled.  Newline, carriage return and tab become \n, \r and \t, and
 *  every other byte is written as \x followed by two lowercase hex digits.
 *
 *  @param data  The raw bytes to display.
 */
static void
display_nicely(const string& data)
{
    static const char hex_digits[] = "0123456789abcdef";

    for (unsigned char byte : data) {
        if (byte == '\\') {
            // Doubled so that a literal backslash can't be mistaken for the
            // start of an escape sequence.
            cout << "\\\\";
            continue;
        }

        const bool printable = (byte >= 32 && byte < 127);
        if (printable) {
            cout << byte;
            continue;
        }

        if (byte == '\n') {
            cout << "\\n";
        } else if (byte == '\r') {
            cout << "\\r";
        } else if (byte == '\t') {
            cout << "\\t";
        } else {
            const char high = hex_digits[byte >> 4];
            const char low = hex_digits[byte & 0x0f];
            cout << "\\x" << high << low;
        }
    }
}
79 
80 // Reverse display_nicely() encoding.
81 static string
82 unescape(const string& s)
83 {
84  auto bslash = s.find('\\');
85  if (bslash == string::npos)
86  return s;
87  string r(s, 0, bslash);
88  for (auto i = s.begin() + bslash; i != s.end(); ++i) {
89  char ch = *i;
90  if (ch == '\\') {
91  if (++i == s.end())
92  goto bad_escaping;
93  ch = *i;
94  switch (ch) {
95  case '\\':
96  break;
97  case '0':
98  // \0 is not output by display_nicely(), but would
99  // reasonably be expected to work.
100  ch = '\0';
101  break;
102  case 'n':
103  ch = '\n';
104  break;
105  case 'r':
106  ch = '\r';
107  break;
108  case 't':
109  ch = '\t';
110  break;
111  case 'x': {
112  if (++i == s.end())
113  goto bad_escaping;
114  char ch1 = *i;
115  if (++i == s.end())
116  goto bad_escaping;
117  char ch2 = *i;
118  if (!C_isxdigit(ch1) || !C_isxdigit(ch2))
119  goto bad_escaping;
120  ch = hex_decode(ch1, ch2);
121  break;
122  }
123  default:
124  goto bad_escaping;
125  }
126  }
127  r += ch;
128  }
129  return r;
130 
131 bad_escaping:
132  cout << "Bad escaping in specified key value, assuming literal\n";
133  return s;
134 }
135 
/** Print the list of interactive commands (and their aliases) to stdout.
 *
 *  The command names are padded to a common width so that the descriptions
 *  line up in one column.
 */
static void
show_help()
{
    cout << "Commands:\n"
            "next   : Next entry (alias 'n' or '')\n"
            "prev   : Previous entry (alias 'p')\n"
            "first  : First entry (alias 'f')\n"
            "last   : Last entry (alias 'l')\n"
            "goto K : Goto first entry with key >= K (alias 'g')\n"
            "until K: Display entries until key >= K (alias 'u')\n"
            "until  : Display entries until end (alias 'u')\n"
            "count K: Count entries until key >= K (alias 'c')\n"
            "count  : Count entries until end (alias 'c')\n"
            "open T : Open table T instead (alias 'o') - e.g. open postlist\n"
            "keys   : Toggle showing keys (default: true) (alias 'k')\n"
            "tags   : Toggle showing tags (default: true) (alias 't')\n"
            "help   : Show this (alias 'h' or '?')\n"
            "quit   : Quit this utility (alias 'q')\n";
}
155 
156 static void
158 {
159  if (cursor.after_end()) {
160  cout << "After end\n";
161  return;
162  }
163  if (cursor.current_key.empty()) {
164  cout << "Before start\n";
165  return;
166  }
167  if (keys) {
168  cout << "Key: ";
169  display_nicely(cursor.current_key);
170  cout << '\n';
171  }
172  if (tags) {
173  cout << "Tag: ";
174  cursor.read_tag();
175  display_nicely(cursor.current_tag);
176  cout << '\n';
177  }
178 }
179 
180 static void
181 do_until(HoneyCursor& cursor, const string& target, bool show)
182 {
183  if (cursor.after_end()) {
184  cout << "At end already.\n";
185  return;
186  }
187 
188  if (!target.empty()) {
189  int cmp = target.compare(cursor.current_key);
190  if (cmp <= 0) {
191  if (cmp)
192  cout << "Already after specified key.\n";
193  else
194  cout << "Already at specified key.\n";
195  return;
196  }
197  }
198 
199  size_t count = 0;
200  while (cursor.next()) {
201  ++count;
202  if (show) show_entry(cursor);
203 
204  if (target.empty())
205  continue;
206 
207  int cmp = target.compare(cursor.current_key);
208  if (cmp < 0) {
209  cout << "No exact match, stopping at entry after, "
210  "having advanced by " << count << " entries.\n";
211  return;
212  }
213  if (cmp == 0) {
214  cout << "Advanced by " << count << " entries.\n";
215  return;
216  }
217  }
218 
219  cout << "Reached end, having advanced by " << count << " entries.\n";
220 }
221 
222 int
223 main(int argc, char** argv)
224 {
225  static const struct option long_opts[] = {
226  {"table", required_argument, 0, 't'},
227  {"help", no_argument, 0, OPT_HELP},
228  {"version", no_argument, 0, OPT_VERSION},
229  {NULL, 0, 0, 0}
230  };
231 
232  string table_name;
233 
234  int c;
235  while ((c = gnu_getopt_long(argc, argv, "t:", long_opts, 0)) != -1) {
236  switch (c) {
237  case 't':
238  table_name = optarg;
239  break;
240  case OPT_HELP:
241  cout << PROG_NAME " - " PROG_DESC "\n\n";
242  show_usage();
243  exit(0);
244  case OPT_VERSION:
245  cout << PROG_NAME " - " PACKAGE_STRING "\n";
246  exit(0);
247  default:
248  show_usage();
249  exit(1);
250  }
251  }
252 
253  if (argc - optind != 1) {
254  show_usage();
255  exit(1);
256  }
257 
258  // Path to the DB to inspect (possibly with a table name appended).
259  string db_path(argv[optind]);
260  bool arg_is_directory = dir_exists(db_path);
261  if (arg_is_directory && table_name.empty()) {
262  cerr << argv[0]
263  << ": You need to specify a table name to inspect with "
264  "--table.\n";
265  exit(1);
266  }
267  int single_file_fd = -1;
268  if (table_name.empty()) {
269  // db_path should be a path to a table, possibly without the extension
270  // or with just a trailing '.' (supported mostly for historical
271  // reasons). First normalise away any extension or trailing '.'.
272  if (endswith(db_path, "." HONEY_TABLE_EXTENSION)) {
273  db_path.resize(db_path.size() -
275  } else if (endswith(db_path, '.')) {
276  db_path.resize(db_path.size() - 1);
277  }
278  size_t slash = db_path.find_last_of(DIR_SEPS);
279  // If slash is std::string::npos, this assigns the whole of db_path to
280  // table_name, which is what we want.
281  table_name.assign(db_path, slash + 1, string::npos);
282  if (slash != string::npos) {
283  db_path.resize(slash);
284  } else {
285  db_path.resize(0);
286  }
287  } else if (!arg_is_directory) {
288  single_file_fd = open(db_path.c_str(), O_RDONLY | O_BINARY);
289  if (single_file_fd < 0) {
290  cerr << argv[0] << ": Couldn't open file '" << db_path << "'\n";
291  exit(1);
292  }
293  }
294 
295  HoneyVersion* version_file_ptr;
296  if (single_file_fd < 0) {
297  version_file_ptr = new HoneyVersion(db_path);
298  } else {
299  version_file_ptr = new HoneyVersion(single_file_fd);
300  }
301  HoneyVersion& version_file = *version_file_ptr;
302 
303  version_file.read();
304  honey_revision_number_t rev = version_file.get_revision();
305 
306  show_help();
307  cout << '\n';
308 
309 open_different_table:
310  try {
311  Honey::table_type table_code;
312  if (table_name == "docdata") {
313  table_code = Honey::DOCDATA;
314  } else if (table_name == "spelling") {
315  table_code = Honey::SPELLING;
316  } else if (table_name == "synonym") {
317  table_code = Honey::SYNONYM;
318  } else if (table_name == "termlist") {
319  table_code = Honey::TERMLIST;
320  } else if (table_name == "position") {
321  table_code = Honey::POSITION;
322  } else if (table_name == "postlist") {
323  table_code = Honey::POSTLIST;
324  } else {
325  cerr << "Unknown table: '" << table_name << "'\n";
326  exit(1);
327  }
328 
329  HoneyTable* table_ptr;
330  if (single_file_fd < 0) {
331  string table_path = db_path;
332  table_path += '/';
333  table_path += table_name;
334  table_path += '.';
335  table_ptr = new HoneyTable("", table_path, true);
336  } else {
337  auto offset = version_file.get_offset();
338  table_ptr = new HoneyTable("", single_file_fd, offset, true);
339  }
340  HoneyTable& table = *table_ptr;
341 
342  table.open(0, version_file.get_root(table_code), rev);
343  HoneyCursor cursor(&table);
344  cursor.rewind();
345  if (!cursor.next()) {
346  cout << "No entries!\n";
347  exit(0);
348  }
349 
350  cout << "Table has " << table.get_entry_count() << " entries\n";
351 
352  while (!cin.eof()) {
353  show_entry(cursor);
354 wait_for_input:
355  cout << "? " << flush;
356 
357  string input;
358  getline(cin, input);
359  if (cin.eof()) break;
360 
361  if (endswith(input, '\r'))
362  input.resize(input.size() - 1);
363 
364  if (input.empty() || input == "n" || input == "next") {
365  if (cursor.after_end()) {
366  cout << "At end already.\n";
367  goto wait_for_input;
368  }
369  (void)cursor.next();
370  continue;
371  } else if (input == "p" || input == "prev") {
372  if (!cursor.prev()) {
373  cout << "Before start already.\n";
374  goto wait_for_input;
375  }
376  continue;
377  } else if (startswith(input, "u ")) {
378  do_until(cursor, unescape(input.substr(2)), true);
379  goto wait_for_input;
380  } else if (startswith(input, "until ")) {
381  do_until(cursor, unescape(input.substr(6)), true);
382  goto wait_for_input;
383  } else if (input == "u" || input == "until") {
384  do_until(cursor, string(), true);
385  goto wait_for_input;
386  } else if (startswith(input, "c ")) {
387  do_until(cursor, unescape(input.substr(2)), false);
388  goto wait_for_input;
389  } else if (startswith(input, "count ")) {
390  do_until(cursor, unescape(input.substr(6)), false);
391  goto wait_for_input;
392  } else if (input == "c" || input == "count") {
393  do_until(cursor, string(), false);
394  goto wait_for_input;
395  } else if (input == "f" || input == "first") {
396  cursor.rewind();
397  cursor.next();
398  continue;
399  } else if (input == "l" || input == "last") {
400  cursor.to_end();
401  cursor.prev();
402  continue;
403  } else if (startswith(input, "g ")) {
404  if (!cursor.find_entry_ge(unescape(input.substr(2)))) {
405  cout << "No exact match, going to entry after.\n";
406  }
407  continue;
408  } else if (startswith(input, "goto ")) {
409  if (!cursor.find_entry_ge(unescape(input.substr(5)))) {
410  cout << "No exact match, going to entry after.\n";
411  }
412  continue;
413  } else if (startswith(input, "o ") || startswith(input, "open ")) {
414  size_t trim = (input[1] == ' ' ? 2 : 5);
415  table_name.assign(input, trim, string::npos);
416  if (endswith(table_name, "." HONEY_TABLE_EXTENSION))
417  table_name.resize(table_name.size() -
419  else if (endswith(table_name, '.'))
420  table_name.resize(table_name.size() - 1);
421  goto open_different_table;
422  } else if (input == "t" || input == "tags") {
423  tags = !tags;
424  cout << "Showing tags: " << boolalpha << tags << '\n';
425  } else if (input == "k" || input == "keys") {
426  keys = !keys;
427  cout << "Showing keys: " << boolalpha << keys << '\n';
428  } else if (input == "q" || input == "quit") {
429  break;
430  } else if (input == "h" || input == "help" || input == "?") {
431  show_help();
432  goto wait_for_input;
433  } else {
434  cout << "Unknown command.\n";
435  goto wait_for_input;
436  }
437  }
438  } catch (const Xapian::Error& error) {
439  cerr << argv[0] << ": " << error.get_description() << '\n';
440  exit(1);
441  }
442 }
bool read_tag(bool keep_compressed=false)
bool after_end() const
Definition: honey_cursor.h:94
void rewind()
Position cursor on the dummy empty key.
Definition: honey_cursor.h:85
bool prev()
Move to the item before the current one.
bool find_entry_ge(std::string_view key)
Definition: honey_cursor.h:110
void to_end()
Definition: honey_cursor.h:92
std::string current_tag
Definition: honey_cursor.h:43
bool next()
Definition: honey_cursor.h:96
std::string current_key
Definition: honey_cursor.h:43
honey_tablesize_t get_entry_count() const
Definition: honey_table.h:693
void open(int flags_, const Honey::RootInfo &root_info, honey_revision_number_t)
Definition: honey_table.cc:58
The HoneyVersion class manages the revision files.
Definition: honey_version.h:79
off_t get_offset() const
honey_revision_number_t get_revision() const
void read()
Read the version file and check it's a version we understand.
const Honey::RootInfo & get_root(Honey::table_type tbl) const
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:41
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
#define DIR_SEPS
Definition: config.h:8
#define PACKAGE_STRING
Definition: config.h:361
Utility functions for testing files.
bool dir_exists(const char *path)
Test if a directory exists.
Definition: filetests.h:145
int optind
Definition: getopt.cc:93
char * optarg
Definition: getopt.cc:78
Wrappers to allow GNU getopt to be used cleanly from C++ code.
#define no_argument
Definition: gnu_getopt.h:78
#define required_argument
Definition: gnu_getopt.h:79
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
Definition: gnu_getopt.h:96
HoneyCursor class.
Definitions, types, etc for use inside honey.
#define HONEY_TABLE_EXTENSION
Honey table extension.
Definition: honey_defs.h:29
uint4 honey_revision_number_t
The revision number of a honey database.
Definition: honey_defs.h:104
HoneyTable class.
HoneyVersion class.
table_type
Definition: honey_defs.h:68
@ TERMLIST
Definition: honey_defs.h:71
@ DOCDATA
Definition: honey_defs.h:70
@ SYNONYM
Definition: honey_defs.h:74
@ POSITION
Definition: honey_defs.h:72
@ SPELLING
Definition: honey_defs.h:73
@ POSTLIST
Definition: honey_defs.h:69
Database open(std::string_view host, unsigned int port, unsigned timeout=10000, unsigned connect_timeout=10000)
Construct a Database object for read-only access to a remote database accessed via a TCP connection.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:108
#define O_BINARY
Definition: safefcntl.h:80
Various handy string-related helpers.
char hex_decode(char ch1, char ch2)
Decode a pair of ASCII hex digits.
Definition: stringutils.h:248
#define CONST_STRLEN(S)
Returns the length of a string constant.
Definition: stringutils.h:48
bool endswith(std::string_view s, char sfx)
Definition: stringutils.h:80
bool C_isxdigit(char ch)
Definition: stringutils.h:187
bool startswith(std::string_view s, char pfx)
Definition: stringutils.h:56
Definition: header.h:87
static bool keys
static void show_usage()
#define OPT_VERSION
int main(int argc, char **argv)
static void display_nicely(const string &data)
#define PROG_NAME
static void do_until(HoneyCursor &cursor, const string &target, bool show)
static void show_help()
static string unescape(const string &s)
#define PROG_DESC
#define OPT_HELP
static bool tags
static void show_entry(HoneyCursor &cursor)
static const struct option long_opts[]
Public interfaces for the Xapian library.