xapian-core  2.0.0
dbcheck.cc
Go to the documentation of this file.
1 
4 /* Copyright 2007-2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 #include "xapian/database.h"
23 
24 #include "xapian/constants.h"
25 #include "xapian/error.h"
26 
27 // We always need GLASS_TABLE_EXTENSION.
28 #include "glass/glass_defs.h"
29 #ifdef XAPIAN_HAS_GLASS_BACKEND
30 #include "glass/glass_changes.h"
31 #include "glass/glass_dbcheck.h"
32 #include "glass/glass_version.h"
33 #endif
34 
35 // We always need HONEY_TABLE_EXTENSION.
36 #include "honey/honey_defs.h"
37 #ifdef XAPIAN_HAS_HONEY_BACKEND
38 #include "honey/honey_dbcheck.h"
39 #include "honey/honey_version.h"
40 #endif
41 
42 #include "backends.h"
43 #include "databasehelpers.h"
44 #include "filetests.h"
45 #include "omassert.h"
46 #include "stringutils.h"
47 
48 #include <ostream>
49 #include <stdexcept>
50 #include <string_view>
51 
52 using namespace std;
53 
54 #ifdef XAPIAN_HAS_GLASS_BACKEND
// Tables to check for a glass database, in the order they are checked.
//
// Note: it's important to check termlist before postlist so that we can
// cross-check the document lengths.
//
// name[9] is sized for the longest entries ("termlist"/"postlist"/"position"/
// "spelling": 8 characters plus the terminating NUL).
static const struct { char name[9]; } glass_tables[] = {
    { "docdata" },
    { "termlist" },
    { "postlist" },
    { "position" },
    { "spelling" },
    { "synonym" }
};
65 #endif
66 
67 #ifdef XAPIAN_HAS_HONEY_BACKEND
// Tables to check for a honey database, in the order they are checked.
//
// Note: it's important to check termlist before postlist so that we can
// cross-check the document lengths.
//
// name[9] is sized for the longest entries ("termlist"/"postlist"/"position"/
// "spelling": 8 characters plus the terminating NUL).
static const struct { char name[9]; } honey_tables[] = {
    { "docdata" },
    { "termlist" },
    { "postlist" },
    { "position" },
    { "spelling" },
    { "synonym" }
};
78 #endif
79 
80 [[noreturn]]
81 static void
83 {
84  auto msg = "Couldn't find Xapian database or table to check";
85  throw Xapian::DatabaseOpeningError(msg, ENOENT);
86 }
87 
// FIXME: We don't currently cross-check wdf between postlist and termlist.
// It's hard to see how to do so efficiently. We do cross-check doclens, but
// that "only" requires (4 * last_docid()) bytes.
91 
// Needed by both the glass and the honey checking code, so it must be
// available when either backend is enabled.
#if defined XAPIAN_HAS_GLASS_BACKEND || defined XAPIAN_HAS_HONEY_BACKEND
/** Try to reserve space in @a doclens for one entry per possible docid.
 *
 *  The vector is used to cross-check document lengths between the postlist
 *  and termlist tables.  If the reservation isn't made (because it would be
 *  too large, or the allocation fails) a message saying the cross-check is
 *  being skipped is written to @a out and @a doclens is left unreserved.
 *  No exception escapes for allocation failure.
 *
 *  @param doclens	Vector to reserve space in.
 *  @param last_docid	Highest docid which may be in use; (last_docid + 1)
 *			elements are reserved so docids can index directly.
 *  @param out		Stream for diagnostics, or NULL for no output.
 */
static void
reserve_doclens(vector<Xapian::termcount>& doclens, Xapian::docid last_docid,
		ostream * out)
{
    if (last_docid >= 0x40000000ul / sizeof(Xapian::termcount)) {
	// The memory block needed by the vector would be >= 1GB.
	if (out)
	    *out << "Cross-checking document lengths between the postlist and "
		    "termlist tables would use more than 1GB of memory, so "
		    "skipping that check" << endl;
	return;
    }
    try {
	// The size check above means (last_docid + 1) can't wrap here.
	doclens.reserve(last_docid + 1);
    } catch (const std::bad_alloc &) {
	// Failed to allocate the required memory.
	if (out)
	    *out << "Couldn't allocate enough memory for cross-checking document "
		    "lengths between the postlist and termlist tables, so "
		    "skipping that check" << endl;
    } catch (const std::length_error &) {
	// There are too many elements for the vector to handle!
	if (out)
	    *out << "Couldn't allocate enough elements for cross-checking document "
		    "lengths between the postlist and termlist tables, so "
		    "skipping that check" << endl;
    }
}
#endif
122 
123 static size_t
124 check_db_dir(string_view path, int opts, std::ostream *out)
125 {
126  struct stat sb;
127  string filename{path};
128  filename += "/iamglass";
129  if (stat(filename.c_str(), &sb) == 0) {
130 #ifndef XAPIAN_HAS_GLASS_BACKEND
131  (void)opts;
132  (void)out;
133  throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
134 #else
135  // Check a whole glass database directory.
136  vector<Xapian::termcount> doclens;
137  size_t errors = 0;
138 
139  try {
140  // Check if the database can actually be opened.
141  Xapian::Database db(path);
142  } catch (const Xapian::Error& e) {
143  // Continue - we can still usefully look at how it is broken.
144  if (out)
145  *out << "Database couldn't be opened for reading: "
146  << e.get_description()
147  << "\nContinuing check anyway" << endl;
148  ++errors;
149  }
150 
151  GlassVersion version_file(path);
152  version_file.read();
153  for (auto r = version_file.get_revision(); r != 0; --r) {
154  filename.resize(path.size());
155  filename += "/changes";
156  filename += str(r);
157  if (file_exists(filename))
158  GlassChanges::check(filename);
159  }
160 
161  Xapian::docid doccount = version_file.get_doccount();
162  Xapian::docid db_last_docid = version_file.get_last_docid();
163  if (db_last_docid < doccount) {
164  if (out)
165  *out << "last_docid = " << db_last_docid << " < doccount = "
166  << doccount << endl;
167  ++errors;
168  }
169  reserve_doclens(doclens, db_last_docid, out);
170 
171  // Check all the tables.
172  for (auto t : glass_tables) {
173  errors += check_glass_table(t.name, path, version_file, opts,
174  doclens, out);
175  }
176  return errors;
177 #endif
178  }
179 
180  filename.resize(path.size());
181  filename += "/iamhoney";
182  if (stat(filename.c_str(), &sb) == 0) {
183 #ifndef XAPIAN_HAS_HONEY_BACKEND
184  (void)opts;
185  (void)out;
186  auto msg = "Honey database support isn't enabled";
188 #else
189  // Check a whole honey database directory.
190  vector<Xapian::termcount> doclens;
191  size_t errors = 0;
192 
193  try {
194  // Check if the database can actually be opened.
195  Xapian::Database db(path);
196  } catch (const Xapian::Error& e) {
197  // Continue - we can still usefully look at how it is broken.
198  if (out)
199  *out << "Database couldn't be opened for reading: "
200  << e.get_description()
201  << "\nContinuing check anyway" << endl;
202  ++errors;
203  }
204 
205  HoneyVersion version_file(path);
206  version_file.read();
207 #if 0 // FIXME: Honey replication not yet implemented.
208  for (auto r = version_file.get_revision(); r != 0; --r) {
209  string changes_file = path;
210  changes_file += "/changes";
211  changes_file += str(r);
212  if (file_exists(changes_file))
213  HoneyChanges::check(changes_file);
214  }
215 #endif
216 
217  Xapian::docid doccount = version_file.get_doccount();
218  Xapian::docid db_last_docid = version_file.get_last_docid();
219  if (db_last_docid < doccount) {
220  if (out)
221  *out << "last_docid = " << db_last_docid << " < doccount = "
222  << doccount << endl;
223  ++errors;
224  }
225  reserve_doclens(doclens, db_last_docid, out);
226 
227  // Check all the tables.
228  for (auto t : honey_tables) {
229  errors += check_honey_table(t.name, path, version_file, opts,
230  doclens, out);
231  }
232  return errors;
233 #endif
234  }
235 
236  filename.resize(path.size());
237  filename += "/iamchert";
238  if (stat(filename.c_str(), &sb) == 0) {
239  // Chert is no longer supported as of Xapian 2.0.0.
240  throw Xapian::FeatureUnavailableError("Chert database support was removed in Xapian 2.0.0");
241  }
242 
243  filename.resize(path.size());
244  filename += "/iamflint";
245  if (stat(filename.c_str(), &sb) == 0) {
246  // Flint is no longer supported as of Xapian 1.3.0.
247  throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
248  }
249 
250  filename.resize(path.size());
251  filename += "/iambrass";
252  if (stat(filename.c_str(), &sb) == 0) {
253  // Brass was renamed to glass as of Xapian 1.3.2.
254  throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
255  }
256 
257  filename.resize(path.size());
258  filename += "/record_DB";
259  if (stat(filename.c_str(), &sb) == 0) {
260  // Quartz is no longer supported as of Xapian 1.1.0.
261  throw Xapian::FeatureUnavailableError("Quartz database support was removed in Xapian 1.1.0");
262  }
263 
265  "Directory does not contain a Xapian database");
266 }
267 
275 static size_t
276 check_db_table(string_view filename, int opts, std::ostream* out, int backend)
277 {
278  size_t p = filename.find_last_of(DIR_SEPS);
279  // If we found a directory separator, advance p to the next character. If
280  // we didn't, incrementing string::npos will give us 0, which is what we
281  // want.
282  ++p;
283 
284  string dir(filename, 0, p);
285 
286  string tablename;
287  while (p != filename.size()) {
288  char ch = filename[p++];
289  if (ch == '.') break;
290  tablename += C_tolower(ch);
291  }
292 
293 #if defined XAPIAN_HAS_GLASS_BACKEND
294  vector<Xapian::termcount> doclens;
295 #else
296  (void)opts;
297  (void)out;
298 #endif
299 
300  switch (backend) {
301  case BACKEND_GLASS: {
302 #ifndef XAPIAN_HAS_GLASS_BACKEND
303  auto msg = "Glass database support isn't enabled";
305 #else
306  GlassVersion version_file(dir);
307  version_file.read();
308  return check_glass_table(tablename.c_str(), dir, version_file, opts,
309  doclens, out);
310 #endif
311  }
312 
313  case BACKEND_HONEY: {
314 #ifndef XAPIAN_HAS_HONEY_BACKEND
315  auto msg = "Honey database support isn't enabled";
317 #else
318  HoneyVersion version_file(dir);
319  version_file.read();
320  return check_honey_table(tablename.c_str(), dir, version_file, opts,
321  doclens, out);
322 #endif
323  }
324 
325  case BACKEND_OLD:
326  break;
327 
328  default:
329  Assert(false);
330  break;
331  }
332 
333  // Chert, flint and brass all used the extension ".DB", so check which
334  // to give an appropriate error.
335  struct stat sb;
336  if (stat((dir + "/iamchert").c_str(), &sb) == 0) {
337  // Chert is no longer supported as of Xapian 2.0.0.
338  throw Xapian::FeatureUnavailableError("Chert database support was removed in Xapian 2.0.0");
339  }
340  if (stat((dir + "/iamflint").c_str(), &sb) == 0) {
341  // Flint is no longer supported as of Xapian 1.3.0.
342  throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
343  }
344  if (stat((dir + "/iambrass").c_str(), &sb) == 0) {
345  // Brass was renamed to glass as of Xapian 1.3.2.
346  throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
347  }
348  // Unaccompanied .DB file.
349  throw Xapian::FeatureUnavailableError("Flint, chert and brass database support have all been removed");
350 }
351 
356 static size_t
357 check_db_fd(int fd, int opts, std::ostream* out, int backend)
358 {
359  if (backend == BACKEND_UNKNOWN) {
360  // FIXME: Actually probe.
361  backend = BACKEND_GLASS;
362  }
363 
364  size_t errors = 0;
365  switch (backend) {
366  case BACKEND_GLASS: {
367  // Check a single-file glass database.
368 #ifdef XAPIAN_HAS_GLASS_BACKEND
369  // GlassVersion's destructor will close fd.
370  GlassVersion version_file(fd);
371  version_file.read();
372 
373  Xapian::docid doccount = version_file.get_doccount();
374  Xapian::docid db_last_docid = version_file.get_last_docid();
375  if (db_last_docid < doccount) {
376  if (out)
377  *out << "last_docid = " << db_last_docid << " < doccount = "
378  << doccount << endl;
379  ++errors;
380  }
381  vector<Xapian::termcount> doclens;
382  reserve_doclens(doclens, db_last_docid, out);
383 
384  // Check all the tables.
385  for (auto t : glass_tables) {
386  errors += check_glass_table(t.name, fd, version_file.get_offset(),
387  version_file, opts, doclens,
388  out);
389  }
390  break;
391 #else
392  (void)opts;
393  (void)out;
394  ::close(fd);
395  throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
396 #endif
397  }
398  case BACKEND_HONEY:
399 #ifdef XAPIAN_HAS_HONEY_BACKEND
400  (void)opts;
401  (void)out;
402  ::close(fd);
403  throw Xapian::UnimplementedError("Honey database checking not implemented");
404 #else
405  (void)opts;
406  (void)out;
407  ::close(fd);
408  throw Xapian::FeatureUnavailableError("Honey database support isn't enabled");
409 #endif
410  default:
411  Assert(false);
412  }
413  return errors;
414 }
415 
416 namespace Xapian {
417 
418 static size_t
419 check_stub(const string& stub_path, int opts, std::ostream* out)
420 {
421  size_t errors = 0;
422  read_stub_file(stub_path,
423  [&errors, opts, out](string_view path) {
424  errors += Database::check(path, opts, out);
425  },
426  [&errors, opts, out](string_view path) {
427  // FIXME: Doesn't check the database type is glass.
428  errors += Database::check(path, opts, out);
429  },
430  [&errors, opts, out](string_view path) {
431  // FIXME: Doesn't check the database type is honey.
432  errors += Database::check(path, opts, out);
433  },
434  [](string_view, string_view) {
435  auto msg = "Remote database checking not implemented";
436  throw Xapian::UnimplementedError(msg);
437  },
438  [](string_view, unsigned) {
439  auto msg = "Remote database checking not implemented";
440  throw Xapian::UnimplementedError(msg);
441  },
442  []() {
443  auto msg = "InMemory database checking not implemented";
444  throw Xapian::UnimplementedError(msg);
445  });
446  return errors;
447 }
448 
449 size_t
450 Database::check_(const string_view* path_ptr,
451  int fd,
452  int opts,
453  std::ostream *out)
454 {
455  if (!out) {
456  // If we have nowhere to write output, then disable all the options
457  // which only affect what we output.
459  }
460 
461  if (path_ptr == NULL) {
462  return check_db_fd(fd, opts, out, BACKEND_UNKNOWN);
463  }
464 
465  if (path_ptr->empty()) {
467  }
468 
469  string filename{*path_ptr};
470  struct stat sb;
471  if (stat(filename.c_str(), &sb) == 0) {
472  if (S_ISDIR(sb.st_mode)) {
473  return check_db_dir(filename, opts, out);
474  }
475 
476  if (S_ISREG(sb.st_mode)) {
477  int backend = test_if_single_file_db(sb, filename, &fd);
478  if (backend != BACKEND_UNKNOWN) {
479  return check_db_fd(fd, opts, out, backend);
480  }
481  // Could be a single table or a stub database file. Look at the
482  // extension to determine the type.
483  if (endswith(filename, ".DB")) {
484  backend = BACKEND_OLD;
485  } else if (endswith(filename, "." GLASS_TABLE_EXTENSION)) {
486  backend = BACKEND_GLASS;
487  } else if (endswith(filename, "." HONEY_TABLE_EXTENSION)) {
488  backend = BACKEND_HONEY;
489  } else {
490  return check_stub(filename, opts, out);
491  }
492 
493  return check_db_table(filename, opts, out, backend);
494  }
495 
496  throw Xapian::DatabaseOpeningError("Not a regular file or directory");
497  }
498 
499  // The filename passed doesn't exist - see if it's the basename of the
500  // table (perhaps with "." after it), so the user can do xapian-check on
501  // "foo/termlist" or "foo/termlist." (which you would get from filename
502  // completion with older backends).
503  if (endswith(filename, '.')) {
504  filename.resize(filename.size() - 1);
505  }
506 
507  int backend = BACKEND_UNKNOWN;
508  if (stat((filename + ".DB").c_str(), &sb) == 0) {
509  // Could be chert, flint or brass - we check which below.
510  backend = BACKEND_OLD;
511  } else if (stat((filename + "." GLASS_TABLE_EXTENSION).c_str(), &sb) == 0) {
512  backend = BACKEND_GLASS;
513  } else if (stat((filename + "." HONEY_TABLE_EXTENSION).c_str(), &sb) == 0) {
514  backend = BACKEND_HONEY;
515  } else {
517  }
518 
519  return check_db_table(*path_ptr, opts, out, backend);
520 }
521 
522 }
static size_t check_db_dir(string_view path, int opts, std::ostream *out)
Definition: dbcheck.cc:124
static void throw_no_db_to_check()
Definition: dbcheck.cc:82
static const struct @5 honey_tables[]
static void reserve_doclens(vector< Xapian::termcount > &doclens, Xapian::docid last_docid, ostream *out)
Definition: dbcheck.cc:94
static size_t check_db_fd(int fd, int opts, std::ostream *out, int backend)
Check a single file DB from an fd.
Definition: dbcheck.cc:357
static size_t check_db_table(string_view filename, int opts, std::ostream *out, int backend)
Check a database table.
Definition: dbcheck.cc:276
static const struct @4 glass_tables[]
BACKEND_* constants.
@ BACKEND_GLASS
Definition: backends.h:29
@ BACKEND_OLD
Definition: backends.h:25
@ BACKEND_UNKNOWN
Definition: backends.h:26
@ BACKEND_HONEY
Definition: backends.h:30
static void check(const std::string &changes_file)
The GlassVersion class manages the revision files.
Definition: glass_version.h:96
Xapian::docid get_last_docid() const
glass_revision_number_t get_revision() const
off_t get_offset() const
Xapian::doccount get_doccount() const
void read()
Read the version file and check it's a version we understand.
The HoneyVersion class manages the revision files.
Definition: honey_version.h:79
Xapian::docid get_last_docid() const
honey_revision_number_t get_revision() const
void read()
Read the version file and check it's a version we understand.
Xapian::doccount get_doccount() const
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:569
An indexed database of documents.
Definition: database.h:75
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:41
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:707
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
#define DIR_SEPS
Definition: config.h:8
Constants in the Xapian namespace.
An indexed database of documents.
int test_if_single_file_db(int fd)
Probe if a file descriptor is a single-file database.
Helper functions for database handling.
void read_stub_file(std::string_view file, A1 action_auto, A2 action_glass, A3 action_honey, A4 action_remote_prog, A5 action_remote_tcp, A6 action_inmemory)
Open, read and process a stub database file.
PositionList * p
Hierarchy of classes which Xapian can throw as exceptions.
int close(FD &fd)
Definition: fd.h:63
Utility functions for testing files.
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:40
Glass changesets.
size_t check_glass_table(const char *tablename, string_view db_dir, int fd, off_t offset_, const GlassVersion &version_file, int opts, vector< Xapian::termcount > &doclens, ostream *out)
Check a glass table.
Definitions, types, etc for use inside glass.
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27
GlassVersion class.
size_t check_honey_table(const char *tablename, string_view db_dir, int fd, off_t offset_, const HoneyVersion &version_file, int opts, vector< Xapian::termcount > &doclens, ostream *out)
Check a honey table.
Definitions, types, etc for use inside honey.
#define HONEY_TABLE_EXTENSION
Honey table extension.
Definition: honey_defs.h:29
HoneyVersion class.
string str(int value)
Convert int to std::string.
Definition: str.cc:91
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
const int DBCHECK_FIX
Fix problems.
Definition: constants.h:238
static size_t check_stub(const string &stub_path, int opts, std::ostream *out)
Definition: dbcheck.cc:419
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
#define S_ISREG(ST_MODE)
Definition: safesysstat.h:59
#define S_ISDIR(ST_MODE)
Definition: safesysstat.h:56
Various handy string-related helpers.
bool endswith(std::string_view s, char sfx)
Definition: stringutils.h:80
char C_tolower(char ch)
Definition: stringutils.h:226
Definition: header.h:215
static const char * opts