xapian-core  1.4.25
dbcheck.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 #include "xapian/database.h"
25 
26 #include "xapian/constants.h"
27 #include "xapian/error.h"
28 
29 #ifdef XAPIAN_HAS_GLASS_BACKEND
30 #include "glass/glass_changes.h"
31 #include "glass/glass_dbcheck.h"
32 #include "glass/glass_version.h"
33 #endif
34 #ifdef XAPIAN_HAS_CHERT_BACKEND
35 #include "chert/chert_database.h"
36 #include "chert/chert_dbcheck.h"
37 #include "chert/chert_types.h"
38 #include "chert/chert_version.h"
39 #endif
40 
41 #include "backends.h"
42 #include "databasehelpers.h"
43 #include "filetests.h"
44 #include "omassert.h"
45 #include "stringutils.h"
46 
47 #include <ostream>
48 #include <stdexcept>
49 
50 using namespace std;
51 
52 #ifdef XAPIAN_HAS_GLASS_BACKEND
// Names of the tables in a glass database, listed in the order in which they
// get checked.  The order matters: "termlist" must come before "postlist" so
// that the document lengths can be cross-checked between the two.
static const struct {
    char name[9];
} glass_tables[] = {
    { "docdata" },
    { "termlist" },
    { "postlist" },
    { "position" },
    { "spelling" },
    { "synonym" },
};
63 #endif
64 
65 [[noreturn]]
66 static void
68 {
69  auto msg = "Couldn't find Xapian database or table to check";
70  throw Xapian::DatabaseOpeningError(msg, ENOENT);
71 }
72 
// FIXME: We don't currently cross-check wdf between postlist and termlist.
// It's hard to see how to do that efficiently.  We do cross-check doclens,
// but that "only" requires (4 * last_docid()) bytes.
76 
77 #if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
78 static void
79 reserve_doclens(vector<Xapian::termcount>& doclens, Xapian::docid last_docid,
80  ostream * out)
81 {
82  if (last_docid >= 0x40000000ul / sizeof(Xapian::termcount)) {
83  // The memory block needed by the vector would be >= 1GB.
84  if (out)
85  *out << "Cross-checking document lengths between the postlist and "
86  "termlist tables would use more than 1GB of memory, so "
87  "skipping that check" << endl;
88  return;
89  }
90  try {
91  doclens.reserve(last_docid + 1);
92  } catch (const std::bad_alloc &) {
93  // Failed to allocate the required memory.
94  if (out)
95  *out << "Couldn't allocate enough memory for cross-checking document "
96  "lengths between the postlist and termlist tables, so "
97  "skipping that check" << endl;
98  } catch (const std::length_error &) {
99  // There are too many elements for the vector to handle!
100  if (out)
101  *out << "Couldn't allocate enough elements for cross-checking document "
102  "lengths between the postlist and termlist tables, so "
103  "skipping that check" << endl;
104  }
105 }
106 #endif
107 
108 static size_t
109 check_db_dir(const string & path, int opts, std::ostream *out)
110 {
111  struct stat sb;
112  if (stat((path + "/iamchert").c_str(), &sb) == 0) {
113 #ifndef XAPIAN_HAS_CHERT_BACKEND
114  (void)opts;
115  (void)out;
116  throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
117 #else
118  // Check a whole chert database directory.
119  vector<Xapian::termcount> doclens;
120  size_t errors = 0;
121 
122  // If we can't read the doccount or last docid, set them to their
123  // maximum values to suppress errors.
125  Xapian::docid db_last_docid = CHERT_MAX_DOCID;
126 
128  chert_revision_number_t * rev_ptr = &rev;
129  try {
130  // Open at the lower level so we can get the revision number.
131  ChertDatabase db(path);
132  doccount = db.get_doccount();
133  db_last_docid = db.get_lastdocid();
134  reserve_doclens(doclens, db_last_docid, out);
135  rev = db.get_revision_number();
136  } catch (const Xapian::Error & e) {
137  // Ignore so we can check a database too broken to open.
138  if (out)
139  *out << "Database couldn't be opened for reading: "
140  << e.get_description()
141  << "\nContinuing check anyway" << endl;
142  ++errors;
143  }
144 
145  size_t pre_table_check_errors = errors;
146 
147  // Check all the btrees.
148  //
149  // Note: it's important to check "termlist" before "postlist" so
150  // that we can cross-check the document lengths; also we check
151  // "record" first as that's the last committed, so has the most
152  // reliable rootblock revision in DBCHECK_FIX mode.
153  static const struct { char name[9]; } tables[] = {
154  { "record" },
155  { "termlist" },
156  { "postlist" },
157  { "position" },
158  { "spelling" },
159  { "synonym" }
160  };
161  for (auto t : tables) {
162  const char * name = t.name;
163  if (out)
164  *out << name << ":\n";
165  if (strcmp(name, "record") != 0 && strcmp(name, "postlist") != 0) {
166  // Other tables are created lazily, so may not exist.
167  string table(path);
168  table += '/';
169  table += name;
170  table += ".DB";
171  if (!file_exists(table)) {
172  if (out) {
173  if (strcmp(name, "termlist") == 0) {
174  *out << "Not present.\n";
175  } else {
176  *out << "Lazily created, and not yet used.\n";
177  }
178  *out << endl;
179  }
180  continue;
181  }
182  }
183  errors += check_chert_table(name, path, rev_ptr, opts, doclens,
184  doccount, db_last_docid, out);
185  }
186 
187  if (errors == pre_table_check_errors && (opts & Xapian::DBCHECK_FIX)) {
188  // Check the version file is OK and if not, recreate it.
189  ChertVersion iam(path);
190  try {
191  iam.read_and_check();
192  } catch (const Xapian::DatabaseError &) {
193  iam.create();
194  }
195  }
196  return errors;
197 #endif
198  }
199 
200  if (stat((path + "/iamglass").c_str(), &sb) == 0) {
201 #ifndef XAPIAN_HAS_GLASS_BACKEND
202  (void)opts;
203  (void)out;
204  throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
205 #else
206  // Check a whole glass database directory.
207  vector<Xapian::termcount> doclens;
208  size_t errors = 0;
209 
210  try {
211  // Check if the database can actually be opened.
212  Xapian::Database db(path);
213  } catch (const Xapian::Error & e) {
214  // Continue - we can still usefully look at how it is broken.
215  if (out)
216  *out << "Database couldn't be opened for reading: "
217  << e.get_description()
218  << "\nContinuing check anyway" << endl;
219  ++errors;
220  }
221 
222  GlassVersion version_file(path);
223  version_file.read();
224  for (glass_revision_number_t r = version_file.get_revision(); r != 0; --r) {
225  string changes_file = path;
226  changes_file += "/changes";
227  changes_file += str(r);
228  if (file_exists(changes_file))
229  GlassChanges::check(changes_file);
230  }
231 
232  Xapian::docid doccount = version_file.get_doccount();
233  Xapian::docid db_last_docid = version_file.get_last_docid();
234  if (db_last_docid < doccount) {
235  if (out)
236  *out << "last_docid = " << db_last_docid << " < doccount = "
237  << doccount << endl;
238  ++errors;
239  }
240  reserve_doclens(doclens, db_last_docid, out);
241 
242  // Check all the tables.
243  for (auto t : glass_tables) {
244  errors += check_glass_table(t.name, path, version_file, opts,
245  doclens, out);
246  }
247  return errors;
248 #endif
249  }
250 
251  if (stat((path + "/iamflint").c_str(), &sb) == 0) {
252  // Flint is no longer supported as of Xapian 1.3.0.
253  throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
254  }
255 
256  if (stat((path + "/iambrass").c_str(), &sb) == 0) {
257  // Brass was renamed to glass as of Xapian 1.3.2.
258  throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
259  }
260 
261  if (stat((path + "/record_DB").c_str(), &sb) == 0) {
262  // Quartz is no longer supported as of Xapian 1.1.0.
263  throw Xapian::FeatureUnavailableError("Quartz database support was removed in Xapian 1.1.0");
264  }
265 
267  "Directory does not contain a Xapian database");
268 }
269 
277 static size_t
278 check_db_table(const string& filename, int opts, std::ostream* out, int backend)
279 {
280  size_t p = filename.find_last_of(DIR_SEPS);
281  // If we found a directory separator, advance p to the next character. If
282  // we didn't, incrementing string::npos will give us 0, which is what we
283  // want.
284  ++p;
285 
286  string dir(filename, 0, p);
287 
288  string tablename;
289  while (p != filename.size()) {
290  char ch = filename[p++];
291  if (ch == '.') break;
292  tablename += C_tolower(ch);
293  }
294 
295 #if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
296  vector<Xapian::termcount> doclens;
297 #else
298  (void)opts;
299  (void)out;
300 #endif
301 
302  switch (backend) {
303  case BACKEND_GLASS: {
304 #ifndef XAPIAN_HAS_GLASS_BACKEND
305  auto msg = "Glass database support isn't enabled";
307 #else
308  GlassVersion version_file(dir);
309  version_file.read();
310  return check_glass_table(tablename.c_str(), dir, version_file, opts,
311  doclens, out);
312 #endif
313  }
314 
315  case BACKEND_CHERT:
316  break;
317 
318  default:
319  Assert(false);
320  break;
321  }
322 
323  // Flint and brass also used the extension ".DB", so check that we
324  // haven't been passed a single table in a flint or brass database.
325  struct stat sb;
326  if (stat((dir + "/iamflint").c_str(), &sb) == 0) {
327  // Flint is no longer supported as of Xapian 1.3.0.
328  throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
329  }
330  if (stat((dir + "/iambrass").c_str(), &sb) == 0) {
331  // Brass was renamed to glass as of Xapian 1.3.2.
332  throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
333  }
334 #ifndef XAPIAN_HAS_CHERT_BACKEND
335  throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
336 #else
337  // Set the doccount and the last docid to their maximum values to suppress
338  // errors.
339  return check_chert_table(tablename.c_str(), dir, NULL, opts, doclens,
341 #endif
342 }
343 
348 static size_t
349 check_db_fd(int fd, int opts, std::ostream* out, int backend)
350 {
351  if (backend == BACKEND_UNKNOWN) {
352  // FIXME: Actually probe.
353  backend = BACKEND_GLASS;
354  }
355 
356  size_t errors = 0;
357  switch (backend) {
358  case BACKEND_GLASS: {
359  // Check a single-file glass database.
360 #ifdef XAPIAN_HAS_GLASS_BACKEND
361  // GlassVersion's destructor will close fd.
362  GlassVersion version_file(fd);
363  version_file.read();
364 
365  Xapian::docid doccount = version_file.get_doccount();
366  Xapian::docid db_last_docid = version_file.get_last_docid();
367  if (db_last_docid < doccount) {
368  if (out)
369  *out << "last_docid = " << db_last_docid << " < doccount = "
370  << doccount << endl;
371  ++errors;
372  }
373  vector<Xapian::termcount> doclens;
374  reserve_doclens(doclens, db_last_docid, out);
375 
376  // Check all the tables.
377  for (auto t : glass_tables) {
378  errors += check_glass_table(t.name, fd, version_file.get_offset(),
379  version_file, opts, doclens,
380  out);
381  }
382  break;
383 #else
384  (void)opts;
385  (void)out;
386  ::close(fd);
387  throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
388 #endif
389  }
390  default:
391  Assert(false);
392  }
393  return errors;
394 }
395 
396 namespace Xapian {
397 
398 static size_t
399 check_stub(const string& stub_path, int opts, std::ostream* out)
400 {
401  size_t errors = 0;
402  read_stub_file(stub_path,
403  [&errors, opts, out](const string& path) {
404  errors += Database::check(path, opts, out);
405  },
406  [&errors, opts, out](const string& path) {
407  // FIXME: Doesn't check the database type is chert.
408  errors += Database::check(path, opts, out);
409  },
410  [&errors, opts, out](const string& path) {
411  // FIXME: Doesn't check the database type is glass.
412  errors += Database::check(path, opts, out);
413  },
414  [](const string&, const string&) {
415  auto msg = "Remote database checking not implemented";
416  throw Xapian::UnimplementedError(msg);
417  },
418  [](const string&, unsigned) {
419  auto msg = "Remote database checking not implemented";
420  throw Xapian::UnimplementedError(msg);
421  },
422  []() {
423  auto msg = "InMemory database checking not implemented";
424  throw Xapian::UnimplementedError(msg);
425  });
426  return errors;
427 }
428 
429 size_t
430 Database::check_(const string * path_ptr, int fd, int opts, std::ostream *out)
431 {
432  if (!out) {
433  // If we have nowhere to write output, then disable all the options
434  // which only affect what we output.
435  opts &= Xapian::DBCHECK_FIX;
436  }
437 
438  if (path_ptr == NULL) {
439  return check_db_fd(fd, opts, out, BACKEND_UNKNOWN);
440  }
441 
442  const string & path = *path_ptr;
443  if (path.empty()) throw_no_db_to_check();
444  struct stat sb;
445  if (stat(path.c_str(), &sb) == 0) {
446  if (S_ISDIR(sb.st_mode)) {
447  return check_db_dir(path, opts, out);
448  }
449 
450  if (S_ISREG(sb.st_mode)) {
451  int backend = test_if_single_file_db(sb, path, &fd);
452  if (backend != BACKEND_UNKNOWN) {
453  return check_db_fd(fd, opts, out, backend);
454  }
455  // Could be a single table or a stub database file. Look at the
456  // extension to determine the type.
457  if (endswith(path, ".DB")) {
458  // It could also be flint or brass, but we check for those below.
459  backend = BACKEND_CHERT;
460  } else if (endswith(path, "." GLASS_TABLE_EXTENSION)) {
461  backend = BACKEND_GLASS;
462  } else {
463  return check_stub(path, opts, out);
464  }
465 
466  return check_db_table(path, opts, out, backend);
467  }
468 
469  throw Xapian::DatabaseOpeningError("Not a regular file or directory");
470  }
471 
472  // The filename passed doesn't exist - see if it's the basename of the
473  // table (perhaps with "." after it), so the user can do xapian-check on
474  // "foo/termlist" or "foo/termlist." (which you would get from filename
475  // completion with older backends).
476  string filename = path;
477  if (endswith(filename, '.')) {
478  filename.resize(filename.size() - 1);
479  }
480 
481  int backend = BACKEND_UNKNOWN;
482  if (stat((filename + ".DB").c_str(), &sb) == 0) {
483  // It could also be flint or brass, but we check for those below.
484  backend = BACKEND_CHERT;
485  } else if (stat((filename + "." GLASS_TABLE_EXTENSION).c_str(), &sb) == 0) {
486  backend = BACKEND_GLASS;
487  } else {
489  }
490 
491  return check_db_table(path, opts, out, backend);
492 }
493 
494 }
Xapian::docid get_lastdocid() const
Virtual methods of Database::Internal.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
int close(FD &fd)
Definition: fd.h:63
bool endswith(const std::string &s, char sfx)
Definition: stringutils.h:75
#define Assert(COND)
Definition: omassert.h:122
GlassVersion class.
static size_t check_stub(const string &stub_path, int opts, std::ostream *out)
Definition: dbcheck.cc:399
void read_and_check()
Read the version file and check it&#39;s a version we understand.
char C_tolower(char ch)
Definition: stringutils.h:221
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
This class is used to access a database, or a group of databases.
Definition: database.h:68
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
#define S_ISDIR(ST_MODE)
Definition: safesysstat.h:57
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
Xapian::doccount get_doccount() const
Virtual methods of Database::Internal.
Constants in the Xapian namespace.
The GlassVersion class manages the revision files.
Definition: glass_version.h:94
static const char * opts
#define DIR_SEPS
Definition: config.h:8
Helper functions for database handling.
chert_revision_number_t get_revision_number() const
Get an object holding the revision number which the tables are opened at.
static void reserve_doclens(vector< Xapian::termcount > &doclens, Xapian::docid last_docid, ostream *out)
Definition: dbcheck.cc:79
STL namespace.
static void check(const std::string &changes_file)
off_t get_offset() const
static size_t check_db_fd(int fd, int opts, std::ostream *out, int backend)
Check a single file DB from an fd.
Definition: dbcheck.cc:349
Utility functions for testing files.
Types used by chert backend and the Btree manager.
size_t check_glass_table(const char *tablename, const string &db_dir, int fd, off_t offset_, const GlassVersion &version_file, int opts, vector< Xapian::termcount > &doclens, ostream *out)
unsigned int chert_revision_number_t
A type used to store a revision number for a table.
Definition: chert_types.h:40
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27
Xapian::docid get_last_docid() const
int test_if_single_file_db(const struct stat &sb, const string &path, int *fd_ptr)
Probe if a path is a single-file database.
Hierarchy of classes which Xapian can throw as exceptions.
The ChertVersion class manages the "iamchert" file.
Definition: chert_version.h:34
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:72
BACKEND_* constants.
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:719
Glass changesets.
#define S_ISREG(ST_MODE)
Definition: safesysstat.h:60
API for working with Xapian databases.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
#define CHERT_MAX_DOCID
The largest docid value supported by chert.
Definition: chert_types.h:60
void read_stub_file(const std::string &file, A1 action_auto, A2 action_chert, A3 action_glass, A4 action_remote_prog, A5 action_remote_tcp, A6 action_inmemory)
Open, read and process a stub database file.
const int DBCHECK_FIX
Fix problems.
Definition: constants.h:243
void read()
Read the version file and check it&#39;s a version we understand.
C++ class definition for chert database.
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
void create()
Create the version file.
ChertVersion class.
static size_t check_db_table(const string &filename, int opts, std::ostream *out, int backend)
Check a database table.
Definition: dbcheck.cc:278
size_t check_chert_table(const char *tablename, const string &dir, chert_revision_number_t *rev_ptr, int opts, vector< Xapian::termcount > &doclens, Xapian::doccount doccount, Xapian::docid db_last_docid, ostream *out)
static size_t check_db_dir(const string &path, int opts, std::ostream *out)
Definition: dbcheck.cc:109
char name[9]
Definition: dbcheck.cc:55
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Check a chert table.
static const struct @3 glass_tables[]
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree s...
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:43
Various handy helpers which std::string really should provide.
Definition: header.h:151
Various assertion macros.
static void throw_no_db_to_check()
Definition: dbcheck.cc:67
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
DatabaseError indicates some sort of database related error.
Definition: error.h:367
glass_revision_number_t get_revision() const
Check a glass table.
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::doccount get_doccount() const