xapian-core  1.4.22
dbcheck.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 #include "xapian/database.h"
25 
26 #include "xapian/constants.h"
27 #include "xapian/error.h"
28 
29 #ifdef XAPIAN_HAS_GLASS_BACKEND
30 #include "glass/glass_changes.h"
31 #include "glass/glass_dbcheck.h"
32 #include "glass/glass_version.h"
33 #endif
34 #ifdef XAPIAN_HAS_CHERT_BACKEND
35 #include "chert/chert_database.h"
36 #include "chert/chert_dbcheck.h"
37 #include "chert/chert_types.h"
38 #include "chert/chert_version.h"
39 #endif
40 
41 #include "backends.h"
42 #include "databasehelpers.h"
43 #include "filetests.h"
44 #include "omassert.h"
45 #include "stringutils.h"
46 
47 #include <ostream>
48 #include <stdexcept>
49 
50 using namespace std;
51 
52 #ifdef XAPIAN_HAS_GLASS_BACKEND
// Tables to check for a glass database.  Note: it's important to check
// termlist before postlist so that we can cross-check the document lengths.
//
// Each name is stored inline in a fixed-size buffer; the longest names here
// are 8 characters, so 9 bytes are enough including the terminating NUL.
static const struct { char name[9]; } glass_tables[] = {
    { "docdata" },
    { "termlist" },
    { "postlist" },
    { "position" },
    { "spelling" },
    { "synonym" }
};
63 #endif
64 
// FIXME: We don't currently cross-check wdf between postlist and termlist.
// It's hard to see how to do that efficiently.  We do cross-check doclens,
// but that "only" requires (4 * last_docid()) bytes.
68 
69 #if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
70 static void
71 reserve_doclens(vector<Xapian::termcount>& doclens, Xapian::docid last_docid,
72  ostream * out)
73 {
74  if (last_docid >= 0x40000000ul / sizeof(Xapian::termcount)) {
75  // The memory block needed by the vector would be >= 1GB.
76  if (out)
77  *out << "Cross-checking document lengths between the postlist and "
78  "termlist tables would use more than 1GB of memory, so "
79  "skipping that check" << endl;
80  return;
81  }
82  try {
83  doclens.reserve(last_docid + 1);
84  } catch (const std::bad_alloc &) {
85  // Failed to allocate the required memory.
86  if (out)
87  *out << "Couldn't allocate enough memory for cross-checking document "
88  "lengths between the postlist and termlist tables, so "
89  "skipping that check" << endl;
90  } catch (const std::length_error &) {
91  // There are too many elements for the vector to handle!
92  if (out)
93  *out << "Couldn't allocate enough elements for cross-checking document "
94  "lengths between the postlist and termlist tables, so "
95  "skipping that check" << endl;
96  }
97 }
98 #endif
99 
100 static size_t
101 check_db_dir(const string & path, int opts, std::ostream *out)
102 {
103  struct stat sb;
104  if (stat((path + "/iamchert").c_str(), &sb) == 0) {
105 #ifndef XAPIAN_HAS_CHERT_BACKEND
106  (void)opts;
107  (void)out;
108  throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
109 #else
110  // Check a whole chert database directory.
111  vector<Xapian::termcount> doclens;
112  size_t errors = 0;
113 
114  // If we can't read the doccount or last docid, set them to their
115  // maximum values to suppress errors.
117  Xapian::docid db_last_docid = CHERT_MAX_DOCID;
118 
120  chert_revision_number_t * rev_ptr = &rev;
121  try {
122  // Open at the lower level so we can get the revision number.
123  ChertDatabase db(path);
124  doccount = db.get_doccount();
125  db_last_docid = db.get_lastdocid();
126  reserve_doclens(doclens, db_last_docid, out);
127  rev = db.get_revision_number();
128  } catch (const Xapian::Error & e) {
129  // Ignore so we can check a database too broken to open.
130  if (out)
131  *out << "Database couldn't be opened for reading: "
132  << e.get_description()
133  << "\nContinuing check anyway" << endl;
134  ++errors;
135  }
136 
137  size_t pre_table_check_errors = errors;
138 
139  // Check all the btrees.
140  //
141  // Note: it's important to check "termlist" before "postlist" so
142  // that we can cross-check the document lengths; also we check
143  // "record" first as that's the last committed, so has the most
144  // reliable rootblock revision in DBCHECK_FIX mode.
145  static const struct { char name[9]; } tables[] = {
146  { "record" },
147  { "termlist" },
148  { "postlist" },
149  { "position" },
150  { "spelling" },
151  { "synonym" }
152  };
153  for (auto t : tables) {
154  const char * name = t.name;
155  if (out)
156  *out << name << ":\n";
157  if (strcmp(name, "record") != 0 && strcmp(name, "postlist") != 0) {
158  // Other tables are created lazily, so may not exist.
159  string table(path);
160  table += '/';
161  table += name;
162  table += ".DB";
163  if (!file_exists(table)) {
164  if (out) {
165  if (strcmp(name, "termlist") == 0) {
166  *out << "Not present.\n";
167  } else {
168  *out << "Lazily created, and not yet used.\n";
169  }
170  *out << endl;
171  }
172  continue;
173  }
174  }
175  errors += check_chert_table(name, path, rev_ptr, opts, doclens,
176  doccount, db_last_docid, out);
177  }
178 
179  if (errors == pre_table_check_errors && (opts & Xapian::DBCHECK_FIX)) {
180  // Check the version file is OK and if not, recreate it.
181  ChertVersion iam(path);
182  try {
183  iam.read_and_check();
184  } catch (const Xapian::DatabaseError &) {
185  iam.create();
186  }
187  }
188  return errors;
189 #endif
190  }
191 
192  if (stat((path + "/iamglass").c_str(), &sb) == 0) {
193 #ifndef XAPIAN_HAS_GLASS_BACKEND
194  (void)opts;
195  (void)out;
196  throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
197 #else
198  // Check a whole glass database directory.
199  vector<Xapian::termcount> doclens;
200  size_t errors = 0;
201 
202  try {
203  // Check if the database can actually be opened.
204  Xapian::Database db(path);
205  } catch (const Xapian::Error & e) {
206  // Continue - we can still usefully look at how it is broken.
207  if (out)
208  *out << "Database couldn't be opened for reading: "
209  << e.get_description()
210  << "\nContinuing check anyway" << endl;
211  ++errors;
212  }
213 
214  GlassVersion version_file(path);
215  version_file.read();
216  for (glass_revision_number_t r = version_file.get_revision(); r != 0; --r) {
217  string changes_file = path;
218  changes_file += "/changes";
219  changes_file += str(r);
220  if (file_exists(changes_file))
221  GlassChanges::check(changes_file);
222  }
223 
224  Xapian::docid doccount = version_file.get_doccount();
225  Xapian::docid db_last_docid = version_file.get_last_docid();
226  if (db_last_docid < doccount) {
227  if (out)
228  *out << "last_docid = " << db_last_docid << " < doccount = "
229  << doccount << endl;
230  ++errors;
231  }
232  reserve_doclens(doclens, db_last_docid, out);
233 
234  // Check all the tables.
235  for (auto t : glass_tables) {
236  errors += check_glass_table(t.name, path, version_file, opts,
237  doclens, out);
238  }
239  return errors;
240 #endif
241  }
242 
243  if (stat((path + "/iamflint").c_str(), &sb) == 0) {
244  // Flint is no longer supported as of Xapian 1.3.0.
245  throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
246  }
247 
248  if (stat((path + "/iambrass").c_str(), &sb) == 0) {
249  // Brass was renamed to glass as of Xapian 1.3.2.
250  throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
251  }
252 
253  if (stat((path + "/record_DB").c_str(), &sb) == 0) {
254  // Quartz is no longer supported as of Xapian 1.1.0.
255  throw Xapian::FeatureUnavailableError("Quartz database support was removed in Xapian 1.1.0");
256  }
257 
259  "Directory does not contain a Xapian database");
260 }
261 
269 static size_t
270 check_db_table(const string& filename, int opts, std::ostream* out, int backend)
271 {
272  size_t p = filename.find_last_of(DIR_SEPS);
273  // If we found a directory separator, advance p to the next character. If
274  // we didn't, incrementing string::npos will give us 0, which is what we
275  // want.
276  ++p;
277 
278  string dir(filename, 0, p);
279 
280  string tablename;
281  while (p != filename.size()) {
282  char ch = filename[p++];
283  if (ch == '.') break;
284  tablename += C_tolower(ch);
285  }
286 
287 #if defined XAPIAN_HAS_CHERT_BACKEND || defined XAPIAN_HAS_GLASS_BACKEND
288  vector<Xapian::termcount> doclens;
289 #else
290  (void)opts;
291  (void)out;
292 #endif
293 
294  switch (backend) {
295  case BACKEND_GLASS: {
296 #ifndef XAPIAN_HAS_GLASS_BACKEND
297  auto msg = "Glass database support isn't enabled";
299 #else
300  GlassVersion version_file(dir);
301  version_file.read();
302  return check_glass_table(tablename.c_str(), dir, version_file, opts,
303  doclens, out);
304 #endif
305  }
306 
307  case BACKEND_CHERT:
308  break;
309 
310  default:
311  Assert(false);
312  break;
313  }
314 
315  // Flint and brass also used the extension ".DB", so check that we
316  // haven't been passed a single table in a flint or brass database.
317  struct stat sb;
318  if (stat((dir + "/iamflint").c_str(), &sb) == 0) {
319  // Flint is no longer supported as of Xapian 1.3.0.
320  throw Xapian::FeatureUnavailableError("Flint database support was removed in Xapian 1.3.0");
321  }
322  if (stat((dir + "/iambrass").c_str(), &sb) == 0) {
323  // Brass was renamed to glass as of Xapian 1.3.2.
324  throw Xapian::FeatureUnavailableError("Brass database support was removed in Xapian 1.3.2");
325  }
326 #ifndef XAPIAN_HAS_CHERT_BACKEND
327  throw Xapian::FeatureUnavailableError("Chert database support isn't enabled");
328 #else
329  // Set the doccount and the last docid to their maximum values to suppress
330  // errors.
331  return check_chert_table(tablename.c_str(), dir, NULL, opts, doclens,
333 #endif
334 }
335 
340 static size_t
341 check_db_fd(int fd, int opts, std::ostream* out, int backend)
342 {
343  if (backend == BACKEND_UNKNOWN) {
344  // FIXME: Actually probe.
345  backend = BACKEND_GLASS;
346  }
347 
348  size_t errors = 0;
349  switch (backend) {
350  case BACKEND_GLASS: {
351  // Check a single-file glass database.
352 #ifdef XAPIAN_HAS_GLASS_BACKEND
353  // GlassVersion's destructor will close fd.
354  GlassVersion version_file(fd);
355  version_file.read();
356 
357  Xapian::docid doccount = version_file.get_doccount();
358  Xapian::docid db_last_docid = version_file.get_last_docid();
359  if (db_last_docid < doccount) {
360  if (out)
361  *out << "last_docid = " << db_last_docid << " < doccount = "
362  << doccount << endl;
363  ++errors;
364  }
365  vector<Xapian::termcount> doclens;
366  reserve_doclens(doclens, db_last_docid, out);
367 
368  // Check all the tables.
369  for (auto t : glass_tables) {
370  errors += check_glass_table(t.name, fd, version_file.get_offset(),
371  version_file, opts, doclens,
372  out);
373  }
374  break;
375 #else
376  (void)opts;
377  (void)out;
378  ::close(fd);
379  throw Xapian::FeatureUnavailableError("Glass database support isn't enabled");
380 #endif
381  }
382  default:
383  Assert(false);
384  }
385  return errors;
386 }
387 
388 namespace Xapian {
389 
390 static size_t
391 check_stub(const string& stub_path, int opts, std::ostream* out)
392 {
393  size_t errors = 0;
394  read_stub_file(stub_path,
395  [&errors, opts, out](const string& path) {
396  errors += Database::check(path, opts, out);
397  },
398  [&errors, opts, out](const string& path) {
399  // FIXME: Doesn't check the database type is chert.
400  errors += Database::check(path, opts, out);
401  },
402  [&errors, opts, out](const string& path) {
403  // FIXME: Doesn't check the database type is glass.
404  errors += Database::check(path, opts, out);
405  },
406  [](const string&, const string&) {
407  auto msg = "Remote database checking not implemented";
408  throw Xapian::UnimplementedError(msg);
409  },
410  [](const string&, unsigned) {
411  auto msg = "Remote database checking not implemented";
412  throw Xapian::UnimplementedError(msg);
413  },
414  []() {
415  auto msg = "InMemory database checking not implemented";
416  throw Xapian::UnimplementedError(msg);
417  });
418  return errors;
419 }
420 
421 size_t
422 Database::check_(const string * path_ptr, int fd, int opts, std::ostream *out)
423 {
424  if (!out) {
425  // If we have nowhere to write output, then disable all the options
426  // which only affect what we output.
427  opts &= Xapian::DBCHECK_FIX;
428  }
429 
430  if (path_ptr == NULL) {
431  return check_db_fd(fd, opts, out, BACKEND_UNKNOWN);
432  }
433 
434  const string & path = *path_ptr;
435  struct stat sb;
436  if (stat(path.c_str(), &sb) == 0) {
437  if (S_ISDIR(sb.st_mode)) {
438  return check_db_dir(path, opts, out);
439  }
440 
441  if (S_ISREG(sb.st_mode)) {
442  int backend = test_if_single_file_db(sb, path, &fd);
443  if (backend != BACKEND_UNKNOWN) {
444  return check_db_fd(fd, opts, out, backend);
445  }
446  // Could be a single table or a stub database file. Look at the
447  // extension to determine the type.
448  if (endswith(path, ".DB")) {
449  // It could also be flint or brass, but we check for those below.
450  backend = BACKEND_CHERT;
451  } else if (endswith(path, "." GLASS_TABLE_EXTENSION)) {
452  backend = BACKEND_GLASS;
453  } else {
454  return check_stub(path, opts, out);
455  }
456 
457  return check_db_table(path, opts, out, backend);
458  }
459 
460  throw Xapian::DatabaseOpeningError("Not a regular file or directory");
461  }
462 
463  // The filename passed doesn't exist - see if it's the basename of the
464  // table (perhaps with "." after it), so the user can do xapian-check on
465  // "foo/termlist" or "foo/termlist." (which you would get from filename
466  // completion with older backends).
467  string filename = path;
468  if (endswith(filename, '.')) {
469  filename.resize(filename.size() - 1);
470  }
471 
472  int backend = BACKEND_UNKNOWN;
473  if (stat((filename + ".DB").c_str(), &sb) == 0) {
474  // It could also be flint or brass, but we check for those below.
475  backend = BACKEND_CHERT;
476  } else if (stat((filename + "." GLASS_TABLE_EXTENSION).c_str(), &sb) == 0) {
477  backend = BACKEND_GLASS;
478  } else {
479  auto msg = "Couldn't find Xapian database or table to check";
480  throw Xapian::DatabaseOpeningError(msg, ENOENT);
481  }
482 
483  return check_db_table(path, opts, out, backend);
484 }
485 
486 }
Xapian::docid get_lastdocid() const
Virtual methods of Database::Internal.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
int close(FD &fd)
Definition: fd.h:63
bool endswith(const std::string &s, char sfx)
Definition: stringutils.h:70
#define Assert(COND)
Definition: omassert.h:122
GlassVersion class.
static size_t check_stub(const string &stub_path, int opts, std::ostream *out)
Definition: dbcheck.cc:391
void read_and_check()
Read the version file and check it&#39;s a version we understand.
char C_tolower(char ch)
Definition: stringutils.h:178
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
This class is used to access a database, or a group of databases.
Definition: database.h:68
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
#define S_ISDIR(ST_MODE)
Definition: safesysstat.h:57
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:61
Xapian::doccount get_doccount() const
Virtual methods of Database::Internal.
Constants in the Xapian namespace.
The GlassVersion class manages the revision files.
Definition: glass_version.h:94
static const char * opts
#define DIR_SEPS
Definition: config.h:8
Helper functions for database handling.
chert_revision_number_t get_revision_number() const
Get an object holding the revision number which the tables are opened at.
static void reserve_doclens(vector< Xapian::termcount > &doclens, Xapian::docid last_docid, ostream *out)
Definition: dbcheck.cc:71
STL namespace.
static void check(const std::string &changes_file)
off_t get_offset() const
static size_t check_db_fd(int fd, int opts, std::ostream *out, int backend)
Check a single file DB from an fd.
Definition: dbcheck.cc:341
Utility functions for testing files.
Types used by chert backend and the Btree manager.
size_t check_glass_table(const char *tablename, const string &db_dir, int fd, off_t offset_, const GlassVersion &version_file, int opts, vector< Xapian::termcount > &doclens, ostream *out)
unsigned int chert_revision_number_t
A type used to store a revision number for a table.
Definition: chert_types.h:40
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27
Xapian::docid get_last_docid() const
int test_if_single_file_db(const struct stat &sb, const string &path, int *fd_ptr)
Probe if a path is a single-file database.
Hierarchy of classes which Xapian can throw as exceptions.
The ChertVersion class manages the "iamchert" file.
Definition: chert_version.h:34
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:72
BACKEND_* constants.
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:719
Glass changesets.
#define S_ISREG(ST_MODE)
Definition: safesysstat.h:60
API for working with Xapian databases.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
#define CHERT_MAX_DOCID
The largest docid value supported by chert.
Definition: chert_types.h:60
void read_stub_file(const std::string &file, A1 action_auto, A2 action_chert, A3 action_glass, A4 action_remote_prog, A5 action_remote_tcp, A6 action_inmemory)
Open, read and process a stub database file.
const int DBCHECK_FIX
Fix problems.
Definition: constants.h:243
void read()
Read the version file and check it&#39;s a version we understand.
C++ class definition for chert database.
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
void create()
Create the version file.
ChertVersion class.
static size_t check_db_table(const string &filename, int opts, std::ostream *out, int backend)
Check a database table.
Definition: dbcheck.cc:270
size_t check_chert_table(const char *tablename, const string &dir, chert_revision_number_t *rev_ptr, int opts, vector< Xapian::termcount > &doclens, Xapian::doccount doccount, Xapian::docid db_last_docid, ostream *out)
static size_t check_db_dir(const string &path, int opts, std::ostream *out)
Definition: dbcheck.cc:101
char name[9]
Definition: dbcheck.cc:55
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Check a chert table.
static const struct @3 glass_tables[]
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree s...
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:43
Various handy helpers which std::string really should provide.
Definition: header.h:151
Various assertion macros.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
DatabaseError indicates some sort of database related error.
Definition: error.h:367
glass_revision_number_t get_revision() const
Check a glass table.
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::doccount get_doccount() const