xapian-core  1.4.25
glass_version.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2006,2007,2008,2009,2010,2013,2014,2015,2016,2017,2024 Olly Betts
5  * Copyright (C) 2011 Dan Colish
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "glass_version.h"
25 
26 #include "debuglog.h"
27 #include "fd.h"
28 #include "io_utils.h"
29 #include "omassert.h"
30 #include "pack.h"
31 #include "posixy_wrapper.h"
32 #include "stringutils.h" // For STRINGIZE() and CONST_STRLEN().
33 
34 #include <cerrno>
35 #include <cstring> // For memcmp().
36 #include <string>
37 #include <sys/types.h>
38 #include "safesysstat.h"
39 #include "safefcntl.h"
40 #include "safeunistd.h"
41 #include "str.h"
42 #include "stringutils.h"
43 
44 #include "backends/uuids.h"
45 
46 #include "xapian/constants.h"
47 #include "xapian/error.h"
48 
49 using namespace std;
50 
// Helpers for packing a calendar date into a small unsigned value and for
// pulling the fields back out again.
//
// Layout: years since 2014 from bit 9 upwards, month in bits 5-8, day of
// month in bits 0-4.
#define DATE_TO_VERSION(Y,M,D) \
    (((unsigned(Y) - 2014) << 9) | (unsigned(M) << 5) | unsigned(D))
#define VERSION_TO_YEAR(V) (2014 + (unsigned(V) >> 9))
#define VERSION_TO_MONTH(V) ((unsigned(V) >> 5) & 0x0f)
#define VERSION_TO_DAY(V) (unsigned(V) & 0x1f)

// Glass format version, expressed as the date of the latest format change.
#define GLASS_FORMAT_VERSION DATE_TO_VERSION(2016,03,14)
// 2016,03,14 1.3.5 compress_min in version file; partly eliminate component_of
// 2015,12,24 1.3.4 2 bytes "components_of" per item eliminated, and much more
// 2014,11,21 1.3.2 Brass renamed to Glass

// Length in bytes of the magic string alone.
#define GLASS_VERSION_MAGIC_LEN 14
// Length in bytes of the magic string plus the two format version bytes which
// follow it.
#define GLASS_VERSION_MAGIC_AND_VERSION_LEN 16
66 
68  '\x0f', '\x0d', 'X', 'a', 'p', 'i', 'a', 'n', ' ', 'G', 'l', 'a', 's', 's',
69  char((GLASS_FORMAT_VERSION >> 8) & 0xff), char(GLASS_FORMAT_VERSION & 0xff)
70 };
71 
73  : rev(0), fd(fd_), offset(0), db_dir(), changes(NULL),
74  doccount(0), total_doclen(0), last_docid(0),
75  doclen_lbound(0), doclen_ubound(0),
76  wdf_ubound(0), spelling_wordfreq_ubound(0),
77  oldest_changeset(0)
78 {
79  offset = lseek(fd, 0, SEEK_CUR);
80  if (rare(offset < 0)) {
81  string msg = "lseek failed on file descriptor ";
82  msg += str(fd);
83  throw Xapian::DatabaseOpeningError(msg, errno);
84  }
85 }
86 
88 {
89  // Either this is a single-file database, or this fd is from opening a new
90  // version file in write(), but sync() was never called.
91  if (fd != -1)
92  (void)::close(fd);
93 }
94 
95 void
97 {
98  LOGCALL_VOID(DB, "GlassVersion::read", NO_ARGS);
99  FD close_fd(-1);
100  int fd_in;
101  if (single_file()) {
102  if (rare(lseek(fd, offset, SEEK_SET) < 0)) {
103  string msg = "Failed to rewind file descriptor ";
104  msg += str(fd);
105  throw Xapian::DatabaseOpeningError(msg, errno);
106  }
107  fd_in = fd;
108  } else {
109  string filename = db_dir;
110  filename += "/iamglass";
111  fd_in = posixy_open(filename.c_str(), O_RDONLY|O_BINARY);
112  if (rare(fd_in < 0)) {
113  string msg = filename;
114  msg += ": Failed to open glass revision file for reading";
115  if (errno == ENOENT || errno == ENOTDIR) {
116  throw Xapian::DatabaseNotFoundError(msg, errno);
117  }
118  throw Xapian::DatabaseOpeningError(msg, errno);
119  }
120  close_fd = fd_in;
121  }
122 
123  char buf[256];
124 
125  const char * p = buf;
126  const char * end = p + io_read(fd_in, buf, sizeof(buf), 33);
127 
128  if (memcmp(buf, GLASS_VERSION_MAGIC, GLASS_VERSION_MAGIC_LEN) != 0)
129  throw Xapian::DatabaseCorruptError("Rev file magic incorrect");
130 
131  unsigned version;
132  version = static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN]);
133  version <<= 8;
134  version |= static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN + 1]);
135  if (version != GLASS_FORMAT_VERSION) {
136  string msg;
137  if (!single_file()) {
138  msg = db_dir;
139  msg += ": ";
140  }
141  msg += "Database is format version ";
142  msg += str(VERSION_TO_YEAR(version) * 10000 +
143  VERSION_TO_MONTH(version) * 100 +
144  VERSION_TO_DAY(version));
145  msg += " but I only understand ";
146  msg += str(VERSION_TO_YEAR(GLASS_FORMAT_VERSION) * 10000 +
149  throw Xapian::DatabaseVersionError(msg);
150  }
151 
153  uuid.assign(p);
154  p += uuid.BINARY_SIZE;
155 
156  if (!unpack_uint(&p, end, &rev))
157  throw Xapian::DatabaseCorruptError("Rev file failed to decode revision");
158 
159  for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
160  if (!root[table_no].unserialise(&p, end)) {
161  throw Xapian::DatabaseCorruptError("Rev file root_info missing");
162  }
163  old_root[table_no] = root[table_no];
164  }
165 
166  // For a single-file database, this will assign extra data. We read
167  // sizeof(buf) above, then skip GLASS_VERSION_MAGIC_AND_VERSION_LEN,
168  // then 16, then the size of the serialised root info.
169  serialised_stats.assign(p, end);
171 }
172 
173 void
175 {
176  serialised_stats.resize(0);
178  // last_docid must always be >= doccount.
182  // doclen_ubound should always be >= wdf_ubound, so we store the
183  // difference as it may encode smaller. wdf_ubound is likely to
184  // be larger than doclen_lbound.
189 }
190 
191 void
193 {
194  const char * p = serialised_stats.data();
195  const char * end = p + serialised_stats.size();
196  if (p == end) {
197  doccount = 0;
198  total_doclen = 0;
199  last_docid = 0;
200  doclen_lbound = 0;
201  doclen_ubound = 0;
202  wdf_ubound = 0;
203  oldest_changeset = 0;
205  return;
206  }
207 
208  if (!unpack_uint(&p, end, &doccount) ||
209  !unpack_uint(&p, end, &last_docid) ||
210  !unpack_uint(&p, end, &doclen_lbound) ||
211  !unpack_uint(&p, end, &wdf_ubound) ||
212  !unpack_uint(&p, end, &doclen_ubound) ||
213  !unpack_uint(&p, end, &oldest_changeset) ||
214  !unpack_uint(&p, end, &total_doclen) ||
216  const char * m = p ?
217  "Bad serialised DB stats (overflowed)" :
218  "Bad serialised DB stats (out of data)";
220  }
221 
222  // In the single-file DB case, there will be extra data in
223  // serialised_stats, so suppress this check.
224  if (p != end && !single_file())
225  throw Xapian::DatabaseCorruptError("Rev file has junk at end");
226 
227  // last_docid must always be >= doccount.
228  last_docid += doccount;
229  // doclen_ubound should always be >= wdf_ubound, so we store the
230  // difference as it may encode smaller. wdf_ubound is likely to
231  // be larger than doclen_lbound.
233 }
234 
235 void
237 {
238  doccount += o.get_doccount();
239  if (doccount < o.get_doccount()) {
240  throw Xapian::DatabaseError("doccount overflowed!");
241  }
242 
243  Xapian::termcount o_doclen_lbound = o.get_doclength_lower_bound();
244  if (o_doclen_lbound > 0) {
245  if (doclen_lbound == 0 || o_doclen_lbound < doclen_lbound)
246  doclen_lbound = o_doclen_lbound;
247  }
248 
250  wdf_ubound = max(wdf_ubound, o.get_wdf_upper_bound());
252  if (total_doclen < o.get_total_doclen()) {
253  throw Xapian::DatabaseError("Total document length overflowed!");
254  }
255 
256  // The upper bounds might be on the same word, so we must sum them.
258 }
259 
260 void
262 {
263  LOGCALL_VOID(DB, "GlassVersion::cancel", NO_ARGS);
264  for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
265  root[table_no] = old_root[table_no];
266  }
268 }
269 
270 const string
272 {
273  LOGCALL(DB, const string, "GlassVersion::write", new_rev|flags);
274 
276  s.append(uuid.data(), uuid.BINARY_SIZE);
277 
278  pack_uint(s, new_rev);
279 
280  for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
281  root[table_no].serialise(s);
282  }
283 
284  // Serialise database statistics.
285  serialise_stats();
286  s += serialised_stats;
287 
288  string tmpfile;
289  if (!single_file()) {
290  tmpfile = db_dir;
291  // In dangerous mode, just write the new version file in place.
292  if (flags & Xapian::DB_DANGEROUS)
293  tmpfile += "/iamglass";
294  else
295  tmpfile += "/v.tmp";
296 
297  fd = posixy_open(tmpfile.c_str(), O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
298  if (rare(fd < 0))
299  throw Xapian::DatabaseOpeningError("Couldn't write new rev file: " + tmpfile,
300  errno);
301 
302  if (flags & Xapian::DB_DANGEROUS)
303  tmpfile = string();
304  }
305 
306  try {
307  io_write(fd, s.data(), s.size());
308  } catch (...) {
309  if (!single_file())
310  (void)close(fd);
311  throw;
312  }
313 
314  if (changes) {
315  string changes_buf;
316  changes_buf += '\xfe';
317  pack_uint(changes_buf, new_rev);
318  pack_uint(changes_buf, s.size());
319  changes->write_block(changes_buf);
320  changes->write_block(s);
321  }
322 
323  RETURN(tmpfile);
324 }
325 
326 bool
327 GlassVersion::sync(const string & tmpfile,
328  glass_revision_number_t new_rev, int flags)
329 {
330  Assert(new_rev > rev || rev == 0);
331 
332  if (single_file()) {
333  if ((flags & Xapian::DB_NO_SYNC) == 0 &&
334  ((flags & Xapian::DB_FULL_SYNC) ?
335  !io_full_sync(fd) :
336  !io_sync(fd))) {
337  // FIXME what to do?
338  }
339  } else {
340  int fd_to_close = fd;
341  fd = -1;
342  if ((flags & Xapian::DB_NO_SYNC) == 0 &&
343  ((flags & Xapian::DB_FULL_SYNC) ?
344  !io_full_sync(fd_to_close) :
345  !io_sync(fd_to_close))) {
346  int save_errno = errno;
347  (void)close(fd_to_close);
348  if (!tmpfile.empty())
349  (void)unlink(tmpfile.c_str());
350  errno = save_errno;
351  return false;
352  }
353 
354  if (close(fd_to_close) != 0) {
355  if (!tmpfile.empty()) {
356  int save_errno = errno;
357  (void)unlink(tmpfile.c_str());
358  errno = save_errno;
359  }
360  return false;
361  }
362 
363  if (!tmpfile.empty()) {
364  if (!io_tmp_rename(tmpfile, db_dir + "/iamglass")) {
365  return false;
366  }
367  }
368  }
369 
370  for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
371  old_root[table_no] = root[table_no];
372  }
373 
374  rev = new_rev;
375  return true;
376 }
377 
378 /* Only try to compress tags strictly longer than this many bytes.
379  *
380  * This can theoretically usefully be set as low as 4, but in practical terms
381  * zlib can't compress in very many cases for short inputs and even when it can
382  * the savings are small, so we default to a higher threshold to save CPU time
383  * for marginal size reductions.
384  */
385 const size_t COMPRESS_MIN = 18;
386 
387 static const uint4 compress_min_tab[] = {
388  0, // POSTLIST
389  COMPRESS_MIN, // DOCDATA
390  COMPRESS_MIN, // TERMLIST
391  0, // POSITION
392  COMPRESS_MIN, // SPELLING
393  COMPRESS_MIN // SYNONYM
394 };
395 
396 void
397 GlassVersion::create(unsigned blocksize)
398 {
399  AssertRel(blocksize,>=,2048);
400  uuid.generate();
401  for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
402  root[table_no].init(blocksize, compress_min_tab[table_no]);
403  }
404 }
405 
406 namespace Glass {
407 
408 void
409 RootInfo::init(unsigned blocksize_, uint4 compress_min_)
410 {
411  AssertRel(blocksize_,>=,2048);
412  root = 0;
413  level = 0;
414  num_entries = 0;
415  root_is_fake = true;
416  sequential = true;
417  blocksize = blocksize_;
418  compress_min = compress_min_;
419  fl_serialised.resize(0);
420 }
421 
422 void
423 RootInfo::serialise(string &s) const
424 {
425  pack_uint(s, root);
426  unsigned val = level << 2;
427  if (sequential) val |= 0x02;
428  if (root_is_fake) val |= 0x01;
429  pack_uint(s, val);
430  pack_uint(s, num_entries);
431  pack_uint(s, blocksize >> 11);
432  pack_uint(s, compress_min);
433  pack_string(s, fl_serialised);
434 }
435 
436 bool
437 RootInfo::unserialise(const char ** p, const char * end)
438 {
439  unsigned val, b;
440  if (!unpack_uint(p, end, &root) ||
441  !unpack_uint(p, end, &val) ||
442  !unpack_uint(p, end, &num_entries) ||
443  !unpack_uint(p, end, &b) ||
444  !unpack_uint(p, end, &compress_min) ||
445  !unpack_string(p, end, fl_serialised)) return false;
446  auto level_ = val >> 2;
447  if (rare(level_ >= GLASS_BTREE_CURSOR_LEVELS))
448  throw Xapian::DatabaseCorruptError("Impossibly deep Btree");
449  level = level_;
450  sequential = val & 0x02;
451  root_is_fake = val & 0x01;
452 
453  b <<= 11;
454  if (rare(b < GLASS_MIN_BLOCKSIZE ||
455  b > GLASS_MAX_BLOCKSIZE ||
456  (b & (b - 1)) != 0)) {
457  throw Xapian::DatabaseCorruptError("Invalid block size");
458  }
459  blocksize = b;
460 
461  // Map old default to new default.
462  if (compress_min == 4) {
463  compress_min = COMPRESS_MIN;
464  }
465 
466  return true;
467 }
468 
469 }
#define GLASS_VERSION_MAGIC_LEN
int close(FD &fd)
Definition: fd.h:63
#define RETURN(A)
Definition: debuglog.h:493
#define Assert(COND)
Definition: omassert.h:122
Xapian::termcount get_doclength_upper_bound() const
GlassVersion class.
void create(unsigned blocksize)
Create the version file.
bool sync(const std::string &tmpfile, glass_revision_number_t new_rev, int flags)
void io_write(int fd, const char *p, size_t n)
Write n bytes from block pointed to by p to file descriptor fd.
Definition: io_utils.cc:145
glass_revision_number_t oldest_changeset
Oldest changeset removed when max_changesets is set.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
Xapian::termcount wdf_ubound
An upper bound on the greatest wdf in this database.
off_t offset
Offset into the file at which the version data starts.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
#define VERSION_TO_YEAR(V)
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
Uuid uuid
The UUID of this database.
RootInfo root[Glass::MAX_]
Definition: glass_version.h:97
#define GLASS_MIN_BLOCKSIZE
Minimum B-tree block size.
Definition: glass_defs.h:33
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
bool io_sync(int fd)
Ensure all data previously written to file descriptor fd has been written to disk.
Definition: io_utils.h:73
Provides wrappers with POSIXy semantics.
Constants in the Xapian namespace.
The GlassVersion class manages the revision files.
Definition: glass_version.h:94
GlassVersion(const std::string &db_dir_)
void assign(const char *p)
Definition: uuids.h:64
#define O_BINARY
Definition: safefcntl.h:81
const int DB_FULL_SYNC
Try to ensure changes are really written to disk.
Definition: constants.h:83
glass_revision_number_t rev
Definition: glass_version.h:95
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
GlassChanges * changes
STL namespace.
Convert types to std::string.
void serialise_stats()
#define VERSION_TO_MONTH(V)
static const char GLASS_VERSION_MAGIC[GLASS_VERSION_MAGIC_AND_VERSION_LEN]
#define rare(COND)
Definition: config.h:565
#define GLASS_FORMAT_VERSION
Glass format version (date of change):
include <sys/stat.h> with portability enhancements
Hierarchy of classes which Xapian can throw as exceptions.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::termcount get_wdf_upper_bound() const
Xapian::totallength total_doclen
The total of the lengths of all documents in the database.
bool io_full_sync(int fd)
Definition: io_utils.h:88
void merge_stats(const GlassVersion &o)
Merge the database stats.
uint32_t uint4
Definition: internaltypes.h:32
Definition: fd.h:30
int fd
File descriptor.
Xapian::termcount get_doclength_lower_bound() const
string str(int value)
Convert int to std::string.
Definition: str.cc:90
Wrapper class around a file descriptor to avoid leaks.
bool io_tmp_rename(const std::string &tmp_file, const std::string &real_file)
Rename a temporary file to its final position.
Definition: io_utils.cc:271
static const uint4 compress_min_tab[]
#define VERSION_TO_DAY(V)
size_t io_read(int fd, char *p, size_t n, size_t min)
Read n bytes (or until EOF) into block pointed to by p from file descriptor fd.
Definition: io_utils.cc:123
std::string serialised_stats
The serialised database stats.
Indicates an attempt to access a database not present.
Definition: error.h:1055
const int DB_DANGEROUS
Update the database in-place.
Definition: constants.h:103
void read()
Read the version file and check it's a version we understand.
DatabaseVersionError indicates that a database is in an unsupported format.
Definition: error.h:632
const int DB_NO_SYNC
Don't attempt to ensure changes have hit disk.
Definition: constants.h:66
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
RootInfo old_root[Glass::MAX_]
Definition: glass_version.h:98
#define GLASS_BTREE_CURSOR_LEVELS
Allow for this many levels in the B-tree.
Definition: glass_defs.h:43
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
void init(unsigned blocksize_, uint4 compress_min_)
const size_t COMPRESS_MIN
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
void pack_string(std::string &s, const std::string &value)
Append an encoded std::string to a string.
Definition: pack.h:477
Xapian::termcount spelling_wordfreq_ubound
An upper bound on the spelling wordfreq in this database.
Xapian::totallength get_total_doclen() const
static constexpr unsigned BINARY_SIZE
The size of a UUID in bytes.
Definition: uuids.h:31
Xapian::termcount get_spelling_wordfreq_upper_bound() const
Pack types into strings and unpack them again.
Wrappers for low-level POSIX I/O routines.
void write_block(const char *p, size_t len)
Various handy helpers which std::string really should provide.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
#define posixy_open
Class for handling UUIDs.
<unistd.h>, but with compat.
Xapian::termcount doclen_ubound
An upper bound on the greatest document length in this database.
bool single_file() const
Xapian::docid last_docid
Greatest document id ever used in this database.
void unserialise_stats()
Various assertion macros.
const char * data() const
Definition: uuids.h:60
Xapian::termcount doclen_lbound
A lower bound on the smallest document length in this database.
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
Definition: pack.h:504
DatabaseError indicates some sort of database related error.
Definition: error.h:367
const std::string write(glass_revision_number_t new_rev, int flags)
#define GLASS_VERSION_MAGIC_AND_VERSION_LEN
#define GLASS_MAX_BLOCKSIZE
Maximum B-tree block size.
Definition: glass_defs.h:36
void serialise(std::string &s) const
std::string db_dir
The database directory.
include <fcntl.h>, but working around broken platforms.
Debug logging macros.
Xapian::doccount doccount
The number of documents in the database.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
void generate()
Definition: uuids.cc:63
Xapian::doccount get_doccount() const