xapian-core  1.4.26
chert_btreebase.cc
Go to the documentation of this file.
1 /* chert_btreebase.cc: Btree base file implementation
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002,2003,2004,2006,2008,2009,2011,2014 Olly Betts
5  * Copyright 2010 Richard Boulton
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include <xapian/error.h>
26 
27 #include "chert_btreebase.h"
28 #include "errno_to_string.h"
29 #include "fd.h"
30 #include "io_utils.h"
31 #include "omassert.h"
32 #include "pack.h"
33 #include "posixy_wrapper.h"
34 #include "str.h"
35 
36 #include <algorithm>
37 #include <cerrno>
38 #include <climits>
39 #include <cstring>
40 
41 using namespace std;
42 
43 /************ Base file parameters ************/
44 
72 #define CURR_FORMAT 5U
73 
75  : revision(0),
76  block_size(0),
77  root(0),
78  level(0),
79  bit_map_size(0),
80  item_count(0),
81  last_block(0),
82  have_fakeroot(false),
83  sequential(false),
84  bit_map_low(0),
85  bit_map0(0),
86  bit_map(0)
87 {
88 }
89 
90 void
92 {
93  std::swap(revision, other.revision);
94  std::swap(block_size, other.block_size);
95  std::swap(root, other.root);
96  std::swap(level, other.level);
97  std::swap(bit_map_size, other.bit_map_size);
98  std::swap(item_count, other.item_count);
99  std::swap(last_block, other.last_block);
100  std::swap(have_fakeroot, other.have_fakeroot);
101  std::swap(sequential, other.sequential);
102  std::swap(bit_map_low, other.bit_map_low);
103  std::swap(bit_map0, other.bit_map0);
104  std::swap(bit_map, other.bit_map);
105 }
106 
108 {
109  delete [] bit_map;
110  bit_map = 0;
111  delete [] bit_map0;
112  bit_map0 = 0;
113 }
114 
116 static bool
117 do_unpack_uint(const char **start, const char *end,
118  uint4 *dest, string &err_msg,
119  const string &basename,
120  const char *varname)
121 {
122  bool result = unpack_uint(start, end, dest);
123  if (rare(!result)) {
124  err_msg += "Unable to read ";
125  err_msg += varname;
126  err_msg += " from ";
127  err_msg += basename;
128  err_msg += '\n';
129  }
130  return result;
131 }
132 
133 static bool
134 do_unpack_uint(const char **start, const char *end,
135  chert_tablesize_t *dest, string &err_msg,
136  const string &basename,
137  const char *varname)
138 {
139  bool result = unpack_uint(start, end, dest);
140  if (rare(!result)) {
141  err_msg += "Unable to read ";
142  err_msg += varname;
143  err_msg += " from ";
144  err_msg += basename;
145  err_msg += '\n';
146  }
147  return result;
148 }
149 
150 #define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
151 do { \
152  if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
153  return false; \
154  } \
155 } while (0)
156 
157 /* How much of the base file to read at the first go (in bytes).
158  * This must be big enough that the base file without bitmap
159  * will fit in to this size with no problem. Other than that
160  * it's fairly arbitrary, but shouldn't be big enough to cause
161  * serious memory problems!
162  */
163 #define REASONABLE_BASE_SIZE 1024
164 
165 bool
166 ChertTable_base::read(const string & name, char ch, bool read_bitmap,
167  string &err_msg)
168 {
169  string basename = name + "base" + ch;
170  FD h(posixy_open(basename.c_str(), O_RDONLY | O_CLOEXEC));
171  if (h == -1) {
172  err_msg += "Couldn't open ";
173  err_msg += basename;
174  err_msg += ": ";
175  errno_to_string(errno, err_msg);
176  err_msg += "\n";
177  return false;
178  }
179 
180  char buf[REASONABLE_BASE_SIZE];
181 
182  const char *start = buf;
183  const char *end = buf + io_read(h, buf, REASONABLE_BASE_SIZE);
184 
185  DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
186  uint4 format;
187  DO_UNPACK_UINT_ERRCHECK(&start, end, format);
188  if (format != CURR_FORMAT) {
189  err_msg += "Bad base file format " + str(format) + " in " +
190  basename + "\n";
191  return false;
192  }
193  DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
194  DO_UNPACK_UINT_ERRCHECK(&start, end, root);
195  DO_UNPACK_UINT_ERRCHECK(&start, end, level);
197  DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
198  DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
199  uint4 have_fakeroot_;
200  DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
201  have_fakeroot = have_fakeroot_;
202 
203  uint4 sequential_;
204  DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
205  sequential = sequential_;
206 
207  if (have_fakeroot && !sequential) {
208  sequential = true; // FIXME : work out why we need this...
209  /*
210  err_msg += "Corrupt base file, '" + basename + "':\n"
211  "sequential must be set whenever have_fakeroot is set.\n" +
212  "sequential=" + (sequential?"true":"false") +
213  ", have_fakeroot=" + (have_fakeroot?"true":"false") + "\n";
214  return false;
215  */
216  }
217 
218  uint4 revision2;
219  DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
220  if (revision != revision2) {
221  err_msg += "Revision number mismatch in " +
222  basename + ": " +
223  str(revision) + " vs " + str(revision2) + "\n";
224  return false;
225  }
226 
227  /* It's ok to delete a zero pointer */
228  delete [] bit_map0;
229  bit_map0 = 0;
230  delete [] bit_map;
231  bit_map = 0;
232 
233  if (!read_bitmap)
234  return true;
235 
236  bit_map0 = new uint8_t[bit_map_size];
237  bit_map = new uint8_t[bit_map_size];
238 
239  size_t n = end - start;
240  if (n < bit_map_size) {
241  memcpy(bit_map0, start, n);
242  (void)io_read(h, reinterpret_cast<char *>(bit_map0) + n,
243  bit_map_size - n, bit_map_size - n);
244  n = 0;
245  } else {
246  memcpy(bit_map0, start, bit_map_size);
247  n -= bit_map_size;
248  if (n) memmove(buf, start + bit_map_size, n);
249  }
250  memcpy(bit_map, bit_map0, bit_map_size);
251 
252  start = buf;
253  end = buf + n;
254  end += io_read(h, buf + n, REASONABLE_BASE_SIZE - n, 0);
255 
256  uint4 revision3;
257  if (!unpack_uint(&start, end, &revision3)) {
258  err_msg += "Couldn't read revision3 from base file " +
259  basename + "\n";
260  return false;
261  }
262 
263  if (revision != revision3) {
264  err_msg += "Revision number mismatch in " +
265  basename + ": " +
266  str(revision) + " vs " + str(revision3) + "\n";
267  return false;
268  }
269 
270  if (start != end) {
271  err_msg += "Junk at end of base file " + basename + "\n";
272  return false;
273  }
274 
275  return true;
276 }
277 
278 void
279 ChertTable_base::write_to_file(const string &filename,
280  char base_letter,
281  const string &tablename,
282  int changes_fd,
283  const string * changes_tail)
284 {
286 
287  string buf;
288  pack_uint(buf, revision);
289  pack_uint(buf, CURR_FORMAT);
290  pack_uint(buf, block_size);
291  pack_uint(buf, static_cast<uint4>(root));
292  pack_uint(buf, static_cast<uint4>(level));
293  pack_uint(buf, static_cast<uint4>(bit_map_size));
294  pack_uint(buf, item_count);
295  pack_uint(buf, static_cast<uint4>(last_block));
296  pack_uint(buf, have_fakeroot);
297  pack_uint(buf, sequential);
298  pack_uint(buf, revision); // REVISION2
299  if (bit_map_size > 0) {
300  buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
301  }
302  pack_uint(buf, revision); // REVISION3
303 
304  FD h(posixy_open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0666));
305  if (h < 0) {
306  string message("Couldn't open base ");
307  message += filename;
308  message += " to write: ";
309  errno_to_string(errno, message);
310  throw Xapian::DatabaseOpeningError(message);
311  }
312 
313  if (changes_fd >= 0) {
314  string changes_buf;
315  pack_uint(changes_buf, 1u); // Indicate the item is a base file.
316  pack_string(changes_buf, tablename);
317  changes_buf += base_letter; // The base file letter.
318  pack_uint(changes_buf, buf.size());
319  io_write(changes_fd, changes_buf.data(), changes_buf.size());
320  io_write(changes_fd, buf.data(), buf.size());
321  if (changes_tail != NULL) {
322  io_write(changes_fd, changes_tail->data(), changes_tail->size());
323  // changes_tail is only specified for the final table, so sync.
324  io_sync(changes_fd);
325  }
326  }
327 
328  io_write(h, buf.data(), buf.size());
329  io_sync(h);
330 }
331 
332 /*
333  block_free_at_start(B, n) is true iff (if and only if) block n was free at
334  the start of the transaction on the B-tree.
335 */
336 
337 bool
339 {
340  size_t i = n / CHAR_BIT;
341  int bit = 0x1 << n % CHAR_BIT;
342  return (bit_map0[i] & bit) == 0;
343 }
344 
345 /* free_block(B, n) causes block n to be marked free in the bit map.
346  B->bit_map_low is the lowest byte in the bit map known to have a free bit
347  set. Searching starts from there when looking for a free block.
348 */
349 
350 void
352 {
353  uint4 i = n / CHAR_BIT;
354  int bit = 0x1 << n % CHAR_BIT;
355  bit_map[i] &= ~ bit;
356 
357  if (bit_map_low > i)
358  if ((bit_map0[i] & bit) == 0) /* free at start */
359  bit_map_low = i;
360 }
361 
362 /* mark_block(B, n) causes block n to be marked allocated in the bit map.
363  B->bit_map_low is the lowest byte in the bit map known to have a free bit
364  set. Searching starts from there when looking for a free block.
365 */
366 
367 void
369 {
370  uint4 i = n / CHAR_BIT;
371  int bit = 0x1 << n % CHAR_BIT;
372  while (i >= bit_map_size)
373  extend_bit_map();
374  bit_map[i] |= bit;
375 
376  if (bit_map_low == i && bit_map[i] == 0xff)
377  ++bit_map_low;
378 }
379 
380 /* extend(B) increases the size of the two bit maps in an obvious way.
381  The bitmap file grows and shrinks as the DB file grows and shrinks in
382  internal usage. But the DB file itself does not reduce in size, no matter
383  how many blocks are freed.
384 */
385 
386 #define BIT_MAP_INC 1000
387  /* increase the bit map by this number of bytes if it overflows */
388 
389 void
391 {
392  int n = bit_map_size + BIT_MAP_INC;
393  uint8_t *new_bit_map0 = 0;
394  uint8_t *new_bit_map = 0;
395 
396  try {
397  new_bit_map0 = new uint8_t[n];
398  new_bit_map = new uint8_t[n];
399 
400  memcpy(new_bit_map0, bit_map0, bit_map_size);
401  memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
402 
403  memcpy(new_bit_map, bit_map, bit_map_size);
404  memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
405  } catch (...) {
406  delete [] new_bit_map0;
407  delete [] new_bit_map;
408  throw;
409  }
410  delete [] bit_map0;
411  bit_map0 = new_bit_map0;
412  delete [] bit_map;
413  bit_map = new_bit_map;
414  bit_map_size = n;
415 }
416 
417 /* next_free_block(B) returns the number of the next available free block in
418  the bitmap, marking it as 'in use' before returning. More precisely, we get
419  a block that is both free now (in bit_map) and was free at the beginning of
420  the transaction on the B-tree (in bit_map0).
421 
422  Starting at bit_map_low we go up byte at a time until we find a byte with a
423  free (zero) bit, and then go up that byte bit at a time. If the bit map has
424  no free bits it is extended so that it will have.
425 */
426 
427 uint4
429 {
430  uint4 i;
431  int x;
432  for (i = bit_map_low;; i++) {
433  if (i >= bit_map_size) {
434  extend_bit_map();
435  }
436  x = bit_map0[i] | bit_map[i];
437  if (x != UCHAR_MAX) break;
438  }
439  uint4 n = i * CHAR_BIT;
440  int d = 0x1;
441  while ((x & d) != 0) { d <<= 1; n++; }
442  bit_map[i] |= d; /* set as 'in use' */
443  bit_map_low = i;
444  if (n > last_block) {
445  last_block = n;
446  }
447  return n;
448 }
449 
450 bool
452 {
453  // Search for a block which was free at the start of the transaction, but
454  // isn't now.
455  while ((*n) <= last_block) {
456  size_t offset = (*n) / CHAR_BIT;
457  int bit = 0x1 << (*n) % CHAR_BIT;
458 
459  if (((bit_map0[offset] & bit) == 0) && ((bit_map[offset] & bit) != 0)) {
460  return true;
461  }
462  ++(*n);
463  }
464 
465  return false;
466 }
467 
468 bool
470 {
471  uint4 i = n / CHAR_BIT;
472  int bit = 0x1 << n % CHAR_BIT;
473  return (bit_map[i] & bit) == 0;
474 }
475 
476 void
478 {
479  int i = bit_map_size - 1;
480  while (i >= 0 && bit_map[i] == 0) {
481  i--;
482  }
483  bit_map_size = i + 1;
484 
485  /* Check for when there are no blocks */
486  if (bit_map_size == 0) {
487  last_block = 0;
488  return;
489  }
490 
491  int x = bit_map[i];
492 
493  uint4 n = (i + 1) * CHAR_BIT - 1;
494  int d = 0x1 << (CHAR_BIT - 1);
495  while ((x & d) == 0) { d >>= 1; n--; }
496 
497  last_block = n;
498 }
499 
500 void
502 {
503  memset(bit_map, 0, bit_map_size);
504 }
505 
506 // We've committed, so "bitmap at start" needs to be reset to the current bitmap.
507 void
509 {
510  memcpy(bit_map0, bit_map, bit_map_size);
511  bit_map_low = 0;
512 }
uint4 bit_map_low
byte offset into the bit map below which there are no free blocks
#define REASONABLE_BASE_SIZE
void io_write(int fd, const char *p, size_t n)
Write n bytes from block pointed to by p to file descriptor fd.
Definition: io_utils.cc:145
void swap(ChertTable_base &other)
~ChertTable_base()
Destructor - frees resources.
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
bool io_sync(int fd)
Ensure all data previously written to file descriptor fd has been written to disk.
Definition: io_utils.h:73
Provides wrappers with POSIXy semantics.
Convert errno value to std::string, thread-safe if possible.
Btree base file implementation.
static bool do_unpack_uint(const char **start, const char *end, uint4 *dest, string &err_msg, const string &basename, const char *varname)
Do most of the error handling from unpack_uint()
STL namespace.
Convert types to std::string.
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
#define O_CLOEXEC
Definition: safefcntl.h:90
#define false
Definition: header.h:9
#define rare(COND)
Definition: config.h:575
bool block_free_now(uint4 n) const
Hierarchy of classes which Xapian can throw as exceptions.
chert_tablesize_t item_count
uint32_t uint4
Definition: internaltypes.h:32
Definition: fd.h:30
void errno_to_string(int e, string &s)
unsigned long long chert_tablesize_t
A type used to store the number of entries in a table.
Definition: chert_types.h:46
#define DO_UNPACK_UINT_ERRCHECK(start, end, var)
string str(int value)
Convert int to std::string.
Definition: str.cc:90
Wrapper class around a file descriptor to avoid leaks.
bool find_changed_block(uint4 *n) const
Find the first changed block at or after position *n.
bool read(const std::string &name, char ch, bool read_bitmap, std::string &err_msg)
Read values from a base file.
size_t io_read(int fd, char *p, size_t n, size_t min)
Read n bytes (or until EOF) into block pointed to by p from file descriptor fd.
Definition: io_utils.cc:123
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
void pack_string(std::string &s, const std::string &value)
Append an encoded std::string to a string.
Definition: pack.h:477
void write_to_file(const std::string &filename, char base_letter, const std::string &tablename, int changes_fd, const std::string *changes_tail)
Write the btree base file to disk.
Pack types into strings and unpack them again.
bool block_free_at_start(uint4 n) const
true iff block n was free at the start of the transaction on the B-tree.
Wrappers for low-level POSIX I/O routines.
uint8_t * bit_map
the current state of the bit map of blocks
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
#define BIT_MAP_INC
void mark_block(uint4 n)
#define posixy_open
void free_block(uint4 n)
Definition: header.h:151
uint8_t * bit_map0
the initial state of the bit map of blocks: 1 means in use, 0 means free
static string format(const char *fmt, T value)
Definition: str.cc:127
Various assertion macros.
ChertTable_base()
Construct an object with all zero fields.
#define CURR_FORMAT
This is the current description of the base file format: