xapian-core  1.4.21
chert_table.cc
Go to the documentation of this file.
1 /* chert_table.cc: Btree implementation
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
6  * Copyright 2008 Lemur Consulting Ltd
7  * Copyright 2010 Richard Boulton
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "chert_table.h"
28 
29 #include <xapian/error.h>
30 
31 #include <cerrno>
32 
33 #include "errno_to_string.h"
34 #include "omassert.h"
35 #include "stringutils.h" // For STRINGIZE().
36 
37 // Define to use "dangerous" mode - in this mode we write modified btree
38 // blocks back in place. This is somewhat faster (especially once we're
39 // I/O bound) but the database can't be safely searched during indexing
40 // and if indexing is terminated uncleanly, the database may be corrupt.
41 //
42 // Despite the risks, it's useful for speeding up a full rebuild.
43 //
44 // FIXME: make this mode run-time selectable, and record that it is currently
45 // in use somewhere on disk, so readers can check and refuse to open the
46 // database.
47 //
48 // #define DANGEROUS
49 
50 #include <sys/types.h>
51 
52 #include <cstring> /* for memmove */
53 #include <climits> /* for CHAR_BIT */
54 
55 #include "chert_btreebase.h"
56 #include "chert_cursor.h"
57 
58 #include "filetests.h"
59 #include "io_utils.h"
60 #include "debuglog.h"
61 #include "pack.h"
62 #include "str.h"
63 #include "wordaccess.h"
64 
65 #include <algorithm> // for std::min()
66 #include <string>
67 
68 using namespace std;
69 
70 // Only try to compress tags longer than this many bytes.
71 const size_t COMPRESS_MIN = 4;
72 
73 //#define BTREE_DEBUG_FULL 1
74 #undef BTREE_DEBUG_FULL
75 
76 #ifdef BTREE_DEBUG_FULL
77 /*------debugging aids from here--------*/
78 
79 static void print_key(const uint8_t * p, int c, int j);
80 static void print_tag(const uint8_t * p, int c, int j);
81 
82 /*
83 static void report_cursor(int N, Btree * B, Cursor * C)
84 {
85  int i;
86  printf("%d)\n", N);
87  for (i = 0; i <= B->level; i++)
88  printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
89  C[i].p, C[i].c, C[i].n, C[i].rewrite);
90 }
91 */
92 
93 /*------to here--------*/
94 #endif /* BTREE_DEBUG_FULL */
95 
96 static inline uint8_t *zeroed_new(size_t size)
97 {
98  uint8_t *temp = new uint8_t[size];
99  memset(temp, 0, size);
100  return temp;
101 }
102 
103 /* A B-tree comprises (a) a base file, containing essential information (Block
104  size, number of the B-tree root block etc), (b) a bitmap, the Nth bit of the
105  bitmap being set if the Nth block of the B-tree file is in use, and (c) a
106  file DB containing the B-tree proper. The DB file is divided into a sequence
107  of equal sized blocks, numbered 0, 1, 2 ... some of which are free, some in
108  use. Those in use are arranged in a tree.
109 
110  Each block, b, has a structure like this:
111 
112  R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
113  <---------- D ----------> <-M->
114 
115  And then,
116 
117  R = REVISION(b) is the revision number the B-tree had when the block was
118  written into the DB file.
119  L = GET_LEVEL(b) is the level of the block, which is the number of levels
120  towards the root of the B-tree structure. So leaf blocks
121  have level 0 and the one root block has the highest level
122  equal to the number of levels in the B-tree.
123  M = MAX_FREE(b) is the size of the gap between the end of the directory and
124  the first item of data. (It is not necessarily the maximum
125  size among the bits of space that are free, but I can't
126  think of a better name.)
127  T = TOTAL_FREE(b)is the total amount of free space left in b.
128  D = DIR_END(b) gives the offset to the end of the directory.
129 
130  o1, o2 ... oN are a directory of offsets to the N items held in the block.
131  The items are key-tag pairs, and as they occur in the directory are ordered
132  by the keys.
133 
134  An item has this form:
135 
136  I K key x C tag
137  <--K-->
138  <------I------>
139 
140  A long tag presented through the API is split up into C tags small enough to
141  be accommodated in the blocks of the B-tree. The key is extended to include
142  a counter, x, which runs from 1 to C. The key is preceded by a length, K,
143  and the whole item with a length, I, as depicted above.
144 
145  Here are the corresponding definitions:
146 
147 */
148 
151 #define SEQ_START_POINT (-10)
152 
153 
154 /* There are two bit maps in bit_map0 and bit_map. The nth bit of bitmap is 0
155  if the nth block is free, otherwise 1. bit_map0 is the initial state of
156  bitmap at the start of the current transaction.
157 
158  Note use of the limits.h values:
159  UCHAR_MAX = 255, an unsigned with all bits set, and
160  CHAR_BIT = 8, the number of bits per byte
161 
162  BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
163  bytes -- 64K effectively.
164 */
165 
166 #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
167 
169 void
170 ChertTable::read_block(uint4 n, uint8_t * p) const
171 {
172  // Log the value of p, not the contents of the block it points to...
173  LOGCALL_VOID(DB, "ChertTable::read_block", n | (void*)p);
174  if (rare(handle == -2))
176 
177  /* Use the base bit_map_size not the bitmap's size, because
178  * the latter is uninitialised in readonly mode.
179  */
180  Assert(n / CHAR_BIT < base.get_bit_map_size());
181 
182  io_read_block(handle, reinterpret_cast<char *>(p), block_size, n);
183 
184  int dir_end = DIR_END(p);
185  if (rare(dir_end < DIR_START || unsigned(dir_end) > block_size)) {
186  string msg("dir_end invalid in block ");
187  msg += str(n);
188  throw Xapian::DatabaseCorruptError(msg);
189  }
190 }
191 
198 void
199 ChertTable::write_block(uint4 n, const uint8_t * p) const
200 {
201  LOGCALL_VOID(DB, "ChertTable::write_block", n | p);
202  Assert(writable);
203  /* Check that n is in range. */
204  Assert(n / CHAR_BIT < base.get_bit_map_size());
205 
206  /* don't write to non-free */;
207  AssertParanoid(base.block_free_at_start(n));
208 
209  /* write revision is okay */
210  AssertEqParanoid(REVISION(p), latest_revision_number + 1);
211 
212  if (both_bases) {
213  // Delete the old base before modifying the database.
214  //
215  // If the file is on NFS, then io_unlink() may return false even if
216  // the file was removed, so on balance throwing an exception in this
217  // case is unhelpful, since we wanted the file gone anyway! The
218  // likely explanation is that somebody moved, deleted, or changed a
219  // symlink to the database directory.
220  (void)io_unlink(name + "base" + other_base_letter());
221  both_bases = false;
222  latest_revision_number = revision_number;
223  }
224 
225  io_write_block(handle, reinterpret_cast<const char *>(p), block_size, n);
226 }
227 
228 
229 /* A note on cursors:
230 
231  Each B-tree level has a corresponding array element C[j] in a
232  cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
233  root block level. Within a level j,
234 
235  C[j].p addresses the block
236  C[j].c is the offset into the directory entry in the block
237  C[j].n is the number of the block at C[j].p
238 
239  A look up in the B-tree causes navigation of the blocks starting
240  from the root. In each block, p, we find an offset, c, to an item
241  which gives the number, n, of the block for the next level. This
242  leads to an array of values p,c,n which are held inside the cursor.
243 
244  Any Btree object B has a built-in cursor, at B->C. But other cursors may
245  be created. If BC is a created cursor, BC->C is the cursor in the
246  sense given above, and BC->B is the handle for the B-tree again.
247 */
248 
249 
250 void
252 {
253  LOGCALL_VOID(DB, "ChertTable::set_overwritten", NO_ARGS);
254  // If we're writable, there shouldn't be another writer who could cause
255  // overwritten to be flagged, so that's a DatabaseCorruptError.
256  if (writable)
257  throw Xapian::DatabaseCorruptError("Db block overwritten - are there multiple writers?");
258  throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
259 }
260 
261 /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
262  C, writing the block currently at C[j] back to disk if necessary.
263  Note that
264 
265  C[j].rewrite
266 
267  is true iff C[j].n is different from block n in file DB. If it is
268  false no rewriting is necessary.
269 */
270 
271 void
273 {
274  LOGCALL_VOID(DB, "ChertTable::block_to_cursor", (void*)C_ | j | n);
275  if (n == C_[j].n) return;
276  uint8_t * p = C_[j].p;
277  Assert(p);
278 
279  // FIXME: only needs to be done in write mode
280  if (C_[j].rewrite) {
281  Assert(writable);
282  Assert(C == C_);
283  write_block(C_[j].n, p);
284  C_[j].rewrite = false;
285  }
286 
287  // Check if the block is in the built-in cursor (potentially in
288  // modified form).
289  if (n == C[j].n) {
290  if (p != C[j].p)
291  memcpy(p, C[j].p, block_size);
292  } else {
293  read_block(n, p);
294  }
295 
296  C_[j].n = n;
297  if (j < level) {
298  /* unsigned comparison */
299  if (rare(REVISION(p) > REVISION(C_[j + 1].p))) {
300  set_overwritten();
301  return;
302  }
303  }
304 
305  if (rare(j != GET_LEVEL(p))) {
306  string msg = "Expected block ";
307  msg += str(n);
308  msg += " to be level ";
309  msg += str(j);
310  msg += ", not ";
311  msg += str(GET_LEVEL(p));
312  throw Xapian::DatabaseCorruptError(msg);
313  }
314 }
315 
337 void
339 {
340  LOGCALL_VOID(DB, "ChertTable::alter", NO_ARGS);
341  Assert(writable);
342 #ifdef DANGEROUS
343  C[0].rewrite = true;
344 #else
345  int j = 0;
346  uint8_t * p = C[j].p;
347  while (true) {
348  if (C[j].rewrite) return; /* all new, so return */
349  C[j].rewrite = true;
350 
351  uint4 n = C[j].n;
352  if (base.block_free_at_start(n)) {
353  Assert(REVISION(p) == latest_revision_number + 1);
354  return;
355  }
356  Assert(REVISION(p) < latest_revision_number + 1);
357  base.free_block(n);
358  n = base.next_free_block();
359  C[j].n = n;
360  SET_REVISION(p, latest_revision_number + 1);
361 
362  if (j == level) return;
363  j++;
364  p = C[j].p;
365  Item_wr(p, C[j].c).set_block_given_by(n);
366  }
367 #endif
368 }
369 
385 int
386 ChertTable::find_in_block(const uint8_t * p, Key key, bool leaf, int c)
387 {
388  LOGCALL_STATIC(DB, int, "ChertTable::find_in_block", (const void*)p | (const void *)key.get_address() | leaf | c);
389  // c should be odd (either -1, or an even offset from DIR_START).
390  Assert((c & 1) == 1);
391  int i = DIR_START;
392  if (leaf) i -= D2;
393  if (c != -1) {
394  AssertRel(i,<=,c);
395  }
396  int j = DIR_END(p);
397 
398  if (c != -1) {
399  if (c < j && i < c && Item(p, c).key() <= key)
400  i = c;
401  c += D2;
402  if (c < j && i < c && key < Item(p, c).key())
403  j = c;
404  }
405 
406  while (j - i > D2) {
407  int k = i + ((j - i)/(D2 * 2))*D2; /* mid way */
408  if (key < Item(p, k).key()) j = k; else i = k;
409  }
410  if (leaf) {
411  AssertRel(DIR_START - D2,<=,i);
412  } else {
413  AssertRel(DIR_START,<=,i);
414  }
415  AssertRel(i,<,DIR_END(p));
416  RETURN(i);
417 }
418 
432 bool
434 {
435  LOGCALL(DB, bool, "ChertTable::find", (void*)C_);
436  // Note: the parameter is needed when we're called by ChertCursor
437  const uint8_t * p;
438  int c;
439  Key key = kt.key();
440  for (int j = level; j > 0; --j) {
441  p = C_[j].p;
442  c = find_in_block(p, key, false, C_[j].c);
443 #ifdef BTREE_DEBUG_FULL
444  printf("Block in ChertTable:find - code position 1");
445  report_block_full(j, C_[j].n, p);
446 #endif /* BTREE_DEBUG_FULL */
447  C_[j].c = c;
448  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
449  }
450  p = C_[0].p;
451  c = find_in_block(p, key, true, C_[0].c);
452 #ifdef BTREE_DEBUG_FULL
453  printf("Block in ChertTable:find - code position 2");
454  report_block_full(0, C_[0].n, p);
455 #endif /* BTREE_DEBUG_FULL */
456  C_[0].c = c;
457  if (c < DIR_START) {
458  RETURN(false);
459  }
460  RETURN(Item(p, c).key() == key);
461 }
462 
468 void
470 {
471  LOGCALL_VOID(DB, "ChertTable::compact", (void*)p);
472  Assert(writable);
473  int e = block_size;
474  uint8_t * b = buffer;
475  int dir_end = DIR_END(p);
476  for (int c = DIR_START; c < dir_end; c += D2) {
477  Item item(p, c);
478  int l = item.size();
479  e -= l;
480  memmove(b + e, item.get_address(), l);
481  setD(p, c, e); /* reform in b */
482  }
483  memmove(p + e, b + e, block_size - e); /* copy back */
484  e -= dir_end;
485  SET_TOTAL_FREE(p, e);
486  SET_MAX_FREE(p, e);
487 }
488 
492 void
494 {
495  LOGCALL_VOID(DB, "ChertTable::split_root", split_n);
496  /* gain a level */
497  ++level;
498 
499  /* check level overflow - this isn't something that should ever happen
500  * but deserves more than an Assert()... */
501  if (level == BTREE_CURSOR_LEVELS) {
502  throw Xapian::DatabaseCorruptError("Btree has grown impossibly large (" STRINGIZE(BTREE_CURSOR_LEVELS) " levels)");
503  }
504 
505  uint8_t * q = zeroed_new(block_size);
506  C[level].p = q;
507  C[level].c = DIR_START;
508  C[level].n = base.next_free_block();
509  C[level].rewrite = true;
510  SET_REVISION(q, latest_revision_number + 1);
511  SET_LEVEL(q, level);
513  compact(q); /* to reset TOTAL_FREE, MAX_FREE */
514 
515  /* form a null key in b with a pointer to the old root */
516  uint8_t b[10]; /* 7 is exact */
517  Item_wr item(b);
518  item.form_null_key(split_n);
519  add_item(item, level);
520 }
521 
537 void
538 ChertTable::enter_key(int j, Key prevkey, Key newkey)
539 {
540  LOGCALL_VOID(DB, "ChertTable::enter_key", j | Literal("prevkey") | Literal("newkey"));
541  Assert(writable);
542  Assert(prevkey < newkey);
543  AssertRel(j,>=,1);
544 
545  uint4 blocknumber = C[j - 1].n;
546 
547  // FIXME update to use Key
548  // Keys are truncated here: but don't truncate the count at the end away.
549  const int newkey_len = newkey.length();
550  AssertRel(newkey_len,>,0);
551  int i;
552 
553  if (j == 1) {
554  // Truncate the key to the minimal key which differs from prevkey,
555  // the preceding key in the block.
556  i = 0;
557  const int min_len = min(newkey_len, prevkey.length());
558  while (i < min_len && prevkey[i] == newkey[i]) {
559  i++;
560  }
561 
562  // Want one byte of difference.
563  if (i < newkey_len) i++;
564  } else {
565  /* Can't truncate between branch levels, since the separated keys
566  * are in at the leaf level, and truncating again will change the
567  * branch point.
568  */
569  i = newkey_len;
570  }
571 
572  uint8_t b[UCHAR_MAX + 6];
573  Item_wr item(b);
574  Assert(i <= 256 - I2 - C2);
575  Assert(i <= (int)sizeof(b) - I2 - C2 - 4);
576  item.set_key_and_block(newkey, i, blocknumber);
577 
578  // When j > 1 we can make the first key of block p null. This is probably
579  // worthwhile as it trades a small amount of CPU and RAM use for a small
580  // saving in disk use. Other redundant keys will still creep in though.
581  if (j > 1) {
582  uint8_t * p = C[j - 1].p;
583  uint4 n = getint4(newkey.get_address(), newkey_len + K1 + C2);
584  int new_total_free = TOTAL_FREE(p) + newkey_len + C2;
585  // FIXME: incredibly icky going from key to item like this...
586  auto byte_addr = const_cast<uint8_t*>(newkey.get_address());
587  Item_wr(byte_addr - I2).form_null_key(n);
588  SET_TOTAL_FREE(p, new_total_free);
589  }
590 
591  // The split block gets inserted into the parent after the pointer to the
592  // current child.
593  AssertEq(C[j].c, find_in_block(C[j].p, item.key(), false, C[j].c));
594  C[j].c += D2;
595  C[j].rewrite = true; /* a subtle point: this *is* required. */
596  add_item(item, j);
597 }
598 
603 int
604 ChertTable::mid_point(uint8_t * p) const
605 {
606  LOGCALL(DB, int, "ChertTable::mid_point", (void*)p);
607  int n = 0;
608  int dir_end = DIR_END(p);
609  int size = block_size - TOTAL_FREE(p) - dir_end;
610  for (int c = DIR_START; c < dir_end; c += D2) {
611  int l = Item(p, c).size();
612  n += 2 * l;
613  if (n >= size) {
614  if (l < n - size) RETURN(c);
615  RETURN(c + D2);
616  }
617  }
618 
619  /* This shouldn't happen, as the sum of the item sizes should be the same
620  * as the value calculated in size, so assert but return a sane value just
621  * in case. */
622  Assert(false);
623  RETURN(dir_end);
624 }
625 
635 void
636 ChertTable::add_item_to_block(uint8_t * p, Item_wr kt_, int c)
637 {
638  LOGCALL_VOID(DB, "ChertTable::add_item_to_block", (void*)p | Literal("kt_") | c);
639  Assert(writable);
640  int dir_end = DIR_END(p);
641  int kt_len = kt_.size();
642  int needed = kt_len + D2;
643  int new_total = TOTAL_FREE(p) - needed;
644  int new_max = MAX_FREE(p) - needed;
645 
646  Assert(new_total >= 0);
647 
648  AssertRel(MAX_FREE(p),>=,needed);
649 
650  AssertRel(DIR_START,<=,c);
651  AssertRel(c,<=,dir_end);
652 
653  memmove(p + c + D2, p + c, dir_end - c);
654  dir_end += D2;
655  SET_DIR_END(p, dir_end);
656 
657  int o = dir_end + new_max;
658  setD(p, c, o);
659  memmove(p + o, kt_.get_address(), kt_len);
660 
661  SET_MAX_FREE(p, new_max);
662 
663  SET_TOTAL_FREE(p, new_total);
664 }
665 
671 void
673 {
674  LOGCALL_VOID(DB, "ChertTable::add_item", Literal("kt_") | j);
675  Assert(writable);
676  uint8_t * p = C[j].p;
677  int c = C[j].c;
678  uint4 n;
679 
680  int needed = kt_.size() + D2;
681  if (TOTAL_FREE(p) < needed) {
682  int m;
683  // Prepare to split p. After splitting, the block is in two halves, the
684  // lower half is split_p, the upper half p again. add_to_upper_half
685  // becomes true when the item gets added to p, false when it gets added
686  // to split_p.
687 
688  if (seq_count < 0) {
689  // If we're not in sequential mode, we split at the mid point
690  // of the node.
691  m = mid_point(p);
692  } else {
693  // During sequential addition, split at the insert point
694  AssertRel(c,>=,DIR_START);
695  m = c;
696  }
697 
698  uint4 split_n = C[j].n;
699  C[j].n = base.next_free_block();
700 
701  memcpy(split_p, p, block_size); // replicate the whole block in split_p
702  SET_DIR_END(split_p, m);
703  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
704 
705  {
706  int residue = DIR_END(p) - m;
707  int new_dir_end = DIR_START + residue;
708  memmove(p + DIR_START, p + m, residue);
709  SET_DIR_END(p, new_dir_end);
710  }
711 
712  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
713 
714  bool add_to_upper_half;
715  if (seq_count < 0) {
716  add_to_upper_half = (c >= m);
717  } else {
718  // And add item to lower half if split_p has room, otherwise upper
719  // half
720  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
721  }
722 
723  if (add_to_upper_half) {
724  c -= (m - DIR_START);
725  Assert(seq_count < 0 || c <= DIR_START + D2);
726  Assert(c >= DIR_START);
727  Assert(c <= DIR_END(p));
728  add_item_to_block(p, kt_, c);
729  n = C[j].n;
730  } else {
731  Assert(c >= DIR_START);
732  Assert(c <= DIR_END(split_p));
733  add_item_to_block(split_p, kt_, c);
734  n = split_n;
735  }
736  write_block(split_n, split_p);
737 
738  // Check if we're splitting the root block.
739  if (j == level) split_root(split_n);
740 
741  /* Enter a separating key at level j + 1 between */
742  /* the last key of block split_p, and the first key of block p */
743  enter_key(j + 1,
744  Item(split_p, DIR_END(split_p) - D2).key(),
745  Item(p, DIR_START).key());
746  } else {
747  AssertRel(TOTAL_FREE(p),>=,needed);
748 
749  if (MAX_FREE(p) < needed) {
750  compact(p);
751  AssertRel(MAX_FREE(p),>=,needed);
752  }
753 
754  add_item_to_block(p, kt_, c);
755  n = C[j].n;
756  }
757  if (j == 0) {
758  changed_n = n;
759  changed_c = c;
760  }
761 }
762 
770 void
771 ChertTable::delete_item(int j, bool repeatedly)
772 {
773  LOGCALL_VOID(DB, "ChertTable::delete_item", j | repeatedly);
774  Assert(writable);
775  uint8_t * p = C[j].p;
776  int c = C[j].c;
777  AssertRel(DIR_START,<=,c);
778  AssertRel(c,<,DIR_END(p));
779  int kt_len = Item(p, c).size(); /* size of the item to be deleted */
780  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
781 
782  memmove(p + c, p + c + D2, dir_end - c);
783  SET_DIR_END(p, dir_end);
784  SET_MAX_FREE(p, MAX_FREE(p) + D2);
785  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
786 
787  if (!repeatedly) return;
788  if (j < level) {
789  if (dir_end == DIR_START) {
790  base.free_block(C[j].n);
791  C[j].rewrite = false;
792  C[j].n = BLK_UNUSED;
793  C[j + 1].rewrite = true; /* *is* necessary */
794  delete_item(j + 1, true);
795  }
796  } else {
797  Assert(j == level);
798  while (dir_end == DIR_START + D2 && level > 0) {
799  /* single item in the root block, so lose a level */
800  uint4 new_root = Item(p, DIR_START).block_given_by();
801  delete [] p;
802  C[level].p = 0;
803  base.free_block(C[level].n);
804  C[level].rewrite = false;
805  C[level].n = BLK_UNUSED;
806  level--;
807 
808  block_to_cursor(C, level, new_root);
809 
810  p = C[level].p;
811  dir_end = DIR_END(p); /* prepare for the loop */
812  }
813  }
814 }
815 
816 /* debugging aid:
817 static addcount = 0;
818 */
819 
845 int
847 {
848  LOGCALL(DB, int, "ChertTable::add_kt", found);
849  Assert(writable);
850  int components = 0;
851 
852  /*
853  {
854  printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
855  print_bytes(kt[I2] - K1 - C2, kt + I2 + K1); putchar('\n');
856  }
857  */
858  alter();
859 
860  if (found) { /* replacement */
861  seq_count = SEQ_START_POINT;
862  sequential = false;
863 
864  uint8_t * p = C[0].p;
865  int c = C[0].c;
866  AssertRel(DIR_START,<=,c);
867  AssertRel(c,<,DIR_END(p));
868  Item item(p, c);
869  int kt_size = kt.size();
870  int needed = kt_size - item.size();
871 
872  components = item.components_of();
873 
874  if (needed <= 0) {
875  /* simple replacement */
876  memmove(const_cast<uint8_t *>(item.get_address()),
877  kt.get_address(), kt_size);
878  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
879  } else {
880  /* new item into the block's freespace */
881  int new_max = MAX_FREE(p) - kt_size;
882  if (new_max >= 0) {
883  int o = DIR_END(p) + new_max;
884  memmove(p + o, kt.get_address(), kt_size);
885  setD(p, c, o);
886  SET_MAX_FREE(p, new_max);
887  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
888  } else {
889  /* do it the long way */
890  delete_item(0, false);
891  add_item(kt, 0);
892  }
893  }
894  } else {
895  /* addition */
896  if (changed_n == C[0].n && changed_c == C[0].c) {
897  if (seq_count < 0) seq_count++;
898  } else {
899  seq_count = SEQ_START_POINT;
900  sequential = false;
901  }
902  C[0].c += D2;
903  add_item(kt, 0);
904  }
905  RETURN(components);
906 }
907 
908 /* delete_kt() corresponds to add_kt(found), but there are only
909  two cases: if the key is not found nothing is done, and if it is
910  found the corresponding item is deleted with delete_item.
911 */
912 
913 int
915 {
916  LOGCALL(DB, int, "ChertTable::delete_kt", NO_ARGS);
917  Assert(writable);
918 
919  bool found = find(C);
920 
921  int components = 0;
922  seq_count = SEQ_START_POINT;
923  sequential = false;
924 
925  /*
926  {
927  printf("%d) %s ", addcount++, (found ? "deleting " : "ignoring "));
928  print_bytes(B->kt[I2] - K1 - C2, B->kt + I2 + K1); putchar('\n');
929  }
930  */
931  if (found) {
932  components = Item(C[0].p, C[0].c).components_of();
933  alter();
934  delete_item(0, true);
935  }
936  RETURN(components);
937 }
938 
939 /* ChertTable::form_key(key) treats address kt as an item holder and fills in
940 the key part:
941 
942  (I) K key c (C tag)
943 
944 The bracketed parts are left blank. The key is filled in with key_len bytes and
945 K set accordingly. c is set to 1.
946 */
947 
948 void ChertTable::form_key(const string & key) const
949 {
950  LOGCALL_VOID(DB, "ChertTable::form_key", key);
951  kt.form_key(key);
952 }
953 
954 /* ChertTable::add(key, tag) adds the key/tag item to the
955  B-tree, replacing any existing item with the same key.
956 
957  For a long tag, we end up having to add m components, of the form
958 
959  key 1 m tag1
960  key 2 m tag2
961  ...
962  key m m tagm
963 
964  and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
965  n components of the form
966 
967  key 1 n TAG1
968  key 2 n TAG2
969  ...
970  key n n TAGn
971 
972  and n may be greater than, equal to, or less than m. These cases are dealt
973  with in the code below. If m < n for example, we end up with a series of
974  deletions.
975 */
976 
977 void
978 ChertTable::add(const string &key, string tag, bool already_compressed)
979 {
980  LOGCALL_VOID(DB, "ChertTable::add", key | tag | already_compressed);
981  Assert(writable);
982 
983  if (handle < 0) create_and_open(block_size);
984 
985  form_key(key);
986 
987  bool compressed = false;
988  if (already_compressed) {
989  compressed = true;
990  } else if (compress_strategy != DONT_COMPRESS && tag.size() > COMPRESS_MIN) {
991  static_assert(DONT_COMPRESS != Z_DEFAULT_STRATEGY,
992  "DONT_COMPRESS clashes with zlib constant");
993  static_assert(DONT_COMPRESS != Z_FILTERED,
994  "DONT_COMPRESS clashes with zlib constant");
995  static_assert(DONT_COMPRESS != Z_HUFFMAN_ONLY,
996  "DONT_COMPRESS clashes with zlib constant");
997 #ifdef Z_RLE
998  static_assert(DONT_COMPRESS != Z_RLE,
999  "DONT_COMPRESS clashes with zlib constant");
1000 #endif
1001 
1002  lazy_alloc_deflate_zstream();
1003 
1004  deflate_zstream->next_in =
1005  reinterpret_cast<Bytef *>(const_cast<char *>(tag.data()));
1006  deflate_zstream->avail_in = static_cast<uInt>(tag.size());
1007 
1008  // If compressed size is >= tag.size(), we don't want to compress.
1009  unsigned long blk_len = tag.size() - 1;
1010  unsigned char * blk = new unsigned char[blk_len];
1011  deflate_zstream->next_out = blk;
1012  deflate_zstream->avail_out = static_cast<uInt>(blk_len);
1013 
1014  int err = deflate(deflate_zstream, Z_FINISH);
1015  if (err == Z_STREAM_END) {
1016  // If deflate succeeded, then the output was at least one byte
1017  // smaller than the input.
1018  tag.assign(reinterpret_cast<const char *>(blk), deflate_zstream->total_out);
1019  compressed = true;
1020  } else {
1021  // Deflate failed - presumably the data wasn't compressible.
1022  }
1023 
1024  delete [] blk;
1025  }
1026 
1027  // sort of matching kt.append_chunk(), but setting the chunk
1028  const size_t cd = kt.key().length() + K1 + I2 + C2 + C2; // offset to the tag data
1029  const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
1030  size_t first_L = L; // - amount for tag1
1031  bool found = find(C);
1032  if (!found) {
1033  uint8_t * p = C[0].p;
1034  size_t n = TOTAL_FREE(p) % (max_item_size + D2);
1035  if (n > D2 + cd) {
1036  n -= (D2 + cd);
1037  // if n >= last then fully filling this block won't produce
1038  // an extra item, so we might as well do this even if
1039  // full_compaction isn't active.
1040  //
1041  // In the full_compaction case, it turns out we shouldn't always
1042  // try to fill every last byte. Doing so can actually increase the
1043  // total space required (I believe this effect is due to longer
1044  // dividing keys being required in the index blocks). Empirically,
1045  // n >= key.size() + K appears a good criterion for K ~= 34. This
1046  // seems to save about 0.2% in total database size over always
1047  // splitting the tag. It'll also give be slightly faster retrieval
1048  // as we can avoid reading an extra block occasionally.
1049  size_t last = tag.length() % L;
1050  if (n >= last || (full_compaction && n >= key.size() + 34))
1051  first_L = n;
1052  }
1053  }
1054 
1055  // a null tag must be added in of course
1056  int m = tag.empty() ? 1 : (tag.length() - first_L + L - 1) / L + 1;
1057  // there are m items to add
1058  /* FIXME: sort out this error higher up and turn this into
1059  * an assert.
1060  */
1061  if (m >= BYTE_PAIR_RANGE)
1062  throw Xapian::UnimplementedError("Can't handle insanely large tags");
1063 
1064  int n = 0; // initialise to shut off warning
1065  // - and there will be n to delete
1066  int o = 0; // Offset into the tag
1067  size_t residue = tag.length(); // Bytes of the tag remaining to add in
1068  int replacement = false; // Has there been a replacement ?
1069  int i;
1070  kt.set_components_of(m);
1071  for (i = 1; i <= m; i++) {
1072  size_t l = (i == m ? residue : (i == 1 ? first_L : L));
1073  Assert(cd + l <= block_size);
1074  Assert(string::size_type(o + l) <= tag.length());
1075  kt.set_tag(cd, tag.data() + o, l, compressed);
1076  kt.set_component_of(i);
1077 
1078  o += l;
1079  residue -= l;
1080 
1081  if (i > 1) found = find(C);
1082  n = add_kt(found);
1083  if (n > 0) replacement = true;
1084  }
1085  /* o == tag.length() here, and n may be zero */
1086  for (i = m + 1; i <= n; i++) {
1087  kt.set_component_of(i);
1088  delete_kt();
1089  }
1090  if (!replacement) ++item_count;
1091  Btree_modified = true;
1092  if (cursor_created_since_last_modification) {
1093  cursor_created_since_last_modification = false;
1094  ++cursor_version;
1095  }
1096 }
1097 
1098 /* ChertTable::del(key) returns false if the key is not in the B-tree,
1099  otherwise deletes it and returns true.
1100 
1101  Again, this is parallel to ChertTable::add, but simpler in form.
1102 */
1103 
1104 bool
1105 ChertTable::del(const string &key)
1106 {
1107  LOGCALL(DB, bool, "ChertTable::del", key);
1108  Assert(writable);
1109 
1110  if (handle < 0) {
1111  if (handle == -2) {
1113  }
1114  RETURN(false);
1115  }
1116 
1117  // We can't delete a key which we is too long for us to store.
1118  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1119 
1120  if (key.empty()) RETURN(false);
1121  form_key(key);
1122 
1123  int n = delete_kt(); /* there are n items to delete */
1124  if (n <= 0) RETURN(false);
1125 
1126  for (int i = 2; i <= n; i++) {
1127  kt.set_component_of(i);
1128  delete_kt();
1129  }
1130 
1131  item_count--;
1132  Btree_modified = true;
1133  if (cursor_created_since_last_modification) {
1134  cursor_created_since_last_modification = false;
1135  ++cursor_version;
1136  }
1137  RETURN(true);
1138 }
1139 
1140 bool
1141 ChertTable::readahead_key(const string &key) const
1142 {
1143  LOGCALL(DB, bool, "ChertTable::readahead_key", key);
1144  Assert(!key.empty());
1145 
1146  // Two cases:
1147  //
1148  // handle = -1: Lazy table which isn't yet open
1149  //
1150  // handle = -2: Table has been closed. Since the readahead is just a
1151  // hint, we can safely ignore it for a closed table.
1152  if (handle < 0)
1153  RETURN(false);
1154 
1155  // If the table only has one level, there are no branch blocks to preread.
1156  if (level == 0)
1157  RETURN(false);
1158 
1159  // An overlong key cannot be found.
1160  if (key.size() > CHERT_BTREE_MAX_KEY_LEN)
1161  RETURN(true);
1162 
1163  form_key(key);
1164  Key ktkey = kt.key();
1165 
1166  // We'll only readahead the first level, since descending the B-tree would
1167  // require actual reads that would likely hurt performance more than help.
1168  const uint8_t * p = C[level].p;
1169  int c = find_in_block(p, ktkey, false, C[level].c);
1170  uint4 n = Item(p, c).block_given_by();
1171  // Don't preread if it's the block we last preread or already in the
1172  // cursor.
1173  if (n != last_readahead && n != C[level - 1].n) {
1174  /* Use the base bit_map_size not the bitmap's size, because the latter
1175  * is uninitialised in readonly mode.
1176  */
1177  Assert(n / CHAR_BIT < base.get_bit_map_size());
1178 
1179  last_readahead = n;
1180  if (!io_readahead_block(handle, block_size, n))
1181  RETURN(false);
1182  }
1183  RETURN(true);
1184 }
1185 
1186 bool
1187 ChertTable::get_exact_entry(const string &key, string & tag) const
1188 {
1189  LOGCALL(DB, bool, "ChertTable::get_exact_entry", key | tag);
1190  Assert(!key.empty());
1191 
1192  if (handle < 0) {
1193  if (handle == -2) {
1195  }
1196  RETURN(false);
1197  }
1198 
1199  // An oversized key can't exist, so attempting to search for it should fail.
1200  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1201 
1202  form_key(key);
1203  if (!find(C)) RETURN(false);
1204 
1205  (void)read_tag(C, &tag, false);
1206  RETURN(true);
1207 }
1208 
1209 bool
1210 ChertTable::key_exists(const string &key) const
1211 {
1212  LOGCALL(DB, bool, "ChertTable::key_exists", key);
1213  Assert(!key.empty());
1214 
1215  // An oversized key can't exist, so attempting to search for it should fail.
1216  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1217 
1218  form_key(key);
1219  RETURN(find(C));
1220 }
1221 
1222 bool
1223 ChertTable::read_tag(Cursor * C_, string *tag, bool keep_compressed) const
1224 {
1225  LOGCALL(DB, bool, "ChertTable::read_tag", Literal("C_") | tag | keep_compressed);
1226  Item item(C_[0].p, C_[0].c);
1227 
1228  /* n components to join */
1229  int n = item.components_of();
1230 
1231  tag->resize(0);
1232  // max_item_size also includes K1 + I2 + C2 + C2 bytes overhead and the key
1233  // (which is at least 1 byte long).
1234  if (n > 1) tag->reserve((max_item_size - (1 + K1 + I2 + C2 + C2)) * n);
1235 
1236  item.append_chunk(tag);
1237  bool compressed = item.get_compressed();
1238 
1239  for (int i = 2; i <= n; i++) {
1240  if (!next(C_, 0)) {
1241  throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
1242  }
1243  (void)Item(C_[0].p, C_[0].c).append_chunk(tag);
1244  }
1245  // At this point the cursor is on the last item - calling next will move
1246  // it to the next key (ChertCursor::get_tag() relies on this).
1247  if (!compressed || keep_compressed) RETURN(compressed);
1248 
1249  // FIXME: Perhaps we should decompress each chunk as we read it so we
1250  // don't need both the full compressed and uncompressed tags in memory
1251  // at once.
1252 
1253  string utag;
1254  // May not be enough for a compressed tag, but it's a reasonable guess.
1255  utag.reserve(tag->size() + tag->size() / 2);
1256 
1257  Bytef buf[8192];
1258 
1259  lazy_alloc_inflate_zstream();
1260 
1261  inflate_zstream->next_in =
1262  reinterpret_cast<Bytef*>(const_cast<char *>(tag->data()));
1263  inflate_zstream->avail_in = static_cast<uInt>(tag->size());
1264 
1265  int err = Z_OK;
1266  while (err != Z_STREAM_END) {
1267  inflate_zstream->next_out = buf;
1268  inflate_zstream->avail_out = static_cast<uInt>(sizeof(buf));
1269  err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1270  if (err == Z_BUF_ERROR && inflate_zstream->avail_in == 0) {
1271  LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << inflate_zstream->adler);
1272  Bytef header2[4];
1273  aligned_write4(header2, inflate_zstream->adler);
1274  inflate_zstream->next_in = header2;
1275  inflate_zstream->avail_in = 4;
1276  err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1277  if (err == Z_STREAM_END) break;
1278  }
1279 
1280  if (err != Z_OK && err != Z_STREAM_END) {
1281  if (err == Z_MEM_ERROR) throw std::bad_alloc();
1282  string msg = "inflate failed";
1283  if (inflate_zstream->msg) {
1284  msg += " (";
1285  msg += inflate_zstream->msg;
1286  msg += ')';
1287  }
1288  throw Xapian::DatabaseError(msg);
1289  }
1290 
1291  utag.append(reinterpret_cast<const char *>(buf),
1292  inflate_zstream->next_out - buf);
1293  }
1294  if (utag.size() != inflate_zstream->total_out) {
1295  string msg = "compressed tag didn't expand to the expected size: ";
1296  msg += str(utag.size());
1297  msg += " != ";
1298  // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
1299  msg += str(size_t(inflate_zstream->total_out));
1300  throw Xapian::DatabaseCorruptError(msg);
1301  }
1302 
1303  swap(*tag, utag);
1304 
1305  RETURN(false);
1306 }
1307 
1308 void
1310 {
1311  LOGCALL_VOID(DB, "ChertTable::set_full_compaction", parity);
1312  Assert(writable);
1313 
1314  if (parity) seq_count = 0;
1315  full_compaction = parity;
1316 }
1317 
1319  LOGCALL(DB, ChertCursor *, "ChertTable::cursor_get", NO_ARGS);
1320  if (handle < 0) {
1321  if (handle == -2) {
1323  }
1324  RETURN(NULL);
1325  }
1326  // FIXME Ick - casting away const is nasty
1327  RETURN(new ChertCursor(const_cast<ChertTable *>(this)));
1328 }
1329 
1330 /************ B-tree opening and closing ************/
1331 
1332 bool
1333 ChertTable::basic_open(bool revision_supplied, chert_revision_number_t revision_)
1334 {
1335  LOGCALL(DB, bool, "ChertTable::basic_open", revision_supplied | revision_);
1336  int ch = 'X'; /* will be 'A' or 'B' */
1337 
1338  {
1339  const size_t BTREE_BASES = 2;
1340  string err_msg;
1341  static const char basenames[BTREE_BASES] = { 'A', 'B' };
1342 
1343  ChertTable_base bases[BTREE_BASES];
1344  bool base_ok[BTREE_BASES];
1345 
1346  both_bases = true;
1347  bool valid_base = false;
1348  {
1349  for (size_t i = 0; i < BTREE_BASES; ++i) {
1350  bool ok = bases[i].read(name, basenames[i], writable, err_msg);
1351  base_ok[i] = ok;
1352  if (ok) {
1353  valid_base = true;
1354  } else {
1355  both_bases = false;
1356  }
1357  }
1358  }
1359 
1360  if (!valid_base) {
1361  if (handle >= 0) {
1362  ::close(handle);
1363  handle = -1;
1364  }
1365  string message = "Error opening table '";
1366  message += name;
1367  message += "':\n";
1368  message += err_msg;
1369  throw Xapian::DatabaseOpeningError(message);
1370  }
1371 
1372  if (revision_supplied) {
1373  bool found_revision = false;
1374  for (size_t i = 0; i < BTREE_BASES; ++i) {
1375  if (base_ok[i] && bases[i].get_revision() == revision_) {
1376  ch = basenames[i];
1377  found_revision = true;
1378  break;
1379  }
1380  }
1381  if (!found_revision) {
1382  /* Couldn't open the revision that was asked for.
1383  * This shouldn't throw an exception, but should just return
1384  * false to upper levels.
1385  */
1386  RETURN(false);
1387  }
1388  } else {
1389  chert_revision_number_t highest_revision = 0;
1390  for (size_t i = 0; i < BTREE_BASES; ++i) {
1391  if (base_ok[i] && bases[i].get_revision() >= highest_revision) {
1392  ch = basenames[i];
1393  highest_revision = bases[i].get_revision();
1394  }
1395  }
1396  }
1397 
1398  ChertTable_base *basep = 0;
1399  ChertTable_base *other_base = 0;
1400 
1401  for (size_t i = 0; i < BTREE_BASES; ++i) {
1402  LOGLINE(DB, "Checking (ch == " << ch << ") against "
1403  "basenames[" << i << "] == " << basenames[i]);
1404  LOGLINE(DB, "bases[" << i << "].get_revision() == " <<
1405  bases[i].get_revision());
1406  LOGLINE(DB, "base_ok[" << i << "] == " << base_ok[i]);
1407  if (ch == basenames[i]) {
1408  basep = &bases[i];
1409 
1410  // FIXME: assuming only two bases for other_base
1411  size_t otherbase_num = 1 - i;
1412  if (base_ok[otherbase_num]) {
1413  other_base = &bases[otherbase_num];
1414  }
1415  break;
1416  }
1417  }
1418  Assert(basep);
1419 
1420  /* basep now points to the most recent base block */
1421 
1422  /* Avoid copying the bitmap etc. - swap contents with the base
1423  * object in the vector, since it'll be destroyed anyway soon.
1424  */
1425  base.swap(*basep);
1426 
1427  revision_number = base.get_revision();
1428  block_size = base.get_block_size();
1429  root = base.get_root();
1430  level = base.get_level();
1431  //bit_map_size = basep->get_bit_map_size();
1432  item_count = base.get_item_count();
1433  faked_root_block = base.get_have_fakeroot();
1434  sequential = base.get_sequential();
1435 
1436  if (other_base != 0) {
1437  latest_revision_number = other_base->get_revision();
1438  if (revision_number > latest_revision_number)
1439  latest_revision_number = revision_number;
1440  } else {
1441  latest_revision_number = revision_number;
1442  }
1443  }
1444 
1445  /* kt holds constructed items as well as keys */
1446  kt = Item_wr(zeroed_new(block_size));
1447 
1448  set_max_item_size(BLOCK_CAPACITY);
1449 
1450  base_letter = ch;
1451 
1452  if (cursor_created_since_last_modification) {
1453  cursor_created_since_last_modification = false;
1454  ++cursor_version;
1455  }
1456 
1457  /* ready to open the main file */
1458 
1459  RETURN(true);
1460 }
1461 
1462 void
1464 {
1465  LOGCALL_VOID(DB, "ChertTable::read_root", NO_ARGS);
1466  if (faked_root_block) {
1467  /* root block for an unmodified database. */
1468  uint8_t * p = C[0].p;
1469  Assert(p);
1470 
1471  /* clear block - shouldn't be necessary, but is a bit nicer,
1472  * and means that the same operations should always produce
1473  * the same database. */
1474  memset(p, 0, block_size);
1475 
1476  int o = block_size - I2 - K1 - C2 - C2;
1477  Item_wr(p + o).fake_root_item();
1478 
1479  setD(p, DIR_START, o); // its directory entry
1480  SET_DIR_END(p, DIR_START + D2);// the directory size
1481 
1482  o -= (DIR_START + D2);
1483  SET_MAX_FREE(p, o);
1484  SET_TOTAL_FREE(p, o);
1485  SET_LEVEL(p, 0);
1486 
1487  if (!writable) {
1488  /* reading - revision number doesn't matter as long as
1489  * it's not greater than the current one. */
1490  SET_REVISION(p, 0);
1491  C[0].n = 0;
1492  } else {
1493  /* writing - */
1494  SET_REVISION(p, latest_revision_number + 1);
1495  C[0].n = base.next_free_block();
1496  }
1497  } else {
1498  /* using a root block stored on disk */
1499  block_to_cursor(C, level, root);
1500 
1501  if (REVISION(C[level].p) > revision_number) set_overwritten();
1502  /* although this is unlikely */
1503  }
1504 }
1505 
1506 bool
1507 ChertTable::do_open_to_write(bool revision_supplied,
1508  chert_revision_number_t revision_,
1509  bool create_db)
1510 {
1511  LOGCALL(DB, bool, "ChertTable::do_open_to_write", revision_supplied | revision_ | create_db);
1512  if (handle == -2) {
1514  }
1515  handle = io_open_block_wr(name + "DB", create_db);
1516  if (handle < 0) {
1517  // lazy doesn't make a lot of sense with create_db anyway, but ENOENT
1518  // with O_CREAT means a parent directory doesn't exist.
1519  if (lazy && !create_db && errno == ENOENT) {
1520  revision_number = revision_;
1521  RETURN(true);
1522  }
1523  string message(create_db ? "Couldn't create " : "Couldn't open ");
1524  message += name;
1525  message += "DB read/write: ";
1526  errno_to_string(errno, message);
1527  throw Xapian::DatabaseOpeningError(message);
1528  }
1529 
1530  if (!basic_open(revision_supplied, revision_)) {
1531  ::close(handle);
1532  handle = -1;
1533  if (!revision_supplied) {
1534  throw Xapian::DatabaseOpeningError("Failed to open for writing");
1535  }
1536  /* When the revision is supplied, it's not an exceptional
1537  * case when open failed, so we just return false here.
1538  */
1539  RETURN(false);
1540  }
1541 
1542  writable = true;
1543 
1544  for (int j = 0; j <= level; j++) {
1545  C[j].n = BLK_UNUSED;
1546  C[j].p = new uint8_t[block_size];
1547  }
1548  split_p = new uint8_t[block_size];
1549  read_root();
1550 
1551  buffer = zeroed_new(block_size);
1552 
1553  changed_n = 0;
1554  changed_c = DIR_START;
1555  seq_count = SEQ_START_POINT;
1556 
1557  RETURN(true);
1558 }
1559 
1560 ChertTable::ChertTable(const char * tablename_, const string & path_,
1561  bool readonly_, int compress_strategy_, bool lazy_)
1562  : tablename(tablename_),
1563  revision_number(0),
1564  item_count(0),
1565  block_size(0),
1566  latest_revision_number(0),
1567  both_bases(false),
1568  base_letter('A'),
1569  faked_root_block(true),
1570  sequential(true),
1571  handle(-1),
1572  level(0),
1573  root(0),
1574  kt(0),
1575  buffer(0),
1576  base(),
1577  name(path_),
1578  seq_count(0),
1579  changed_n(0),
1580  changed_c(0),
1581  max_item_size(0),
1582  Btree_modified(false),
1583  full_compaction(false),
1584  writable(!readonly_),
1585  cursor_created_since_last_modification(false),
1586  cursor_version(0),
1587  split_p(0),
1588  compress_strategy(compress_strategy_),
1589  deflate_zstream(NULL),
1590  inflate_zstream(NULL),
1591  lazy(lazy_),
1592  last_readahead(BLK_UNUSED)
1593 {
1594  LOGCALL_CTOR(DB, "ChertTable", tablename_ | path_ | readonly_ | compress_strategy_ | lazy_);
1595 }
1596 
1597 bool
1599 {
1600  if (handle < 0) {
1601  if (handle == -2) {
1603  }
1604  return true;
1605  }
1606  ChertCursor cur(const_cast<ChertTable*>(this));
1607  cur.find_entry(string());
1608  return !cur.next();
1609 }
1610 
1611 void
1613  if (usual(deflate_zstream)) {
1614  if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
1615  // Try to recover by deleting the stream and starting from scratch.
1616  delete deflate_zstream;
1617  }
1618 
1619  deflate_zstream = new z_stream;
1620 
1621  deflate_zstream->zalloc = Z_NULL;
1622  deflate_zstream->zfree = Z_NULL;
1623  deflate_zstream->opaque = Z_NULL;
1624 
1625  // -15 means raw deflate with 32K LZ77 window (largest)
1626  // memLevel 9 is the highest (8 is default)
1627  int err;
1628  err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
1629  -15, 9, compress_strategy);
1630  if (rare(err != Z_OK)) {
1631  if (err == Z_MEM_ERROR) {
1632  delete deflate_zstream;
1633  deflate_zstream = 0;
1634  throw std::bad_alloc();
1635  }
1636  string msg = "deflateInit2 failed (";
1637  if (deflate_zstream->msg) {
1638  msg += deflate_zstream->msg;
1639  } else {
1640  msg += str(err);
1641  }
1642  msg += ')';
1643  delete deflate_zstream;
1644  deflate_zstream = 0;
1645  throw Xapian::DatabaseError(msg);
1646  }
1647 }
1648 
1649 void
1651  if (usual(inflate_zstream)) {
1652  if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
1653  // Try to recover by deleting the stream and starting from scratch.
1654  delete inflate_zstream;
1655  }
1656 
1657  inflate_zstream = new z_stream;
1658 
1659  inflate_zstream->zalloc = Z_NULL;
1660  inflate_zstream->zfree = Z_NULL;
1661  inflate_zstream->opaque = Z_NULL;
1662 
1663  inflate_zstream->next_in = Z_NULL;
1664  inflate_zstream->avail_in = 0;
1665 
1666  int err = inflateInit2(inflate_zstream, -15);
1667  if (rare(err != Z_OK)) {
1668  if (err == Z_MEM_ERROR) {
1669  delete inflate_zstream;
1670  inflate_zstream = 0;
1671  throw std::bad_alloc();
1672  }
1673  string msg = "inflateInit2 failed (";
1674  if (inflate_zstream->msg) {
1675  msg += inflate_zstream->msg;
1676  } else {
1677  msg += str(err);
1678  }
1679  msg += ')';
1680  delete inflate_zstream;
1681  inflate_zstream = 0;
1682  throw Xapian::DatabaseError(msg);
1683  }
1684 }
1685 
1686 bool
1688  LOGCALL(DB, bool, "ChertTable::exists", NO_ARGS);
1689  RETURN(file_exists(name + "DB") &&
1690  (file_exists(name + "baseA") || file_exists(name + "baseB")));
1691 }
1692 
1693 void
1695 {
1696  LOGCALL_VOID(DB, "ChertTable::erase", NO_ARGS);
1697  close();
1698 
1699  (void)io_unlink(name + "baseA");
1700  (void)io_unlink(name + "baseB");
1701  (void)io_unlink(name + "DB");
1702 }
1703 
1704 void
1705 ChertTable::set_block_size(unsigned int block_size_)
1706 {
1707  LOGCALL_VOID(DB, "ChertTable::set_block_size", block_size_);
1708  // Block size must in the range 2048..BYTE_PAIR_RANGE, and a power of two.
1709  if (block_size_ < 2048 || block_size_ > BYTE_PAIR_RANGE ||
1710  (block_size_ & (block_size_ - 1)) != 0) {
1711  block_size_ = CHERT_DEFAULT_BLOCK_SIZE;
1712  }
1713  block_size = block_size_;
1714 }
1715 
1716 void
1717 ChertTable::create_and_open(unsigned int block_size_)
1718 {
1719  LOGCALL_VOID(DB, "ChertTable::create_and_open", block_size_);
1720  if (handle == -2) {
1722  }
1723  Assert(writable);
1724  close();
1725 
1726  set_block_size(block_size_);
1727 
1728  // FIXME: it would be good to arrange that this works such that there's
1729  // always a valid table in place if you run create_and_open() on an
1730  // existing table.
1731 
1732  /* write initial values to files */
1733 
1734  /* create the base file */
1735  ChertTable_base base_;
1737  base_.set_block_size(block_size);
1738  base_.set_have_fakeroot(true);
1739  base_.set_sequential(true);
1740  // Doing a full sync here would be overly paranoid, as an empty table
1741  // contains no precious data and xapian-check can recreate lost base
1742  // files.
1743  base_.write_to_file(name + "baseA", 'A', string(), -1, NULL);
1744 
1745  /* remove the alternative base file, if any */
1746  (void)io_unlink(name + "baseB");
1747 
1748  // Any errors are thrown if revision_supplied is false.
1749  (void)do_open_to_write(false, 0, true);
1750 }
1751 
1753  LOGCALL_DTOR(DB, "ChertTable");
1755 
1756  if (deflate_zstream) {
1757  // Errors which we care about have already been handled, so just ignore
1758  // any which get returned here.
1759  (void) deflateEnd(deflate_zstream);
1760  delete deflate_zstream;
1761  }
1762 
1763  if (inflate_zstream) {
1764  // Errors which we care about have already been handled, so just ignore
1765  // any which get returned here.
1766  (void) inflateEnd(inflate_zstream);
1767  delete inflate_zstream;
1768  }
1769 }
1770 
1771 void ChertTable::close(bool permanent) {
1772  LOGCALL_VOID(DB, "ChertTable::close", permanent);
1773 
1774  if (handle >= 0) {
1775  // If an error occurs here, we just ignore it, since we're just
1776  // trying to free everything.
1777  (void)::close(handle);
1778  handle = -1;
1779  }
1780 
1781  if (permanent) {
1782  handle = -2;
1783  // Don't delete the resources in the table, since they may
1784  // still be used to look up cached content.
1785  return;
1786  }
1787  for (int j = level; j >= 0; j--) {
1788  delete [] C[j].p;
1789  C[j].p = 0;
1790  }
1791  delete [] split_p;
1792  split_p = 0;
1793 
1794  delete [] kt.get_address();
1795  kt = Item_wr(0);
1796  delete [] buffer;
1797  buffer = 0;
1798 }
1799 
1800 void
1802 {
1803  LOGCALL_VOID(DB, "ChertTable::flush_db", NO_ARGS);
1804  Assert(writable);
1805  if (handle < 0) {
1806  if (handle == -2) {
1808  }
1809  return;
1810  }
1811 
1812  for (int j = level; j >= 0; j--) {
1813  if (C[j].rewrite) {
1814  write_block(C[j].n, C[j].p);
1815  }
1816  }
1817 
1818  if (Btree_modified) {
1819  faked_root_block = false;
1820  }
1821 }
1822 
1823 void
1825  const string * changes_tail)
1826 {
1827  LOGCALL_VOID(DB, "ChertTable::commit", revision | changes_fd | changes_tail);
1828  Assert(writable);
1829 
1830  if (revision <= revision_number) {
1831  throw Xapian::DatabaseError("New revision too low");
1832  }
1833 
1834  if (handle < 0) {
1835  if (handle == -2) {
1837  }
1839  return;
1840  }
1841 
1842  try {
1843  if (faked_root_block) {
1844  /* We will use a dummy bitmap. */
1845  base.clear_bit_map();
1846  }
1847 
1848  base.set_revision(revision);
1849  base.set_root(C[level].n);
1850  base.set_level(level);
1854 
1856 
1857  both_bases = true;
1859  root = C[level].n;
1860 
1861  Btree_modified = false;
1862 
1863  for (int i = 0; i < BTREE_CURSOR_LEVELS; ++i) {
1864  C[i].n = BLK_UNUSED;
1865  C[i].c = -1;
1866  C[i].rewrite = false;
1867  }
1868 
1869  // Save to "<table>.tmp" and then rename to "<table>.base<letter>" so
1870  // that a reader can't try to read a partially written base file.
1871  string tmp = name;
1872  tmp += "tmp";
1873  string basefile = name;
1874  basefile += "base";
1875  basefile += char(base_letter);
1876  base.write_to_file(tmp, base_letter, tablename, changes_fd, changes_tail);
1877 
1878  // Do this as late as possible to allow maximum time for writes to
1879  // happen, and so the calls to io_sync() are adjacent which may be
1880  // more efficient, at least with some Linux kernel versions.
1881  if (changes_tail ? !io_full_sync(handle) : !io_sync(handle)) {
1882  (void)::close(handle);
1883  handle = -1;
1884  (void)unlink(tmp.c_str());
1885  throw Xapian::DatabaseError("Can't commit new revision - failed to flush DB to disk");
1886  }
1887 
1888  if (!io_tmp_rename(tmp, basefile)) {
1889  string msg("Couldn't update base file ");
1890  msg += basefile;
1891  throw Xapian::DatabaseError(msg, errno);
1892  }
1893  base.commit();
1894 
1895  read_root();
1896 
1897  changed_n = 0;
1898  changed_c = DIR_START;
1900  } catch (...) {
1902  throw;
1903  }
1904 }
1905 
1906 void
1908 {
1909  LOGCALL_VOID(DB, "ChertTable::write_changed_blocks", changes_fd);
1910  Assert(changes_fd >= 0);
1911  if (handle < 0) return;
1912  if (faked_root_block) return;
1913 
1914  string buf;
1915  pack_uint(buf, 2u); // Indicate the item is a list of blocks
1916  pack_string(buf, tablename);
1917  pack_uint(buf, block_size);
1918  io_write(changes_fd, buf.data(), buf.size());
1919 
1920  // Compare the old and new bitmaps to find blocks which have changed, and
1921  // write them to the file descriptor.
1922  uint4 n = 0;
1923  uint8_t * p = new uint8_t[block_size];
1924  try {
1926  while (base.find_changed_block(&n)) {
1927  buf.resize(0);
1928  pack_uint(buf, n + 1);
1929  io_write(changes_fd, buf.data(), buf.size());
1930 
1931  // Read block n.
1932  read_block(n, p);
1933 
1934  // Write block n to the file.
1935  io_write(changes_fd, reinterpret_cast<const char *>(p),
1936  block_size);
1937  ++n;
1938  }
1939  delete[] p;
1940  p = 0;
1941  } catch (...) {
1942  delete[] p;
1943  throw;
1944  }
1945  buf.resize(0);
1946  pack_uint(buf, 0u);
1947  io_write(changes_fd, buf.data(), buf.size());
1948 }
1949 
1950 void
1952 {
1953  LOGCALL_VOID(DB, "ChertTable::cancel", NO_ARGS);
1954  Assert(writable);
1955 
1956  if (handle < 0) {
1957  if (handle == -2) {
1959  }
1960  latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1961  return;
1962  }
1963 
1964  // This causes problems: if (!Btree_modified) return;
1965 
1966  string err_msg;
1967  if (!base.read(name, base_letter, writable, err_msg)) {
1968  throw Xapian::DatabaseCorruptError(string("Couldn't reread base ") + base_letter);
1969  }
1970 
1973  root = base.get_root();
1974  level = base.get_level();
1975  //bit_map_size = basep->get_bit_map_size();
1979 
1980  latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1981 
1982  Btree_modified = false;
1983 
1984  for (int j = 0; j <= level; j++) {
1985  C[j].n = BLK_UNUSED;
1986  C[j].rewrite = false;
1987  }
1988  read_root();
1989 
1990  changed_n = 0;
1991  changed_c = DIR_START;
1993 
1996  ++cursor_version;
1997  }
1998 }
1999 
2000 /************ B-tree reading ************/
2001 
2002 bool
2003 ChertTable::do_open_to_read(bool revision_supplied, chert_revision_number_t revision_)
2004 {
2005  LOGCALL(DB, bool, "ChertTable::do_open_to_read", revision_supplied | revision_);
2006  if (handle == -2) {
2008  }
2009  handle = io_open_block_rd(name + "DB");
2010  if (handle < 0) {
2011  if (lazy) {
2012  // This table is optional when reading!
2013  revision_number = revision_;
2014  RETURN(true);
2015  }
2016  string message("Couldn't open ");
2017  message += name;
2018  message += "DB to read: ";
2019  errno_to_string(errno, message);
2020  throw Xapian::DatabaseOpeningError(message);
2021  }
2022 
2023  if (!basic_open(revision_supplied, revision_)) {
2024  ::close(handle);
2025  handle = -1;
2026  if (revision_supplied) {
2027  // The requested revision was not available.
2028  // This could be because the database was modified underneath us, or
2029  // because a base file is missing. Return false, and work out what
2030  // the problem was at a higher level.
2031  RETURN(false);
2032  }
2033  throw Xapian::DatabaseOpeningError("Failed to open table for reading");
2034  }
2035 
2036  for (int j = 0; j <= level; j++) {
2037  C[j].n = BLK_UNUSED;
2038  C[j].p = new uint8_t[block_size];
2039  }
2040 
2041  read_root();
2042  RETURN(true);
2043 }
2044 
2045 void
2047 {
2048  LOGCALL_VOID(DB, "ChertTable::open", NO_ARGS);
2049  LOGLINE(DB, "opening at path " << name);
2050  close();
2051 
2052  if (!writable) {
2053  // Any errors are thrown if revision_supplied is false
2054  (void)do_open_to_read(false, 0);
2055  return;
2056  }
2057 
2058  // Any errors are thrown if revision_supplied is false.
2059  (void)do_open_to_write(false, 0);
2060 }
2061 
2062 bool
2064 {
2065  LOGCALL(DB, bool, "ChertTable::open", revision);
2066  LOGLINE(DB, "opening for particular revision at path " << name);
2067  close();
2068 
2069  if (!writable) {
2070  if (do_open_to_read(true, revision)) {
2071  AssertEq(revision_number, revision);
2072  RETURN(true);
2073  } else {
2074  close();
2075  RETURN(false);
2076  }
2077  }
2078 
2079  if (!do_open_to_write(true, revision)) {
2080  // Can't open at the requested revision.
2081  close();
2082  RETURN(false);
2083  }
2084 
2085  AssertEq(revision_number, revision);
2086  RETURN(true);
2087 }
2088 
2089 bool
2090 ChertTable::prev_for_sequential(Cursor * C_, int /*dummy*/) const
2091 {
2092  LOGCALL(DB, bool, "ChertTable::prev_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2093  int c = C_[0].c;
2094  AssertRel(DIR_START,<=,c);
2095  AssertRel(c,<,DIR_END(C_[0].p));
2096  if (c == DIR_START) {
2097  uint8_t * p = C_[0].p;
2098  Assert(p);
2099  uint4 n = C_[0].n;
2100  while (true) {
2101  if (n == 0) RETURN(false);
2102  n--;
2103  if (writable) {
2104  if (n == C[0].n) {
2105  // Block is a leaf block in the built-in cursor
2106  // (potentially in modified form).
2107  memcpy(p, C[0].p, block_size);
2108  } else {
2109  // Blocks in the built-in cursor may not have been written
2110  // to disk yet, so we have to check that the block number
2111  // isn't in the built-in cursor or we'll read an
2112  // uninitialised block (for which GET_LEVEL(p) will
2113  // probably return 0).
2114  int j;
2115  for (j = 1; j <= level; ++j) {
2116  if (n == C[j].n) break;
2117  }
2118  if (j <= level) continue;
2119 
2120  // Block isn't in the built-in cursor, so the form on disk
2121  // is valid, so read it to check if it's the next level 0
2122  // block.
2123  read_block(n, p);
2124  }
2125  } else {
2126  read_block(n, p);
2127  }
2129  if (REVISION(p) > revision_number + writable) {
2130  set_overwritten();
2131  RETURN(false);
2132  }
2133  if (GET_LEVEL(p) == 0) break;
2134  }
2135  c = DIR_END(p);
2136  C_[0].n = n;
2137  AssertRel(DIR_START,<,c);
2138  }
2139  c -= D2;
2140  C_[0].c = c;
2141  RETURN(true);
2142 }
2143 
2144 bool
2145 ChertTable::next_for_sequential(Cursor * C_, int /*dummy*/) const
2146 {
2147  LOGCALL(DB, bool, "ChertTable::next_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2148  uint8_t * p = C_[0].p;
2149  Assert(p);
2150  int c = C_[0].c;
2151  AssertRel(c,<,DIR_END(p));
2152  c += D2;
2153  Assert((unsigned)c < block_size);
2154  if (c == DIR_END(p)) {
2155  uint4 n = C_[0].n;
2156  while (true) {
2157  n++;
2158  if (n > base.get_last_block()) RETURN(false);
2159  if (writable) {
2160  if (n == C[0].n) {
2161  // Block is a leaf block in the built-in cursor
2162  // (potentially in modified form).
2163  memcpy(p, C[0].p, block_size);
2164  } else {
2165  // Blocks in the built-in cursor may not have been written
2166  // to disk yet, so we have to check that the block number
2167  // isn't in the built-in cursor or we'll read an
2168  // uninitialised block (for which GET_LEVEL(p) will
2169  // probably return 0).
2170  int j;
2171  for (j = 1; j <= level; ++j) {
2172  if (n == C[j].n) break;
2173  }
2174  if (j <= level) continue;
2175 
2176  // Block isn't in the built-in cursor, so the form on disk
2177  // is valid, so read it to check if it's the next level 0
2178  // block.
2179  read_block(n, p);
2180  }
2181  } else {
2182  read_block(n, p);
2183  }
2185  if (REVISION(p) > revision_number + writable) {
2186  set_overwritten();
2187  RETURN(false);
2188  }
2189  if (GET_LEVEL(p) == 0) break;
2190  }
2191  c = DIR_START;
2192  C_[0].n = n;
2193  }
2194  C_[0].c = c;
2195  RETURN(true);
2196 }
2197 
2198 bool
2200 {
2201  LOGCALL(DB, bool, "ChertTable::prev_default", Literal("C_") | j);
2202  uint8_t * p = C_[j].p;
2203  int c = C_[j].c;
2204  AssertRel(DIR_START,<=,c);
2205  AssertRel(c,<,DIR_END(p));
2206  AssertRel((unsigned)DIR_END(p),<=,block_size);
2207  if (c == DIR_START) {
2208  if (j == level) RETURN(false);
2209  if (!prev_default(C_, j + 1)) RETURN(false);
2210  c = DIR_END(p);
2211  AssertRel(DIR_START,<,c);
2212  }
2213  c -= D2;
2214  C_[j].c = c;
2215  if (j > 0) {
2216  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2217  }
2218  RETURN(true);
2219 }
2220 
2221 bool
2223 {
2224  LOGCALL(DB, bool, "ChertTable::next_default", Literal("C_") | j);
2225  uint8_t * p = C_[j].p;
2226  int c = C_[j].c;
2227  AssertRel(c,<,DIR_END(p));
2228  AssertRel((unsigned)DIR_END(p),<=,block_size);
2229  c += D2;
2230  if (j > 0) {
2231  AssertRel(DIR_START,<,c);
2232  } else {
2233  AssertRel(DIR_START,<=,c);
2234  }
2235  // Sometimes c can be DIR_END(p) + 2 here it appears...
2236  if (c >= DIR_END(p)) {
2237  if (j == level) RETURN(false);
2238  if (!next_default(C_, j + 1)) RETURN(false);
2239  c = DIR_START;
2240  }
2241  C_[j].c = c;
2242  if (j > 0) {
2243  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2244 #ifdef BTREE_DEBUG_FULL
2245  printf("Block in ChertTable:next_default");
2246  report_block_full(j - 1, C_[j - 1].n, C_[j - 1].p);
2247 #endif /* BTREE_DEBUG_FULL */
2248  }
2249  RETURN(true);
2250 }
2251 
2252 void
2254 {
2255  throw Xapian::DatabaseClosedError("Database has been closed");
2256 }
2257 
2273 bool Key::operator<(Key key2) const
2274 {
2275  LOGCALL(DB, bool, "Key::operator<", static_cast<const void*>(key2.p));
2276  int key1_len = length();
2277  int key2_len = key2.length();
2278  if (key1_len == key2_len) {
2279  // The keys are the same length, so we can compare the counts
2280  // in the same operation since they're stored as 2 byte
2281  // bigendian numbers.
2282  RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) < 0);
2283  }
2284 
2285  int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
2286 
2287  // Compare the common part of the keys
2288  int diff = memcmp(p + K1, key2.p + K1, k_smaller);
2289  if (diff != 0) RETURN(diff < 0);
2290 
2291  // We dealt with the "same length" case above so we never need to check
2292  // the count here.
2293  RETURN(key1_len < key2_len);
2294 }
2295 
2296 bool Key::operator==(Key key2) const
2297 {
2298  LOGCALL(DB, bool, "Key::operator==", static_cast<const void*>(key2.p));
2299  int key1_len = length();
2300  if (key1_len != key2.length()) RETURN(false);
2301  // The keys are the same length, so we can compare the counts
2302  // in the same operation since they're stored as 2 byte
2303  // bigendian numbers.
2304  RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) == 0);
2305 }
bool get_sequential() const
int close(FD &fd)
Definition: fd.h:63
void set_overwritten() const
Definition: chert_table.cc:251
static uint8_t * zeroed_new(size_t size)
Definition: chert_table.cc:96
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:480
#define RETURN(A)
Definition: debuglog.h:482
bool get_have_fakeroot() const
#define Assert(COND)
Definition: omassert.h:122
char other_base_letter() const
Definition: chert_table.h:711
int io_open_block_rd(const char *fname)
Open a block-based file for reading.
Definition: io_utils.h:38
void set_level(uint4 level_)
int DIR_END(const uint8_t *b)
Definition: chert_table.h:154
bool both_bases
set to true if baseA and baseB both exist as valid bases.
Definition: chert_table.h:742
int c
offset in the block&#39;s directory
Definition: chert_cursor.h:47
void io_write(int fd, const char *p, size_t n)
Write n bytes from block pointed to by p to file descriptor fd.
Definition: io_utils.cc:145
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: chert_cursor.h:66
Indicates an attempt to access a closed database.
Definition: error.h:1097
void add_item(Item_wr kt, int j)
ChertTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
Definition: chert_table.cc:672
#define AssertEq(A, B)
Definition: omassert.h:124
void read_root()
int add_kt(bool found)
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C...
Definition: chert_table.cc:846
Key key() const
Definition: chert_table.h:214
z_stream * inflate_zstream
Zlib state object for inflating.
Definition: chert_table.h:858
bool io_unlink(const std::string &filename)
Delete a file.
Definition: io_utils.cc:52
T get_address() const
Definition: chert_table.h:200
int length() const
Definition: chert_table.h:181
#define true
Definition: header.h:8
bool next()
Advance to the next key.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
int getint4(const unsigned char *p, int c)
Definition: chert_table.h:79
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
void io_read_block(int fd, char *p, size_t n, off_t b, off_t o)
Read block b size n bytes into buffer p from file descriptor fd, offset o.
Definition: io_utils.cc:180
const size_t BLOCK_CAPACITY
Even for items of at maximum size, it must be possible to get this number of items in a block...
Definition: chert_table.h:105
#define usual(COND)
Definition: config.h:574
const int C2
Definition: chert_table.h:123
bool io_sync(int fd)
Ensure all data previously written to file descriptor fd has been written to disk.
Definition: io_utils.h:73
void compact(uint8_t *p)
compact(p) compact the block at p by shuffling all the items up to the end.
Definition: chert_table.cc:469
void set_full_compaction(bool parity)
const char * tablename
The name of the table (used when writing changesets).
Definition: chert_table.h:716
const int I2
Definition: chert_table.h:121
Convert errno value to std::string, thread-safe if possible.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:479
bool faked_root_block
true if the root block is faked (not written to disk).
Definition: chert_table.h:751
int delete_kt()
Definition: chert_table.cc:914
int io_open_block_wr(const char *fname, bool anew)
Open a block-based file for writing.
Definition: io_utils.cc:67
Btree base file implementation.
uint4 root
the root block of the B-tree
Definition: chert_table.h:771
Item_wr kt
buffer of size block_size for making up key-tag items
Definition: chert_table.h:774
#define STRINGIZE(X)
The STRINGIZE macro converts its parameter into a string constant.
Definition: stringutils.h:36
bool io_readahead_block(int, size_t, off_t, off_t=0)
Readahead block b size n bytes from file descriptor fd.
Definition: io_utils.h:133
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
void append_chunk(std::string *tag) const
Definition: chert_table.h:215
#define BLK_UNUSED
Definition: chert_cursor.h:31
STL namespace.
void lazy_alloc_inflate_zstream() const
Allocate the zstream for inflating, if not already allocated.
Convert types to std::string.
chert_tablesize_t item_count
keeps a count of the number of items in the B-tree.
Definition: chert_table.h:728
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
void open()
Open the btree at the latest revision.
Utility functions for testing files.
Cursor C[BTREE_CURSOR_LEVELS]
Definition: chert_table.h:841
#define false
Definition: header.h:9
unsigned long cursor_version
Version count for tracking when cursors need to rebuild.
Definition: chert_table.h:813
#define rare(COND)
Definition: config.h:573
bool exists() const
Determine whether the btree exists on disk.
bool key_exists(const std::string &key) const
Check if a key exists in the Btree.
void create_and_open(unsigned int blocksize)
Create a new empty btree structure on disk and open it at the initial revision.
unsigned int chert_revision_number_t
A type used to store a revision number for a table.
Definition: chert_types.h:40
const int DIR_START
Definition: chert_table.h:155
void aligned_write4(unsigned char *ptr, T value)
Definition: wordaccess.h:170
chert_tablesize_t get_item_count() const
void set_have_fakeroot(bool have_fakeroot_)
uint8_t * split_p
Buffer used when splitting a block.
Definition: chert_table.h:848
ChertTable(const ChertTable &)
Copying not allowed.
Hierarchy of classes which Xapian can throw as exceptions.
bool readahead_key(const string &key) const
bool del(const std::string &key)
Delete an entry from the table.
static uint4 block_given_by(const uint8_t *p, int c)
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value...
Definition: pretty.h:45
bool prev_for_sequential(Cursor *C_, int dummy) const
~ChertTable()
Close the Btree.
bool next_for_sequential(Cursor *C_, int dummy) const
bool io_full_sync(int fd)
Definition: io_utils.h:88
void set_sequential(bool sequential_)
uint4 get_block_size() const
uint32_t uint4
Definition: internaltypes.h:32
bool next_default(Cursor *C_, int j) const
DatabaseModifiedError indicates a database was modified.
Definition: error.h:539
void cancel()
Cancel any outstanding changes.
functions for reading and writing different width words
uint4 get_level() const
void errno_to_string(int e, string &s)
int size() const
I in diagram above.
Definition: chert_table.h:202
void form_null_key(uint4 n)
Form an item with a null key and with block number n in the tag.
Definition: chert_table.h:286
void set_block_given_by(uint4 n)
Set this item&#39;s tag to point to block n (this block should not be at level 0).
Definition: chert_table.h:273
Interface to Btree cursors.
bool do_open_to_read(bool revision_supplied, chert_revision_number_t revision_)
Perform the opening operation to read.
std::string name
The path name of the B tree.
Definition: chert_table.h:783
bool rewrite
true if the block is not the same as on disk, and so needs rewriting
Definition: chert_cursor.h:58
void SET_LEVEL(uint8_t *b, int x)
Definition: chert_table.h:158
uint8_t * p
pointer to a block
Definition: chert_cursor.h:45
void io_write_block(int fd, const char *p, size_t n, off_t b, off_t o)
Write block b size n bytes from buffer p to file descriptor fd, offset o.
Definition: io_utils.cc:228
int GET_LEVEL(const uint8_t *b)
Definition: chert_table.h:151
bool do_open_to_write(bool revision_supplied, chert_revision_number_t revision_, bool create_db=false)
Perform the opening operation to write.
bool operator<(Key key2) const
Compares this key with key2.
chert_revision_number_t revision_number
revision number of the opened B-tree.
Definition: chert_table.h:725
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
void set_root(uint4 root_)
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void erase()
Erase this table from disk.
static int find_in_block(const uint8_t *p, Key key, bool leaf, int c)
find_in_block(p, key, leaf, c) searches for the key in the block at p.
Definition: chert_table.cc:386
bool io_tmp_rename(const std::string &tmp_file, const std::string &real_file)
Rename a temporary file to its final position.
Definition: io_utils.cc:271
void set_block_size(unsigned int block_size_)
Set the block size.
void SET_DIR_END(uint8_t *b, int x)
Definition: chert_table.h:161
bool writable
Set to true when the database is opened to write.
Definition: chert_table.h:807
const int K1
Definition: chert_table.h:120
bool find_changed_block(uint4 *n) const
Find the first changed block at or after position *n.
bool read(const std::string &name, char ch, bool read_bitmap, std::string &err_msg)
Read values from a base file.
void close(bool permanent=false)
Close the Btree.
bool really_empty() const
Return true if there are no entries in the table.
ChertCursor * cursor_get() const
Get a cursor for reading from the table.
void set_item_count(chert_tablesize_t item_count_)
bool basic_open(bool revision_supplied, chert_revision_number_t revision)
#define BYTE_PAIR_RANGE
Definition: chert_table.cc:166
void setD(unsigned char *p, int c, int x)
Definition: chert_table.h:128
void write_changed_blocks(int changes_fd)
Append the list of blocks changed to a changeset file.
bool sequential
true iff the data has been written in a single write in sequential order.
Definition: chert_table.h:756
#define AssertParanoid(COND)
Definition: omassert.h:129
void enter_key(int j, Key prevkey, Key newkey)
enter_key(j, prevkey, newkey) is called after a block split.
Definition: chert_table.cc:538
z_stream * deflate_zstream
Zlib state object for deflating.
Definition: chert_table.h:855
bool prev_default(Cursor *C_, int j) const
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:478
uint4 get_root() const
bool find(Cursor *) const
find(C_) searches for the key of B->kt in the B-tree.
Definition: chert_table.cc:433
uint4 changed_n
the last block to be changed by an addition
Definition: chert_table.h:791
void commit(chert_revision_number_t revision, int changes_fd=-1, const std::string *changes_tail=NULL)
Commit any outstanding changes to the table.
void lazy_alloc_deflate_zstream() const
Allocate the zstream for deflating, if not already allocated.
uint4 block_given_by() const
Get this item&#39;s tag as a block number (this block should not be at level 0).
Definition: chert_table.h:224
void set_block_size(uint4 block_size_)
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
void fake_root_item()
Definition: chert_table.h:314
int mid_point(uint8_t *p) const
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in ...
Definition: chert_table.cc:604
void block_to_cursor(Cursor *C_, int j, uint4 n) const
Definition: chert_table.cc:272
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
Definition: chert_table.cc:978
uint4 get_revision() const
bool get_compressed() const
Definition: chert_table.h:207
int MAX_FREE(const uint8_t *b)
Definition: chert_table.h:152
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
int seq_count
count of the number of successive instances of purely sequential addition, starting at SEQ_START_POIN...
Definition: chert_table.h:788
#define CHERT_DEFAULT_BLOCK_SIZE
The default block size to use in a B-tree table.
Definition: chert_types.h:57
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
Btree implementation.
uint4 n
block number
Definition: chert_cursor.h:56
void alter()
Btree::alter(); is called when the B-tree is to be altered.
Definition: chert_table.cc:338
char name[9]
Definition: dbcheck.cc:55
#define SEQ_START_POINT
Flip to sequential addition block-splitting after this number of observed sequential additions (in ne...
Definition: chert_table.cc:151
void pack_string(std::string &s, const std::string &value)
Append an encoded std::string to a string.
Definition: pack.h:477
bool Btree_modified
Set to true the first time the B-tree is modified.
Definition: chert_table.h:801
#define CHERT_BTREE_MAX_KEY_LEN
The largest possible value of a key_len.
Definition: chert_table.h:101
void delete_item(int j, bool repeatedly)
ChertTable::delete_item(j, repeatedly) is (almost) the converse of add_item.
Definition: chert_table.cc:771
const int DONT_COMPRESS
Definition: chert_table.h:94
int components_of() const
Definition: chert_table.h:211
char base_letter
the value &#39;A&#39; or &#39;B&#39; of the current base
Definition: chert_table.h:745
void write_to_file(const std::string &filename, char base_letter, const std::string &tablename, int changes_fd, const std::string *changes_tail)
Write the btree base file to disk.
chert_revision_number_t latest_revision_number
Revision number of the other base, or zero if there is only one base file.
Definition: chert_table.h:736
unsigned int block_size
block size of the B tree in bytes
Definition: chert_table.h:731
int changed_c
directory offset corresponding to last block to be changed by an addition
Definition: chert_table.h:795
Pack types into strings and unpack them again.
int handle
File descriptor of the table.
Definition: chert_table.h:765
const int D2
Definition: chert_table.h:122
Wrappers for low-level POSIX I/O routines.
const int BTREE_CURSOR_LEVELS
Definition: chert_table.h:325
Various handy helpers which std::string really should provide.
void form_key(const std::string &key) const
Definition: chert_table.cc:948
uint4 get_last_block() const
void read_block(uint4 n, uint8_t *p) const
read_block(n, p) reads block n of the DB file to address p.
Definition: chert_table.cc:170
void flush_db()
Flush any outstanding changes to the DB file of the table.
Definition: header.h:151
unsigned REVISION(const uint8_t *b)
Definition: chert_table.h:150
ChertTable_base base
For writing back as file baseA or baseB.
Definition: chert_table.h:780
bool operator==(Key key2) const
bool cursor_created_since_last_modification
Flag for tracking when cursors need to rebuild.
Definition: chert_table.h:810
void write_block(uint4 n, const uint8_t *p) const
write_block(n, p) writes block n in the DB file from address p.
Definition: chert_table.cc:199
Various assertion macros.
#define LOGLINE(a, b)
Definition: debuglog.h:483
void set_revision(uint4 revision_)
DatabaseError indicates some sort of database related error.
Definition: error.h:367
void split_root(uint4 split_n)
Btree needs to gain a new level to insert more items: so split root block and construct a new one...
Definition: chert_table.cc:493
const size_t COMPRESS_MIN
Definition: chert_table.cc:71
uint8_t * buffer
buffer of size block_size for reforming blocks
Definition: chert_table.h:777
static void throw_database_closed()
Throw an exception indicating that the database is closed.
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
void SET_REVISION(uint8_t *b, uint4 rev)
Definition: chert_table.h:157
int level
number of levels, counting from 0
Definition: chert_table.h:768
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
void SET_MAX_FREE(uint8_t *b, int x)
Definition: chert_table.h:159
bool lazy
If true, don&#39;t create the table until it&#39;s needed.
Definition: chert_table.h:861
void set_key_and_block(Key newkey, int truncate_size, uint4 n)
Definition: chert_table.h:250
Debug logging macros.
const uint8_t * p
Definition: chert_table.h:168
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
int TOTAL_FREE(const uint8_t *b)
Definition: chert_table.h:153
void add_item_to_block(uint8_t *p, Item_wr kt, int c)
add_item_to_block(p, kt_, c) adds item kt_ to the block at p.
Definition: chert_table.cc:636
#define C(X)
void SET_TOTAL_FREE(uint8_t *b, int x)
Definition: chert_table.h:160
bool read_tag(Cursor *C_, std::string *tag, bool keep_compressed) const
Read the tag value for the key pointed to by cursor C_.
const uint8_t * get_address() const
Definition: chert_table.h:171
int compress_strategy
DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY, Z_RLE.
Definition: chert_table.h:852