xapian-core  1.4.20
chert_table.cc
Go to the documentation of this file.
1 /* chert_table.cc: Btree implementation
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
6  * Copyright 2008 Lemur Consulting Ltd
7  * Copyright 2010 Richard Boulton
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "chert_table.h"
28 
29 #include <xapian/error.h>
30 
31 #include <cerrno>
32 
33 #include "errno_to_string.h"
34 #include "omassert.h"
35 #include "stringutils.h" // For STRINGIZE().
36 
37 // Define to use "dangerous" mode - in this mode we write modified btree
38 // blocks back in place. This is somewhat faster (especially once we're
39 // I/O bound) but the database can't be safely searched during indexing
40 // and if indexing is terminated uncleanly, the database may be corrupt.
41 //
42 // Despite the risks, it's useful for speeding up a full rebuild.
43 //
44 // FIXME: make this mode run-time selectable, and record that it is currently
45 // in use somewhere on disk, so readers can check and refuse to open the
46 // database.
47 //
48 // #define DANGEROUS
49 
50 #include <sys/types.h>
51 
52 #include <cstring> /* for memmove */
53 #include <climits> /* for CHAR_BIT */
54 
55 #include "chert_btreebase.h"
56 #include "chert_cursor.h"
57 
58 #include "filetests.h"
59 #include "io_utils.h"
60 #include "debuglog.h"
61 #include "pack.h"
62 #include "str.h"
63 #include "wordaccess.h"
64 
65 #include <algorithm> // for std::min()
66 #include <string>
67 
68 using namespace std;
69 
70 // Only try to compress tags longer than this many bytes.
71 const size_t COMPRESS_MIN = 4;
72 
73 //#define BTREE_DEBUG_FULL 1
74 #undef BTREE_DEBUG_FULL
75 
76 #ifdef BTREE_DEBUG_FULL
77 /*------debugging aids from here--------*/
78 
79 static void print_key(const uint8_t * p, int c, int j);
80 static void print_tag(const uint8_t * p, int c, int j);
81 
82 /*
83 static void report_cursor(int N, Btree * B, Cursor * C)
84 {
85  int i;
86  printf("%d)\n", N);
87  for (i = 0; i <= B->level; i++)
88  printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
89  C[i].p, C[i].c, C[i].n, C[i].rewrite);
90 }
91 */
92 
93 /*------to here--------*/
94 #endif /* BTREE_DEBUG_FULL */
95 
96 static inline uint8_t *zeroed_new(size_t size)
97 {
98  uint8_t *temp = new uint8_t[size];
99  memset(temp, 0, size);
100  return temp;
101 }
102 
103 /* A B-tree comprises (a) a base file, containing essential information (Block
104  size, number of the B-tree root block etc), (b) a bitmap, the Nth bit of the
105  bitmap being set if the Nth block of the B-tree file is in use, and (c) a
106  file DB containing the B-tree proper. The DB file is divided into a sequence
107  of equal sized blocks, numbered 0, 1, 2 ... some of which are free, some in
108  use. Those in use are arranged in a tree.
109 
110  Each block, b, has a structure like this:
111 
112  R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
113  <---------- D ----------> <-M->
114 
115  And then,
116 
117  R = REVISION(b) is the revision number the B-tree had when the block was
118  written into the DB file.
119  L = GET_LEVEL(b) is the level of the block, which is the number of levels
120  towards the root of the B-tree structure. So leaf blocks
121  have level 0 and the one root block has the highest level
122  equal to the number of levels in the B-tree.
123  M = MAX_FREE(b) is the size of the gap between the end of the directory and
124  the first item of data. (It is not necessarily the maximum
125  size among the bits of space that are free, but I can't
126  think of a better name.)
127  T = TOTAL_FREE(b)is the total amount of free space left in b.
128  D = DIR_END(b) gives the offset to the end of the directory.
129 
130  o1, o2 ... oN are a directory of offsets to the N items held in the block.
131  The items are key-tag pairs, and as they occur in the directory are ordered
132  by the keys.
133 
134  An item has this form:
135 
136  I K key x C tag
137  <--K-->
138  <------I------>
139 
140  A long tag presented through the API is split up into C tags small enough to
141  be accommodated in the blocks of the B-tree. The key is extended to include
142  a counter, x, which runs from 1 to C. The key is preceded by a length, K,
143  and the whole item with a length, I, as depicted above.
144 
145  Here are the corresponding definitions:
146 
147 */
148 
151 #define SEQ_START_POINT (-10)
152 
153 
154 /* There are two bit maps in bit_map0 and bit_map. The nth bit of bitmap is 0
155  if the nth block is free, otherwise 1. bit_map0 is the initial state of
156  bitmap at the start of the current transaction.
157 
158  Note use of the limits.h values:
159  UCHAR_MAX = 255, an unsigned with all bits set, and
160  CHAR_BIT = 8, the number of bits per byte
161 
162  BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
163  bytes -- 64K effectively.
164 */
165 
166 #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
167 
169 void
170 ChertTable::read_block(uint4 n, uint8_t * p) const
171 {
172  // Log the value of p, not the contents of the block it points to...
173  LOGCALL_VOID(DB, "ChertTable::read_block", n | (void*)p);
174  if (rare(handle == -2))
176 
177  /* Use the base bit_map_size not the bitmap's size, because
178  * the latter is uninitialised in readonly mode.
179  */
180  Assert(n / CHAR_BIT < base.get_bit_map_size());
181 
182  io_read_block(handle, reinterpret_cast<char *>(p), block_size, n);
183 
184  int dir_end = DIR_END(p);
185  if (rare(dir_end < DIR_START || unsigned(dir_end) > block_size)) {
186  string msg("dir_end invalid in block ");
187  msg += str(n);
188  throw Xapian::DatabaseCorruptError(msg);
189  }
190 }
191 
198 void
199 ChertTable::write_block(uint4 n, const uint8_t * p) const
200 {
201  LOGCALL_VOID(DB, "ChertTable::write_block", n | p);
202  Assert(writable);
203  /* Check that n is in range. */
204  Assert(n / CHAR_BIT < base.get_bit_map_size());
205 
206  /* don't write to non-free */;
207  AssertParanoid(base.block_free_at_start(n));
208 
209  /* write revision is okay */
210  AssertEqParanoid(REVISION(p), latest_revision_number + 1);
211 
212  if (both_bases) {
213  // Delete the old base before modifying the database.
214  //
215  // If the file is on NFS, then io_unlink() may return false even if
216  // the file was removed, so on balance throwing an exception in this
217  // case is unhelpful, since we wanted the file gone anyway! The
218  // likely explanation is that somebody moved, deleted, or changed a
219  // symlink to the database directory.
220  (void)io_unlink(name + "base" + other_base_letter());
221  both_bases = false;
222  latest_revision_number = revision_number;
223  }
224 
225  io_write_block(handle, reinterpret_cast<const char *>(p), block_size, n);
226 }
227 
228 
229 /* A note on cursors:
230 
231  Each B-tree level has a corresponding array element C[j] in a
232  cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
233  root block level. Within a level j,
234 
235  C[j].p addresses the block
236  C[j].c is the offset into the directory entry in the block
237  C[j].n is the number of the block at C[j].p
238 
239  A look up in the B-tree causes navigation of the blocks starting
240  from the root. In each block, p, we find an offset, c, to an item
241  which gives the number, n, of the block for the next level. This
242  leads to an array of values p,c,n which are held inside the cursor.
243 
244  Any Btree object B has a built-in cursor, at B->C. But other cursors may
245  be created. If BC is a created cursor, BC->C is the cursor in the
246  sense given above, and BC->B is the handle for the B-tree again.
247 */
248 
249 
250 void
252 {
253  LOGCALL_VOID(DB, "ChertTable::set_overwritten", NO_ARGS);
254  // If we're writable, there shouldn't be another writer who could cause
255  // overwritten to be flagged, so that's a DatabaseCorruptError.
256  if (writable)
257  throw Xapian::DatabaseCorruptError("Db block overwritten - are there multiple writers?");
258  throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
259 }
260 
261 /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
262  C, writing the block currently at C[j] back to disk if necessary.
263  Note that
264 
265  C[j].rewrite
266 
267  is true iff C[j].n is different from block n in file DB. If it is
268  false no rewriting is necessary.
269 */
270 
271 void
273 {
274  LOGCALL_VOID(DB, "ChertTable::block_to_cursor", (void*)C_ | j | n);
275  if (n == C_[j].n) return;
276  uint8_t * p = C_[j].p;
277  Assert(p);
278 
279  // FIXME: only needs to be done in write mode
280  if (C_[j].rewrite) {
281  Assert(writable);
282  Assert(C == C_);
283  write_block(C_[j].n, p);
284  C_[j].rewrite = false;
285  }
286 
287  // Check if the block is in the built-in cursor (potentially in
288  // modified form).
289  if (n == C[j].n) {
290  if (p != C[j].p)
291  memcpy(p, C[j].p, block_size);
292  } else {
293  read_block(n, p);
294  }
295 
296  C_[j].n = n;
297  if (j < level) {
298  /* unsigned comparison */
299  if (rare(REVISION(p) > REVISION(C_[j + 1].p))) {
300  set_overwritten();
301  return;
302  }
303  }
304 
305  if (rare(j != GET_LEVEL(p))) {
306  string msg = "Expected block ";
307  msg += str(n);
308  msg += " to be level ";
309  msg += str(j);
310  msg += ", not ";
311  msg += str(GET_LEVEL(p));
312  throw Xapian::DatabaseCorruptError(msg);
313  }
314 }
315 
337 void
339 {
340  LOGCALL_VOID(DB, "ChertTable::alter", NO_ARGS);
341  Assert(writable);
342 #ifdef DANGEROUS
343  C[0].rewrite = true;
344 #else
345  int j = 0;
346  uint8_t * p = C[j].p;
347  while (true) {
348  if (C[j].rewrite) return; /* all new, so return */
349  C[j].rewrite = true;
350 
351  uint4 n = C[j].n;
352  if (base.block_free_at_start(n)) {
353  Assert(REVISION(p) == latest_revision_number + 1);
354  return;
355  }
356  Assert(REVISION(p) < latest_revision_number + 1);
357  base.free_block(n);
358  n = base.next_free_block();
359  C[j].n = n;
360  SET_REVISION(p, latest_revision_number + 1);
361 
362  if (j == level) return;
363  j++;
364  p = C[j].p;
365  Item_wr(p, C[j].c).set_block_given_by(n);
366  }
367 #endif
368 }
369 
385 int
386 ChertTable::find_in_block(const uint8_t * p, Key key, bool leaf, int c)
387 {
388  LOGCALL_STATIC(DB, int, "ChertTable::find_in_block", (const void*)p | (const void *)key.get_address() | leaf | c);
389  // c should be odd (either -1, or an even offset from DIR_START).
390  Assert((c & 1) == 1);
391  int i = DIR_START;
392  if (leaf) i -= D2;
393  if (c != -1) {
394  AssertRel(i,<=,c);
395  }
396  int j = DIR_END(p);
397 
398  if (c != -1) {
399  if (c < j && i < c && Item(p, c).key() <= key)
400  i = c;
401  c += D2;
402  if (c < j && i < c && key < Item(p, c).key())
403  j = c;
404  }
405 
406  while (j - i > D2) {
407  int k = i + ((j - i)/(D2 * 2))*D2; /* mid way */
408  if (key < Item(p, k).key()) j = k; else i = k;
409  }
410  if (leaf) {
411  AssertRel(DIR_START - D2,<=,i);
412  } else {
413  AssertRel(DIR_START,<=,i);
414  }
415  AssertRel(i,<,DIR_END(p));
416  RETURN(i);
417 }
418 
432 bool
434 {
435  LOGCALL(DB, bool, "ChertTable::find", (void*)C_);
436  // Note: the parameter is needed when we're called by ChertCursor
437  const uint8_t * p;
438  int c;
439  Key key = kt.key();
440  for (int j = level; j > 0; --j) {
441  p = C_[j].p;
442  c = find_in_block(p, key, false, C_[j].c);
443 #ifdef BTREE_DEBUG_FULL
444  printf("Block in ChertTable:find - code position 1");
445  report_block_full(j, C_[j].n, p);
446 #endif /* BTREE_DEBUG_FULL */
447  C_[j].c = c;
448  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
449  }
450  p = C_[0].p;
451  c = find_in_block(p, key, true, C_[0].c);
452 #ifdef BTREE_DEBUG_FULL
453  printf("Block in ChertTable:find - code position 2");
454  report_block_full(0, C_[0].n, p);
455 #endif /* BTREE_DEBUG_FULL */
456  C_[0].c = c;
457  if (c < DIR_START) {
458  RETURN(false);
459  }
460  RETURN(Item(p, c).key() == key);
461 }
462 
468 void
470 {
471  LOGCALL_VOID(DB, "ChertTable::compact", (void*)p);
472  Assert(writable);
473  int e = block_size;
474  uint8_t * b = buffer;
475  int dir_end = DIR_END(p);
476  for (int c = DIR_START; c < dir_end; c += D2) {
477  Item item(p, c);
478  int l = item.size();
479  e -= l;
480  memmove(b + e, item.get_address(), l);
481  setD(p, c, e); /* reform in b */
482  }
483  memmove(p + e, b + e, block_size - e); /* copy back */
484  e -= dir_end;
485  SET_TOTAL_FREE(p, e);
486  SET_MAX_FREE(p, e);
487 }
488 
492 void
494 {
495  LOGCALL_VOID(DB, "ChertTable::split_root", split_n);
496  /* gain a level */
497  ++level;
498 
499  /* check level overflow - this isn't something that should ever happen
500  * but deserves more than an Assert()... */
501  if (level == BTREE_CURSOR_LEVELS) {
502  throw Xapian::DatabaseCorruptError("Btree has grown impossibly large (" STRINGIZE(BTREE_CURSOR_LEVELS) " levels)");
503  }
504 
505  uint8_t * q = zeroed_new(block_size);
506  C[level].p = q;
507  C[level].c = DIR_START;
508  C[level].n = base.next_free_block();
509  C[level].rewrite = true;
510  SET_REVISION(q, latest_revision_number + 1);
511  SET_LEVEL(q, level);
513  compact(q); /* to reset TOTAL_FREE, MAX_FREE */
514 
515  /* form a null key in b with a pointer to the old root */
516  uint8_t b[10]; /* 7 is exact */
517  Item_wr item(b);
518  item.form_null_key(split_n);
519  add_item(item, level);
520 }
521 
537 void
538 ChertTable::enter_key(int j, Key prevkey, Key newkey)
539 {
540  LOGCALL_VOID(DB, "ChertTable::enter_key", j | Literal("prevkey") | Literal("newkey"));
541  Assert(writable);
542  Assert(prevkey < newkey);
543  AssertRel(j,>=,1);
544 
545  uint4 blocknumber = C[j - 1].n;
546 
547  // FIXME update to use Key
548  // Keys are truncated here: but don't truncate the count at the end away.
549  const int newkey_len = newkey.length();
550  AssertRel(newkey_len,>,0);
551  int i;
552 
553  if (j == 1) {
554  // Truncate the key to the minimal key which differs from prevkey,
555  // the preceding key in the block.
556  i = 0;
557  const int min_len = min(newkey_len, prevkey.length());
558  while (i < min_len && prevkey[i] == newkey[i]) {
559  i++;
560  }
561 
562  // Want one byte of difference.
563  if (i < newkey_len) i++;
564  } else {
565  /* Can't truncate between branch levels, since the separated keys
566  * are in at the leaf level, and truncating again will change the
567  * branch point.
568  */
569  i = newkey_len;
570  }
571 
572  uint8_t b[UCHAR_MAX + 6];
573  Item_wr item(b);
574  Assert(i <= 256 - I2 - C2);
575  Assert(i <= (int)sizeof(b) - I2 - C2 - 4);
576  item.set_key_and_block(newkey, i, blocknumber);
577 
578  // When j > 1 we can make the first key of block p null. This is probably
579  // worthwhile as it trades a small amount of CPU and RAM use for a small
580  // saving in disk use. Other redundant keys will still creep in though.
581  if (j > 1) {
582  uint8_t * p = C[j - 1].p;
583  uint4 n = getint4(newkey.get_address(), newkey_len + K1 + C2);
584  int new_total_free = TOTAL_FREE(p) + newkey_len + C2;
585  // FIXME: incredibly icky going from key to item like this...
586  auto byte_addr = const_cast<uint8_t*>(newkey.get_address());
587  Item_wr(byte_addr - I2).form_null_key(n);
588  SET_TOTAL_FREE(p, new_total_free);
589  }
590 
591  // The split block gets inserted into the parent after the pointer to the
592  // current child.
593  AssertEq(C[j].c, find_in_block(C[j].p, item.key(), false, C[j].c));
594  C[j].c += D2;
595  C[j].rewrite = true; /* a subtle point: this *is* required. */
596  add_item(item, j);
597 }
598 
603 int
604 ChertTable::mid_point(uint8_t * p) const
605 {
606  LOGCALL(DB, int, "ChertTable::mid_point", (void*)p);
607  int n = 0;
608  int dir_end = DIR_END(p);
609  int size = block_size - TOTAL_FREE(p) - dir_end;
610  for (int c = DIR_START; c < dir_end; c += D2) {
611  int l = Item(p, c).size();
612  n += 2 * l;
613  if (n >= size) {
614  if (l < n - size) RETURN(c);
615  RETURN(c + D2);
616  }
617  }
618 
619  /* This shouldn't happen, as the sum of the item sizes should be the same
620  * as the value calculated in size, so assert but return a sane value just
621  * in case. */
622  Assert(false);
623  RETURN(dir_end);
624 }
625 
635 void
636 ChertTable::add_item_to_block(uint8_t * p, Item_wr kt_, int c)
637 {
638  LOGCALL_VOID(DB, "ChertTable::add_item_to_block", (void*)p | Literal("kt_") | c);
639  Assert(writable);
640  int dir_end = DIR_END(p);
641  int kt_len = kt_.size();
642  int needed = kt_len + D2;
643  int new_total = TOTAL_FREE(p) - needed;
644  int new_max = MAX_FREE(p) - needed;
645 
646  Assert(new_total >= 0);
647 
648  AssertRel(MAX_FREE(p),>=,needed);
649 
650  AssertRel(DIR_START,<=,c);
651  AssertRel(c,<=,dir_end);
652 
653  memmove(p + c + D2, p + c, dir_end - c);
654  dir_end += D2;
655  SET_DIR_END(p, dir_end);
656 
657  int o = dir_end + new_max;
658  setD(p, c, o);
659  memmove(p + o, kt_.get_address(), kt_len);
660 
661  SET_MAX_FREE(p, new_max);
662 
663  SET_TOTAL_FREE(p, new_total);
664 }
665 
671 void
673 {
674  LOGCALL_VOID(DB, "ChertTable::add_item", Literal("kt_") | j);
675  Assert(writable);
676  uint8_t * p = C[j].p;
677  int c = C[j].c;
678  uint4 n;
679 
680  int needed = kt_.size() + D2;
681  if (TOTAL_FREE(p) < needed) {
682  int m;
683  // Prepare to split p. After splitting, the block is in two halves, the
684  // lower half is split_p, the upper half p again. add_to_upper_half
685  // becomes true when the item gets added to p, false when it gets added
686  // to split_p.
687 
688  if (seq_count < 0) {
689  // If we're not in sequential mode, we split at the mid point
690  // of the node.
691  m = mid_point(p);
692  } else {
693  // During sequential addition, split at the insert point
694  AssertRel(c,>=,DIR_START);
695  m = c;
696  }
697 
698  uint4 split_n = C[j].n;
699  C[j].n = base.next_free_block();
700 
701  memcpy(split_p, p, block_size); // replicate the whole block in split_p
702  SET_DIR_END(split_p, m);
703  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
704 
705  {
706  int residue = DIR_END(p) - m;
707  int new_dir_end = DIR_START + residue;
708  memmove(p + DIR_START, p + m, residue);
709  SET_DIR_END(p, new_dir_end);
710  }
711 
712  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
713 
714  bool add_to_upper_half;
715  if (seq_count < 0) {
716  add_to_upper_half = (c >= m);
717  } else {
718  // And add item to lower half if split_p has room, otherwise upper
719  // half
720  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
721  }
722 
723  if (add_to_upper_half) {
724  c -= (m - DIR_START);
725  Assert(seq_count < 0 || c <= DIR_START + D2);
726  Assert(c >= DIR_START);
727  Assert(c <= DIR_END(p));
728  add_item_to_block(p, kt_, c);
729  n = C[j].n;
730  } else {
731  Assert(c >= DIR_START);
732  Assert(c <= DIR_END(split_p));
733  add_item_to_block(split_p, kt_, c);
734  n = split_n;
735  }
736  write_block(split_n, split_p);
737 
738  // Check if we're splitting the root block.
739  if (j == level) split_root(split_n);
740 
741  /* Enter a separating key at level j + 1 between */
742  /* the last key of block split_p, and the first key of block p */
743  enter_key(j + 1,
744  Item(split_p, DIR_END(split_p) - D2).key(),
745  Item(p, DIR_START).key());
746  } else {
747  AssertRel(TOTAL_FREE(p),>=,needed);
748 
749  if (MAX_FREE(p) < needed) {
750  compact(p);
751  AssertRel(MAX_FREE(p),>=,needed);
752  }
753 
754  add_item_to_block(p, kt_, c);
755  n = C[j].n;
756  }
757  if (j == 0) {
758  changed_n = n;
759  changed_c = c;
760  }
761 }
762 
770 void
771 ChertTable::delete_item(int j, bool repeatedly)
772 {
773  LOGCALL_VOID(DB, "ChertTable::delete_item", j | repeatedly);
774  Assert(writable);
775  uint8_t * p = C[j].p;
776  int c = C[j].c;
777  AssertRel(DIR_START,<=,c);
778  AssertRel(c,<,DIR_END(p));
779  int kt_len = Item(p, c).size(); /* size of the item to be deleted */
780  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
781 
782  memmove(p + c, p + c + D2, dir_end - c);
783  SET_DIR_END(p, dir_end);
784  SET_MAX_FREE(p, MAX_FREE(p) + D2);
785  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
786 
787  if (!repeatedly) return;
788  if (j < level) {
789  if (dir_end == DIR_START) {
790  base.free_block(C[j].n);
791  C[j].rewrite = false;
792  C[j].n = BLK_UNUSED;
793  C[j + 1].rewrite = true; /* *is* necessary */
794  delete_item(j + 1, true);
795  }
796  } else {
797  Assert(j == level);
798  while (dir_end == DIR_START + D2 && level > 0) {
799  /* single item in the root block, so lose a level */
800  uint4 new_root = Item(p, DIR_START).block_given_by();
801  delete [] p;
802  C[level].p = 0;
803  base.free_block(C[level].n);
804  C[level].rewrite = false;
805  C[level].n = BLK_UNUSED;
806  level--;
807 
808  block_to_cursor(C, level, new_root);
809 
810  p = C[level].p;
811  dir_end = DIR_END(p); /* prepare for the loop */
812  }
813  }
814 }
815 
816 /* debugging aid:
817 static addcount = 0;
818 */
819 
845 int
847 {
848  LOGCALL(DB, int, "ChertTable::add_kt", found);
849  Assert(writable);
850  int components = 0;
851 
852  /*
853  {
854  printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
855  print_bytes(kt[I2] - K1 - C2, kt + I2 + K1); putchar('\n');
856  }
857  */
858  alter();
859 
860  if (found) { /* replacement */
861  seq_count = SEQ_START_POINT;
862  sequential = false;
863 
864  uint8_t * p = C[0].p;
865  int c = C[0].c;
866  AssertRel(DIR_START,<=,c);
867  AssertRel(c,<,DIR_END(p));
868  Item item(p, c);
869  int kt_size = kt.size();
870  int needed = kt_size - item.size();
871 
872  components = item.components_of();
873 
874  if (needed <= 0) {
875  /* simple replacement */
876  memmove(const_cast<uint8_t *>(item.get_address()),
877  kt.get_address(), kt_size);
878  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
879  } else {
880  /* new item into the block's freespace */
881  int new_max = MAX_FREE(p) - kt_size;
882  if (new_max >= 0) {
883  int o = DIR_END(p) + new_max;
884  memmove(p + o, kt.get_address(), kt_size);
885  setD(p, c, o);
886  SET_MAX_FREE(p, new_max);
887  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
888  } else {
889  /* do it the long way */
890  delete_item(0, false);
891  add_item(kt, 0);
892  }
893  }
894  } else {
895  /* addition */
896  if (changed_n == C[0].n && changed_c == C[0].c) {
897  if (seq_count < 0) seq_count++;
898  } else {
899  seq_count = SEQ_START_POINT;
900  sequential = false;
901  }
902  C[0].c += D2;
903  add_item(kt, 0);
904  }
905  RETURN(components);
906 }
907 
908 /* delete_kt() corresponds to add_kt(found), but there are only
909  two cases: if the key is not found nothing is done, and if it is
910  found the corresponding item is deleted with delete_item.
911 */
912 
913 int
915 {
916  LOGCALL(DB, int, "ChertTable::delete_kt", NO_ARGS);
917  Assert(writable);
918 
919  bool found = find(C);
920 
921  int components = 0;
922  seq_count = SEQ_START_POINT;
923  sequential = false;
924 
925  /*
926  {
927  printf("%d) %s ", addcount++, (found ? "deleting " : "ignoring "));
928  print_bytes(B->kt[I2] - K1 - C2, B->kt + I2 + K1); putchar('\n');
929  }
930  */
931  if (found) {
932  components = Item(C[0].p, C[0].c).components_of();
933  alter();
934  delete_item(0, true);
935  }
936  RETURN(components);
937 }
938 
939 /* ChertTable::form_key(key) treats address kt as an item holder and fills in
940 the key part:
941 
942  (I) K key c (C tag)
943 
944 The bracketed parts are left blank. The key is filled in with key_len bytes and
945 K set accordingly. c is set to 1.
946 */
947 
948 void ChertTable::form_key(const string & key) const
949 {
950  LOGCALL_VOID(DB, "ChertTable::form_key", key);
951  kt.form_key(key);
952 }
953 
954 /* ChertTable::add(key, tag) adds the key/tag item to the
955  B-tree, replacing any existing item with the same key.
956 
957  For a long tag, we end up having to add m components, of the form
958 
959  key 1 m tag1
960  key 2 m tag2
961  ...
962  key m m tagm
963 
964  and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
965  n components of the form
966 
967  key 1 n TAG1
968  key 2 n TAG2
969  ...
970  key n n TAGn
971 
972  and n may be greater than, equal to, or less than m. These cases are dealt
973  with in the code below. If m < n for example, we end up with a series of
974  deletions.
975 */
976 
977 void
978 ChertTable::add(const string &key, string tag, bool already_compressed)
979 {
980  LOGCALL_VOID(DB, "ChertTable::add", key | tag | already_compressed);
981  Assert(writable);
982 
983  if (handle < 0) create_and_open(block_size);
984 
985  form_key(key);
986 
987  bool compressed = false;
988  if (already_compressed) {
989  compressed = true;
990  } else if (compress_strategy != DONT_COMPRESS && tag.size() > COMPRESS_MIN) {
991  static_assert(DONT_COMPRESS != Z_DEFAULT_STRATEGY,
992  "DONT_COMPRESS clashes with zlib constant");
993  static_assert(DONT_COMPRESS != Z_FILTERED,
994  "DONT_COMPRESS clashes with zlib constant");
995  static_assert(DONT_COMPRESS != Z_HUFFMAN_ONLY,
996  "DONT_COMPRESS clashes with zlib constant");
997 #ifdef Z_RLE
998  static_assert(DONT_COMPRESS != Z_RLE,
999  "DONT_COMPRESS clashes with zlib constant");
1000 #endif
1001 
1002  lazy_alloc_deflate_zstream();
1003 
1004  deflate_zstream->next_in =
1005  reinterpret_cast<Bytef *>(const_cast<char *>(tag.data()));
1006  deflate_zstream->avail_in = static_cast<uInt>(tag.size());
1007 
1008  // If compressed size is >= tag.size(), we don't want to compress.
1009  unsigned long blk_len = tag.size() - 1;
1010  unsigned char * blk = new unsigned char[blk_len];
1011  deflate_zstream->next_out = blk;
1012  deflate_zstream->avail_out = static_cast<uInt>(blk_len);
1013 
1014  int err = deflate(deflate_zstream, Z_FINISH);
1015  if (err == Z_STREAM_END) {
1016  // If deflate succeeded, then the output was at least one byte
1017  // smaller than the input.
1018  tag.assign(reinterpret_cast<const char *>(blk), deflate_zstream->total_out);
1019  compressed = true;
1020  } else {
1021  // Deflate failed - presumably the data wasn't compressible.
1022  }
1023 
1024  delete [] blk;
1025  }
1026 
1027  // sort of matching kt.append_chunk(), but setting the chunk
1028  const size_t cd = kt.key().length() + K1 + I2 + C2 + C2; // offset to the tag data
1029  const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
1030  size_t first_L = L; // - amount for tag1
1031  bool found = find(C);
1032  if (!found) {
1033  uint8_t * p = C[0].p;
1034  size_t n = TOTAL_FREE(p) % (max_item_size + D2);
1035  if (n > D2 + cd) {
1036  n -= (D2 + cd);
1037  // if n >= last then fully filling this block won't produce
1038  // an extra item, so we might as well do this even if
1039  // full_compaction isn't active.
1040  //
1041  // In the full_compaction case, it turns out we shouldn't always
1042  // try to fill every last byte. Doing so can actually increase the
1043  // total space required (I believe this effect is due to longer
1044  // dividing keys being required in the index blocks). Empirically,
1045  // n >= key.size() + K appears a good criterion for K ~= 34. This
1046  // seems to save about 0.2% in total database size over always
1047  // splitting the tag. It'll also give be slightly faster retrieval
1048  // as we can avoid reading an extra block occasionally.
1049  size_t last = tag.length() % L;
1050  if (n >= last || (full_compaction && n >= key.size() + 34))
1051  first_L = n;
1052  }
1053  }
1054 
1055  // a null tag must be added in of course
1056  int m = tag.empty() ? 1 : (tag.length() - first_L + L - 1) / L + 1;
1057  // there are m items to add
1058  /* FIXME: sort out this error higher up and turn this into
1059  * an assert.
1060  */
1061  if (m >= BYTE_PAIR_RANGE)
1062  throw Xapian::UnimplementedError("Can't handle insanely large tags");
1063 
1064  int n = 0; // initialise to shut off warning
1065  // - and there will be n to delete
1066  int o = 0; // Offset into the tag
1067  size_t residue = tag.length(); // Bytes of the tag remaining to add in
1068  int replacement = false; // Has there been a replacement ?
1069  int i;
1070  kt.set_components_of(m);
1071  for (i = 1; i <= m; i++) {
1072  size_t l = (i == m ? residue : (i == 1 ? first_L : L));
1073  Assert(cd + l <= block_size);
1074  Assert(string::size_type(o + l) <= tag.length());
1075  kt.set_tag(cd, tag.data() + o, l, compressed);
1076  kt.set_component_of(i);
1077 
1078  o += l;
1079  residue -= l;
1080 
1081  if (i > 1) found = find(C);
1082  n = add_kt(found);
1083  if (n > 0) replacement = true;
1084  }
1085  /* o == tag.length() here, and n may be zero */
1086  for (i = m + 1; i <= n; i++) {
1087  kt.set_component_of(i);
1088  delete_kt();
1089  }
1090  if (!replacement) ++item_count;
1091  Btree_modified = true;
1092  if (cursor_created_since_last_modification) {
1093  cursor_created_since_last_modification = false;
1094  ++cursor_version;
1095  }
1096 }
1097 
1098 /* ChertTable::del(key) returns false if the key is not in the B-tree,
1099  otherwise deletes it and returns true.
1100 
1101  Again, this is parallel to ChertTable::add, but simpler in form.
1102 */
1103 
1104 bool
1105 ChertTable::del(const string &key)
1106 {
1107  LOGCALL(DB, bool, "ChertTable::del", key);
1108  Assert(writable);
1109 
1110  if (handle < 0) {
1111  if (handle == -2) {
1113  }
1114  RETURN(false);
1115  }
1116 
1117  // We can't delete a key which we is too long for us to store.
1118  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1119 
1120  if (key.empty()) RETURN(false);
1121  form_key(key);
1122 
1123  int n = delete_kt(); /* there are n items to delete */
1124  if (n <= 0) RETURN(false);
1125 
1126  for (int i = 2; i <= n; i++) {
1127  kt.set_component_of(i);
1128  delete_kt();
1129  }
1130 
1131  item_count--;
1132  Btree_modified = true;
1133  if (cursor_created_since_last_modification) {
1134  cursor_created_since_last_modification = false;
1135  ++cursor_version;
1136  }
1137  RETURN(true);
1138 }
1139 
1140 bool
1141 ChertTable::readahead_key(const string &key) const
1142 {
1143  LOGCALL(DB, bool, "ChertTable::readahead_key", key);
1144  Assert(!key.empty());
1145 
1146  // Two cases:
1147  //
1148  // handle = -1: Lazy table which isn't yet open
1149  //
1150  // handle = -2: Table has been closed. Since the readahead is just a
1151  // hint, we can safely ignore it for a closed table.
1152  if (handle < 0)
1153  RETURN(false);
1154 
1155  // If the table only has one level, there are no branch blocks to preread.
1156  if (level == 0)
1157  RETURN(false);
1158 
1159  form_key(key);
1160  Key ktkey = kt.key();
1161 
1162  // We'll only readahead the first level, since descending the B-tree would
1163  // require actual reads that would likely hurt performance more than help.
1164  const uint8_t * p = C[level].p;
1165  int c = find_in_block(p, ktkey, false, C[level].c);
1166  uint4 n = Item(p, c).block_given_by();
1167  // Don't preread if it's the block we last preread or already in the
1168  // cursor.
1169  if (n != last_readahead && n != C[level - 1].n) {
1170  /* Use the base bit_map_size not the bitmap's size, because the latter
1171  * is uninitialised in readonly mode.
1172  */
1173  Assert(n / CHAR_BIT < base.get_bit_map_size());
1174 
1175  last_readahead = n;
1176  if (!io_readahead_block(handle, block_size, n))
1177  RETURN(false);
1178  }
1179  RETURN(true);
1180 }
1181 
1182 bool
1183 ChertTable::get_exact_entry(const string &key, string & tag) const
1184 {
1185  LOGCALL(DB, bool, "ChertTable::get_exact_entry", key | tag);
1186  Assert(!key.empty());
1187 
1188  if (handle < 0) {
1189  if (handle == -2) {
1191  }
1192  RETURN(false);
1193  }
1194 
1195  // An oversized key can't exist, so attempting to search for it should fail.
1196  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1197 
1198  form_key(key);
1199  if (!find(C)) RETURN(false);
1200 
1201  (void)read_tag(C, &tag, false);
1202  RETURN(true);
1203 }
1204 
1205 bool
1206 ChertTable::key_exists(const string &key) const
1207 {
1208  LOGCALL(DB, bool, "ChertTable::key_exists", key);
1209  Assert(!key.empty());
1210 
1211  // An oversized key can't exist, so attempting to search for it should fail.
1212  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1213 
1214  form_key(key);
1215  RETURN(find(C));
1216 }
1217 
1218 bool
1219 ChertTable::read_tag(Cursor * C_, string *tag, bool keep_compressed) const
1220 {
1221  LOGCALL(DB, bool, "ChertTable::read_tag", Literal("C_") | tag | keep_compressed);
1222  Item item(C_[0].p, C_[0].c);
1223 
1224  /* n components to join */
1225  int n = item.components_of();
1226 
1227  tag->resize(0);
1228  // max_item_size also includes K1 + I2 + C2 + C2 bytes overhead and the key
1229  // (which is at least 1 byte long).
1230  if (n > 1) tag->reserve((max_item_size - (1 + K1 + I2 + C2 + C2)) * n);
1231 
1232  item.append_chunk(tag);
1233  bool compressed = item.get_compressed();
1234 
1235  for (int i = 2; i <= n; i++) {
1236  if (!next(C_, 0)) {
1237  throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
1238  }
1239  (void)Item(C_[0].p, C_[0].c).append_chunk(tag);
1240  }
1241  // At this point the cursor is on the last item - calling next will move
1242  // it to the next key (ChertCursor::get_tag() relies on this).
1243  if (!compressed || keep_compressed) RETURN(compressed);
1244 
1245  // FIXME: Perhaps we should decompress each chunk as we read it so we
1246  // don't need both the full compressed and uncompressed tags in memory
1247  // at once.
1248 
1249  string utag;
1250  // May not be enough for a compressed tag, but it's a reasonable guess.
1251  utag.reserve(tag->size() + tag->size() / 2);
1252 
1253  Bytef buf[8192];
1254 
1255  lazy_alloc_inflate_zstream();
1256 
1257  inflate_zstream->next_in =
1258  reinterpret_cast<Bytef*>(const_cast<char *>(tag->data()));
1259  inflate_zstream->avail_in = static_cast<uInt>(tag->size());
1260 
1261  int err = Z_OK;
1262  while (err != Z_STREAM_END) {
1263  inflate_zstream->next_out = buf;
1264  inflate_zstream->avail_out = static_cast<uInt>(sizeof(buf));
1265  err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1266  if (err == Z_BUF_ERROR && inflate_zstream->avail_in == 0) {
1267  LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << inflate_zstream->adler);
1268  Bytef header2[4];
1269  aligned_write4(header2, inflate_zstream->adler);
1270  inflate_zstream->next_in = header2;
1271  inflate_zstream->avail_in = 4;
1272  err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1273  if (err == Z_STREAM_END) break;
1274  }
1275 
1276  if (err != Z_OK && err != Z_STREAM_END) {
1277  if (err == Z_MEM_ERROR) throw std::bad_alloc();
1278  string msg = "inflate failed";
1279  if (inflate_zstream->msg) {
1280  msg += " (";
1281  msg += inflate_zstream->msg;
1282  msg += ')';
1283  }
1284  throw Xapian::DatabaseError(msg);
1285  }
1286 
1287  utag.append(reinterpret_cast<const char *>(buf),
1288  inflate_zstream->next_out - buf);
1289  }
1290  if (utag.size() != inflate_zstream->total_out) {
1291  string msg = "compressed tag didn't expand to the expected size: ";
1292  msg += str(utag.size());
1293  msg += " != ";
1294  // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
1295  msg += str(size_t(inflate_zstream->total_out));
1296  throw Xapian::DatabaseCorruptError(msg);
1297  }
1298 
1299  swap(*tag, utag);
1300 
1301  RETURN(false);
1302 }
1303 
1304 void
1306 {
1307  LOGCALL_VOID(DB, "ChertTable::set_full_compaction", parity);
1308  Assert(writable);
1309 
1310  if (parity) seq_count = 0;
1311  full_compaction = parity;
1312 }
1313 
1315  LOGCALL(DB, ChertCursor *, "ChertTable::cursor_get", NO_ARGS);
1316  if (handle < 0) {
1317  if (handle == -2) {
1319  }
1320  RETURN(NULL);
1321  }
1322  // FIXME Ick - casting away const is nasty
1323  RETURN(new ChertCursor(const_cast<ChertTable *>(this)));
1324 }
1325 
1326 /************ B-tree opening and closing ************/
1327 
1328 bool
1329 ChertTable::basic_open(bool revision_supplied, chert_revision_number_t revision_)
1330 {
1331  LOGCALL(DB, bool, "ChertTable::basic_open", revision_supplied | revision_);
1332  int ch = 'X'; /* will be 'A' or 'B' */
1333 
1334  {
1335  const size_t BTREE_BASES = 2;
1336  string err_msg;
1337  static const char basenames[BTREE_BASES] = { 'A', 'B' };
1338 
1339  ChertTable_base bases[BTREE_BASES];
1340  bool base_ok[BTREE_BASES];
1341 
1342  both_bases = true;
1343  bool valid_base = false;
1344  {
1345  for (size_t i = 0; i < BTREE_BASES; ++i) {
1346  bool ok = bases[i].read(name, basenames[i], writable, err_msg);
1347  base_ok[i] = ok;
1348  if (ok) {
1349  valid_base = true;
1350  } else {
1351  both_bases = false;
1352  }
1353  }
1354  }
1355 
1356  if (!valid_base) {
1357  if (handle >= 0) {
1358  ::close(handle);
1359  handle = -1;
1360  }
1361  string message = "Error opening table '";
1362  message += name;
1363  message += "':\n";
1364  message += err_msg;
1365  throw Xapian::DatabaseOpeningError(message);
1366  }
1367 
1368  if (revision_supplied) {
1369  bool found_revision = false;
1370  for (size_t i = 0; i < BTREE_BASES; ++i) {
1371  if (base_ok[i] && bases[i].get_revision() == revision_) {
1372  ch = basenames[i];
1373  found_revision = true;
1374  break;
1375  }
1376  }
1377  if (!found_revision) {
1378  /* Couldn't open the revision that was asked for.
1379  * This shouldn't throw an exception, but should just return
1380  * false to upper levels.
1381  */
1382  RETURN(false);
1383  }
1384  } else {
1385  chert_revision_number_t highest_revision = 0;
1386  for (size_t i = 0; i < BTREE_BASES; ++i) {
1387  if (base_ok[i] && bases[i].get_revision() >= highest_revision) {
1388  ch = basenames[i];
1389  highest_revision = bases[i].get_revision();
1390  }
1391  }
1392  }
1393 
1394  ChertTable_base *basep = 0;
1395  ChertTable_base *other_base = 0;
1396 
1397  for (size_t i = 0; i < BTREE_BASES; ++i) {
1398  LOGLINE(DB, "Checking (ch == " << ch << ") against "
1399  "basenames[" << i << "] == " << basenames[i]);
1400  LOGLINE(DB, "bases[" << i << "].get_revision() == " <<
1401  bases[i].get_revision());
1402  LOGLINE(DB, "base_ok[" << i << "] == " << base_ok[i]);
1403  if (ch == basenames[i]) {
1404  basep = &bases[i];
1405 
1406  // FIXME: assuming only two bases for other_base
1407  size_t otherbase_num = 1 - i;
1408  if (base_ok[otherbase_num]) {
1409  other_base = &bases[otherbase_num];
1410  }
1411  break;
1412  }
1413  }
1414  Assert(basep);
1415 
1416  /* basep now points to the most recent base block */
1417 
1418  /* Avoid copying the bitmap etc. - swap contents with the base
1419  * object in the vector, since it'll be destroyed anyway soon.
1420  */
1421  base.swap(*basep);
1422 
1423  revision_number = base.get_revision();
1424  block_size = base.get_block_size();
1425  root = base.get_root();
1426  level = base.get_level();
1427  //bit_map_size = basep->get_bit_map_size();
1428  item_count = base.get_item_count();
1429  faked_root_block = base.get_have_fakeroot();
1430  sequential = base.get_sequential();
1431 
1432  if (other_base != 0) {
1433  latest_revision_number = other_base->get_revision();
1434  if (revision_number > latest_revision_number)
1435  latest_revision_number = revision_number;
1436  } else {
1437  latest_revision_number = revision_number;
1438  }
1439  }
1440 
1441  /* kt holds constructed items as well as keys */
1442  kt = Item_wr(zeroed_new(block_size));
1443 
1444  set_max_item_size(BLOCK_CAPACITY);
1445 
1446  base_letter = ch;
1447 
1448  if (cursor_created_since_last_modification) {
1449  cursor_created_since_last_modification = false;
1450  ++cursor_version;
1451  }
1452 
1453  /* ready to open the main file */
1454 
1455  RETURN(true);
1456 }
1457 
1458 void
1460 {
1461  LOGCALL_VOID(DB, "ChertTable::read_root", NO_ARGS);
1462  if (faked_root_block) {
1463  /* root block for an unmodified database. */
1464  uint8_t * p = C[0].p;
1465  Assert(p);
1466 
1467  /* clear block - shouldn't be necessary, but is a bit nicer,
1468  * and means that the same operations should always produce
1469  * the same database. */
1470  memset(p, 0, block_size);
1471 
1472  int o = block_size - I2 - K1 - C2 - C2;
1473  Item_wr(p + o).fake_root_item();
1474 
1475  setD(p, DIR_START, o); // its directory entry
1476  SET_DIR_END(p, DIR_START + D2);// the directory size
1477 
1478  o -= (DIR_START + D2);
1479  SET_MAX_FREE(p, o);
1480  SET_TOTAL_FREE(p, o);
1481  SET_LEVEL(p, 0);
1482 
1483  if (!writable) {
1484  /* reading - revision number doesn't matter as long as
1485  * it's not greater than the current one. */
1486  SET_REVISION(p, 0);
1487  C[0].n = 0;
1488  } else {
1489  /* writing - */
1490  SET_REVISION(p, latest_revision_number + 1);
1491  C[0].n = base.next_free_block();
1492  }
1493  } else {
1494  /* using a root block stored on disk */
1495  block_to_cursor(C, level, root);
1496 
1497  if (REVISION(C[level].p) > revision_number) set_overwritten();
1498  /* although this is unlikely */
1499  }
1500 }
1501 
1502 bool
1503 ChertTable::do_open_to_write(bool revision_supplied,
1504  chert_revision_number_t revision_,
1505  bool create_db)
1506 {
1507  LOGCALL(DB, bool, "ChertTable::do_open_to_write", revision_supplied | revision_ | create_db);
1508  if (handle == -2) {
1510  }
1511  handle = io_open_block_wr(name + "DB", create_db);
1512  if (handle < 0) {
1513  // lazy doesn't make a lot of sense with create_db anyway, but ENOENT
1514  // with O_CREAT means a parent directory doesn't exist.
1515  if (lazy && !create_db && errno == ENOENT) {
1516  revision_number = revision_;
1517  RETURN(true);
1518  }
1519  string message(create_db ? "Couldn't create " : "Couldn't open ");
1520  message += name;
1521  message += "DB read/write: ";
1522  errno_to_string(errno, message);
1523  throw Xapian::DatabaseOpeningError(message);
1524  }
1525 
1526  if (!basic_open(revision_supplied, revision_)) {
1527  ::close(handle);
1528  handle = -1;
1529  if (!revision_supplied) {
1530  throw Xapian::DatabaseOpeningError("Failed to open for writing");
1531  }
1532  /* When the revision is supplied, it's not an exceptional
1533  * case when open failed, so we just return false here.
1534  */
1535  RETURN(false);
1536  }
1537 
1538  writable = true;
1539 
1540  for (int j = 0; j <= level; j++) {
1541  C[j].n = BLK_UNUSED;
1542  C[j].p = new uint8_t[block_size];
1543  }
1544  split_p = new uint8_t[block_size];
1545  read_root();
1546 
1547  buffer = zeroed_new(block_size);
1548 
1549  changed_n = 0;
1550  changed_c = DIR_START;
1551  seq_count = SEQ_START_POINT;
1552 
1553  RETURN(true);
1554 }
1555 
1556 ChertTable::ChertTable(const char * tablename_, const string & path_,
1557  bool readonly_, int compress_strategy_, bool lazy_)
1558  : tablename(tablename_),
1559  revision_number(0),
1560  item_count(0),
1561  block_size(0),
1562  latest_revision_number(0),
1563  both_bases(false),
1564  base_letter('A'),
1565  faked_root_block(true),
1566  sequential(true),
1567  handle(-1),
1568  level(0),
1569  root(0),
1570  kt(0),
1571  buffer(0),
1572  base(),
1573  name(path_),
1574  seq_count(0),
1575  changed_n(0),
1576  changed_c(0),
1577  max_item_size(0),
1578  Btree_modified(false),
1579  full_compaction(false),
1580  writable(!readonly_),
1581  cursor_created_since_last_modification(false),
1582  cursor_version(0),
1583  split_p(0),
1584  compress_strategy(compress_strategy_),
1585  deflate_zstream(NULL),
1586  inflate_zstream(NULL),
1587  lazy(lazy_),
1588  last_readahead(BLK_UNUSED)
1589 {
1590  LOGCALL_CTOR(DB, "ChertTable", tablename_ | path_ | readonly_ | compress_strategy_ | lazy_);
1591 }
1592 
1593 bool
1595 {
1596  if (handle < 0) {
1597  if (handle == -2) {
1599  }
1600  return true;
1601  }
1602  ChertCursor cur(const_cast<ChertTable*>(this));
1603  cur.find_entry(string());
1604  return !cur.next();
1605 }
1606 
1607 void
1609  if (usual(deflate_zstream)) {
1610  if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
1611  // Try to recover by deleting the stream and starting from scratch.
1612  delete deflate_zstream;
1613  }
1614 
1615  deflate_zstream = new z_stream;
1616 
1617  deflate_zstream->zalloc = Z_NULL;
1618  deflate_zstream->zfree = Z_NULL;
1619  deflate_zstream->opaque = Z_NULL;
1620 
1621  // -15 means raw deflate with 32K LZ77 window (largest)
1622  // memLevel 9 is the highest (8 is default)
1623  int err;
1624  err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
1625  -15, 9, compress_strategy);
1626  if (rare(err != Z_OK)) {
1627  if (err == Z_MEM_ERROR) {
1628  delete deflate_zstream;
1629  deflate_zstream = 0;
1630  throw std::bad_alloc();
1631  }
1632  string msg = "deflateInit2 failed (";
1633  if (deflate_zstream->msg) {
1634  msg += deflate_zstream->msg;
1635  } else {
1636  msg += str(err);
1637  }
1638  msg += ')';
1639  delete deflate_zstream;
1640  deflate_zstream = 0;
1641  throw Xapian::DatabaseError(msg);
1642  }
1643 }
1644 
1645 void
1647  if (usual(inflate_zstream)) {
1648  if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
1649  // Try to recover by deleting the stream and starting from scratch.
1650  delete inflate_zstream;
1651  }
1652 
1653  inflate_zstream = new z_stream;
1654 
1655  inflate_zstream->zalloc = Z_NULL;
1656  inflate_zstream->zfree = Z_NULL;
1657  inflate_zstream->opaque = Z_NULL;
1658 
1659  inflate_zstream->next_in = Z_NULL;
1660  inflate_zstream->avail_in = 0;
1661 
1662  int err = inflateInit2(inflate_zstream, -15);
1663  if (rare(err != Z_OK)) {
1664  if (err == Z_MEM_ERROR) {
1665  delete inflate_zstream;
1666  inflate_zstream = 0;
1667  throw std::bad_alloc();
1668  }
1669  string msg = "inflateInit2 failed (";
1670  if (inflate_zstream->msg) {
1671  msg += inflate_zstream->msg;
1672  } else {
1673  msg += str(err);
1674  }
1675  msg += ')';
1676  delete inflate_zstream;
1677  inflate_zstream = 0;
1678  throw Xapian::DatabaseError(msg);
1679  }
1680 }
1681 
1682 bool
1684  LOGCALL(DB, bool, "ChertTable::exists", NO_ARGS);
1685  RETURN(file_exists(name + "DB") &&
1686  (file_exists(name + "baseA") || file_exists(name + "baseB")));
1687 }
1688 
1689 void
1691 {
1692  LOGCALL_VOID(DB, "ChertTable::erase", NO_ARGS);
1693  close();
1694 
1695  (void)io_unlink(name + "baseA");
1696  (void)io_unlink(name + "baseB");
1697  (void)io_unlink(name + "DB");
1698 }
1699 
1700 void
1701 ChertTable::set_block_size(unsigned int block_size_)
1702 {
1703  LOGCALL_VOID(DB, "ChertTable::set_block_size", block_size_);
1704  // Block size must in the range 2048..BYTE_PAIR_RANGE, and a power of two.
1705  if (block_size_ < 2048 || block_size_ > BYTE_PAIR_RANGE ||
1706  (block_size_ & (block_size_ - 1)) != 0) {
1707  block_size_ = CHERT_DEFAULT_BLOCK_SIZE;
1708  }
1709  block_size = block_size_;
1710 }
1711 
1712 void
1713 ChertTable::create_and_open(unsigned int block_size_)
1714 {
1715  LOGCALL_VOID(DB, "ChertTable::create_and_open", block_size_);
1716  if (handle == -2) {
1718  }
1719  Assert(writable);
1720  close();
1721 
1722  set_block_size(block_size_);
1723 
1724  // FIXME: it would be good to arrange that this works such that there's
1725  // always a valid table in place if you run create_and_open() on an
1726  // existing table.
1727 
1728  /* write initial values to files */
1729 
1730  /* create the base file */
1731  ChertTable_base base_;
1733  base_.set_block_size(block_size);
1734  base_.set_have_fakeroot(true);
1735  base_.set_sequential(true);
1736  // Doing a full sync here would be overly paranoid, as an empty table
1737  // contains no precious data and xapian-check can recreate lost base
1738  // files.
1739  base_.write_to_file(name + "baseA", 'A', string(), -1, NULL);
1740 
1741  /* remove the alternative base file, if any */
1742  (void)io_unlink(name + "baseB");
1743 
1744  // Any errors are thrown if revision_supplied is false.
1745  (void)do_open_to_write(false, 0, true);
1746 }
1747 
1749  LOGCALL_DTOR(DB, "ChertTable");
1751 
1752  if (deflate_zstream) {
1753  // Errors which we care about have already been handled, so just ignore
1754  // any which get returned here.
1755  (void) deflateEnd(deflate_zstream);
1756  delete deflate_zstream;
1757  }
1758 
1759  if (inflate_zstream) {
1760  // Errors which we care about have already been handled, so just ignore
1761  // any which get returned here.
1762  (void) inflateEnd(inflate_zstream);
1763  delete inflate_zstream;
1764  }
1765 }
1766 
1767 void ChertTable::close(bool permanent) {
1768  LOGCALL_VOID(DB, "ChertTable::close", permanent);
1769 
1770  if (handle >= 0) {
1771  // If an error occurs here, we just ignore it, since we're just
1772  // trying to free everything.
1773  (void)::close(handle);
1774  handle = -1;
1775  }
1776 
1777  if (permanent) {
1778  handle = -2;
1779  // Don't delete the resources in the table, since they may
1780  // still be used to look up cached content.
1781  return;
1782  }
1783  for (int j = level; j >= 0; j--) {
1784  delete [] C[j].p;
1785  C[j].p = 0;
1786  }
1787  delete [] split_p;
1788  split_p = 0;
1789 
1790  delete [] kt.get_address();
1791  kt = Item_wr(0);
1792  delete [] buffer;
1793  buffer = 0;
1794 }
1795 
1796 void
1798 {
1799  LOGCALL_VOID(DB, "ChertTable::flush_db", NO_ARGS);
1800  Assert(writable);
1801  if (handle < 0) {
1802  if (handle == -2) {
1804  }
1805  return;
1806  }
1807 
1808  for (int j = level; j >= 0; j--) {
1809  if (C[j].rewrite) {
1810  write_block(C[j].n, C[j].p);
1811  }
1812  }
1813 
1814  if (Btree_modified) {
1815  faked_root_block = false;
1816  }
1817 }
1818 
1819 void
1821  const string * changes_tail)
1822 {
1823  LOGCALL_VOID(DB, "ChertTable::commit", revision | changes_fd | changes_tail);
1824  Assert(writable);
1825 
1826  if (revision <= revision_number) {
1827  throw Xapian::DatabaseError("New revision too low");
1828  }
1829 
1830  if (handle < 0) {
1831  if (handle == -2) {
1833  }
1835  return;
1836  }
1837 
1838  try {
1839  if (faked_root_block) {
1840  /* We will use a dummy bitmap. */
1841  base.clear_bit_map();
1842  }
1843 
1844  base.set_revision(revision);
1845  base.set_root(C[level].n);
1846  base.set_level(level);
1850 
1852 
1853  both_bases = true;
1855  root = C[level].n;
1856 
1857  Btree_modified = false;
1858 
1859  for (int i = 0; i < BTREE_CURSOR_LEVELS; ++i) {
1860  C[i].n = BLK_UNUSED;
1861  C[i].c = -1;
1862  C[i].rewrite = false;
1863  }
1864 
1865  // Save to "<table>.tmp" and then rename to "<table>.base<letter>" so
1866  // that a reader can't try to read a partially written base file.
1867  string tmp = name;
1868  tmp += "tmp";
1869  string basefile = name;
1870  basefile += "base";
1871  basefile += char(base_letter);
1872  base.write_to_file(tmp, base_letter, tablename, changes_fd, changes_tail);
1873 
1874  // Do this as late as possible to allow maximum time for writes to
1875  // happen, and so the calls to io_sync() are adjacent which may be
1876  // more efficient, at least with some Linux kernel versions.
1877  if (changes_tail ? !io_full_sync(handle) : !io_sync(handle)) {
1878  (void)::close(handle);
1879  handle = -1;
1880  (void)unlink(tmp.c_str());
1881  throw Xapian::DatabaseError("Can't commit new revision - failed to flush DB to disk");
1882  }
1883 
1884  if (!io_tmp_rename(tmp, basefile)) {
1885  string msg("Couldn't update base file ");
1886  msg += basefile;
1887  throw Xapian::DatabaseError(msg, errno);
1888  }
1889  base.commit();
1890 
1891  read_root();
1892 
1893  changed_n = 0;
1894  changed_c = DIR_START;
1896  } catch (...) {
1898  throw;
1899  }
1900 }
1901 
1902 void
1904 {
1905  LOGCALL_VOID(DB, "ChertTable::write_changed_blocks", changes_fd);
1906  Assert(changes_fd >= 0);
1907  if (handle < 0) return;
1908  if (faked_root_block) return;
1909 
1910  string buf;
1911  pack_uint(buf, 2u); // Indicate the item is a list of blocks
1912  pack_string(buf, tablename);
1913  pack_uint(buf, block_size);
1914  io_write(changes_fd, buf.data(), buf.size());
1915 
1916  // Compare the old and new bitmaps to find blocks which have changed, and
1917  // write them to the file descriptor.
1918  uint4 n = 0;
1919  uint8_t * p = new uint8_t[block_size];
1920  try {
1922  while (base.find_changed_block(&n)) {
1923  buf.resize(0);
1924  pack_uint(buf, n + 1);
1925  io_write(changes_fd, buf.data(), buf.size());
1926 
1927  // Read block n.
1928  read_block(n, p);
1929 
1930  // Write block n to the file.
1931  io_write(changes_fd, reinterpret_cast<const char *>(p),
1932  block_size);
1933  ++n;
1934  }
1935  delete[] p;
1936  p = 0;
1937  } catch (...) {
1938  delete[] p;
1939  throw;
1940  }
1941  buf.resize(0);
1942  pack_uint(buf, 0u);
1943  io_write(changes_fd, buf.data(), buf.size());
1944 }
1945 
1946 void
1948 {
1949  LOGCALL_VOID(DB, "ChertTable::cancel", NO_ARGS);
1950  Assert(writable);
1951 
1952  if (handle < 0) {
1953  if (handle == -2) {
1955  }
1956  latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1957  return;
1958  }
1959 
1960  // This causes problems: if (!Btree_modified) return;
1961 
1962  string err_msg;
1963  if (!base.read(name, base_letter, writable, err_msg)) {
1964  throw Xapian::DatabaseCorruptError(string("Couldn't reread base ") + base_letter);
1965  }
1966 
1969  root = base.get_root();
1970  level = base.get_level();
1971  //bit_map_size = basep->get_bit_map_size();
1975 
1976  latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1977 
1978  Btree_modified = false;
1979 
1980  for (int j = 0; j <= level; j++) {
1981  C[j].n = BLK_UNUSED;
1982  C[j].rewrite = false;
1983  }
1984  read_root();
1985 
1986  changed_n = 0;
1987  changed_c = DIR_START;
1989 
1992  ++cursor_version;
1993  }
1994 }
1995 
1996 /************ B-tree reading ************/
1997 
1998 bool
1999 ChertTable::do_open_to_read(bool revision_supplied, chert_revision_number_t revision_)
2000 {
2001  LOGCALL(DB, bool, "ChertTable::do_open_to_read", revision_supplied | revision_);
2002  if (handle == -2) {
2004  }
2005  handle = io_open_block_rd(name + "DB");
2006  if (handle < 0) {
2007  if (lazy) {
2008  // This table is optional when reading!
2009  revision_number = revision_;
2010  RETURN(true);
2011  }
2012  string message("Couldn't open ");
2013  message += name;
2014  message += "DB to read: ";
2015  errno_to_string(errno, message);
2016  throw Xapian::DatabaseOpeningError(message);
2017  }
2018 
2019  if (!basic_open(revision_supplied, revision_)) {
2020  ::close(handle);
2021  handle = -1;
2022  if (revision_supplied) {
2023  // The requested revision was not available.
2024  // This could be because the database was modified underneath us, or
2025  // because a base file is missing. Return false, and work out what
2026  // the problem was at a higher level.
2027  RETURN(false);
2028  }
2029  throw Xapian::DatabaseOpeningError("Failed to open table for reading");
2030  }
2031 
2032  for (int j = 0; j <= level; j++) {
2033  C[j].n = BLK_UNUSED;
2034  C[j].p = new uint8_t[block_size];
2035  }
2036 
2037  read_root();
2038  RETURN(true);
2039 }
2040 
2041 void
2043 {
2044  LOGCALL_VOID(DB, "ChertTable::open", NO_ARGS);
2045  LOGLINE(DB, "opening at path " << name);
2046  close();
2047 
2048  if (!writable) {
2049  // Any errors are thrown if revision_supplied is false
2050  (void)do_open_to_read(false, 0);
2051  return;
2052  }
2053 
2054  // Any errors are thrown if revision_supplied is false.
2055  (void)do_open_to_write(false, 0);
2056 }
2057 
2058 bool
2060 {
2061  LOGCALL(DB, bool, "ChertTable::open", revision);
2062  LOGLINE(DB, "opening for particular revision at path " << name);
2063  close();
2064 
2065  if (!writable) {
2066  if (do_open_to_read(true, revision)) {
2067  AssertEq(revision_number, revision);
2068  RETURN(true);
2069  } else {
2070  close();
2071  RETURN(false);
2072  }
2073  }
2074 
2075  if (!do_open_to_write(true, revision)) {
2076  // Can't open at the requested revision.
2077  close();
2078  RETURN(false);
2079  }
2080 
2081  AssertEq(revision_number, revision);
2082  RETURN(true);
2083 }
2084 
2085 bool
2086 ChertTable::prev_for_sequential(Cursor * C_, int /*dummy*/) const
2087 {
2088  LOGCALL(DB, bool, "ChertTable::prev_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2089  int c = C_[0].c;
2090  AssertRel(DIR_START,<=,c);
2091  AssertRel(c,<,DIR_END(C_[0].p));
2092  if (c == DIR_START) {
2093  uint8_t * p = C_[0].p;
2094  Assert(p);
2095  uint4 n = C_[0].n;
2096  while (true) {
2097  if (n == 0) RETURN(false);
2098  n--;
2099  if (writable) {
2100  if (n == C[0].n) {
2101  // Block is a leaf block in the built-in cursor
2102  // (potentially in modified form).
2103  memcpy(p, C[0].p, block_size);
2104  } else {
2105  // Blocks in the built-in cursor may not have been written
2106  // to disk yet, so we have to check that the block number
2107  // isn't in the built-in cursor or we'll read an
2108  // uninitialised block (for which GET_LEVEL(p) will
2109  // probably return 0).
2110  int j;
2111  for (j = 1; j <= level; ++j) {
2112  if (n == C[j].n) break;
2113  }
2114  if (j <= level) continue;
2115 
2116  // Block isn't in the built-in cursor, so the form on disk
2117  // is valid, so read it to check if it's the next level 0
2118  // block.
2119  read_block(n, p);
2120  }
2121  } else {
2122  read_block(n, p);
2123  }
2125  if (REVISION(p) > revision_number + writable) {
2126  set_overwritten();
2127  RETURN(false);
2128  }
2129  if (GET_LEVEL(p) == 0) break;
2130  }
2131  c = DIR_END(p);
2132  C_[0].n = n;
2133  AssertRel(DIR_START,<,c);
2134  }
2135  c -= D2;
2136  C_[0].c = c;
2137  RETURN(true);
2138 }
2139 
2140 bool
2141 ChertTable::next_for_sequential(Cursor * C_, int /*dummy*/) const
2142 {
2143  LOGCALL(DB, bool, "ChertTable::next_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2144  uint8_t * p = C_[0].p;
2145  Assert(p);
2146  int c = C_[0].c;
2147  AssertRel(c,<,DIR_END(p));
2148  c += D2;
2149  Assert((unsigned)c < block_size);
2150  if (c == DIR_END(p)) {
2151  uint4 n = C_[0].n;
2152  while (true) {
2153  n++;
2154  if (n > base.get_last_block()) RETURN(false);
2155  if (writable) {
2156  if (n == C[0].n) {
2157  // Block is a leaf block in the built-in cursor
2158  // (potentially in modified form).
2159  memcpy(p, C[0].p, block_size);
2160  } else {
2161  // Blocks in the built-in cursor may not have been written
2162  // to disk yet, so we have to check that the block number
2163  // isn't in the built-in cursor or we'll read an
2164  // uninitialised block (for which GET_LEVEL(p) will
2165  // probably return 0).
2166  int j;
2167  for (j = 1; j <= level; ++j) {
2168  if (n == C[j].n) break;
2169  }
2170  if (j <= level) continue;
2171 
2172  // Block isn't in the built-in cursor, so the form on disk
2173  // is valid, so read it to check if it's the next level 0
2174  // block.
2175  read_block(n, p);
2176  }
2177  } else {
2178  read_block(n, p);
2179  }
2181  if (REVISION(p) > revision_number + writable) {
2182  set_overwritten();
2183  RETURN(false);
2184  }
2185  if (GET_LEVEL(p) == 0) break;
2186  }
2187  c = DIR_START;
2188  C_[0].n = n;
2189  }
2190  C_[0].c = c;
2191  RETURN(true);
2192 }
2193 
2194 bool
2196 {
2197  LOGCALL(DB, bool, "ChertTable::prev_default", Literal("C_") | j);
2198  uint8_t * p = C_[j].p;
2199  int c = C_[j].c;
2200  AssertRel(DIR_START,<=,c);
2201  AssertRel(c,<,DIR_END(p));
2202  AssertRel((unsigned)DIR_END(p),<=,block_size);
2203  if (c == DIR_START) {
2204  if (j == level) RETURN(false);
2205  if (!prev_default(C_, j + 1)) RETURN(false);
2206  c = DIR_END(p);
2207  AssertRel(DIR_START,<,c);
2208  }
2209  c -= D2;
2210  C_[j].c = c;
2211  if (j > 0) {
2212  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2213  }
2214  RETURN(true);
2215 }
2216 
2217 bool
2219 {
2220  LOGCALL(DB, bool, "ChertTable::next_default", Literal("C_") | j);
2221  uint8_t * p = C_[j].p;
2222  int c = C_[j].c;
2223  AssertRel(c,<,DIR_END(p));
2224  AssertRel((unsigned)DIR_END(p),<=,block_size);
2225  c += D2;
2226  if (j > 0) {
2227  AssertRel(DIR_START,<,c);
2228  } else {
2229  AssertRel(DIR_START,<=,c);
2230  }
2231  // Sometimes c can be DIR_END(p) + 2 here it appears...
2232  if (c >= DIR_END(p)) {
2233  if (j == level) RETURN(false);
2234  if (!next_default(C_, j + 1)) RETURN(false);
2235  c = DIR_START;
2236  }
2237  C_[j].c = c;
2238  if (j > 0) {
2239  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2240 #ifdef BTREE_DEBUG_FULL
2241  printf("Block in ChertTable:next_default");
2242  report_block_full(j - 1, C_[j - 1].n, C_[j - 1].p);
2243 #endif /* BTREE_DEBUG_FULL */
2244  }
2245  RETURN(true);
2246 }
2247 
2248 void
2250 {
2251  throw Xapian::DatabaseClosedError("Database has been closed");
2252 }
2253 
2269 bool Key::operator<(Key key2) const
2270 {
2271  LOGCALL(DB, bool, "Key::operator<", static_cast<const void*>(key2.p));
2272  int key1_len = length();
2273  int key2_len = key2.length();
2274  if (key1_len == key2_len) {
2275  // The keys are the same length, so we can compare the counts
2276  // in the same operation since they're stored as 2 byte
2277  // bigendian numbers.
2278  RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) < 0);
2279  }
2280 
2281  int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
2282 
2283  // Compare the common part of the keys
2284  int diff = memcmp(p + K1, key2.p + K1, k_smaller);
2285  if (diff != 0) RETURN(diff < 0);
2286 
2287  // We dealt with the "same length" case above so we never need to check
2288  // the count here.
2289  RETURN(key1_len < key2_len);
2290 }
2291 
2292 bool Key::operator==(Key key2) const
2293 {
2294  LOGCALL(DB, bool, "Key::operator==", static_cast<const void*>(key2.p));
2295  int key1_len = length();
2296  if (key1_len != key2.length()) RETURN(false);
2297  // The keys are the same length, so we can compare the counts
2298  // in the same operation since they're stored as 2 byte
2299  // bigendian numbers.
2300  RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) == 0);
2301 }
bool get_sequential() const
int close(FD &fd)
Definition: fd.h:63
void set_overwritten() const
Definition: chert_table.cc:251
static uint8_t * zeroed_new(size_t size)
Definition: chert_table.cc:96
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:480
#define RETURN(A)
Definition: debuglog.h:482
bool get_have_fakeroot() const
#define Assert(COND)
Definition: omassert.h:122
char other_base_letter() const
Definition: chert_table.h:711
int io_open_block_rd(const char *fname)
Open a block-based file for reading.
Definition: io_utils.h:38
void set_level(uint4 level_)
int DIR_END(const uint8_t *b)
Definition: chert_table.h:154
bool both_bases
set to true if baseA and baseB both exist as valid bases.
Definition: chert_table.h:742
int c
offset in the block&#39;s directory
Definition: chert_cursor.h:47
void io_write(int fd, const char *p, size_t n)
Write n bytes from block pointed to by p to file descriptor fd.
Definition: io_utils.cc:145
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: chert_cursor.h:66
Indicates an attempt to access a closed database.
Definition: error.h:1097
void add_item(Item_wr kt, int j)
ChertTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
Definition: chert_table.cc:672
#define AssertEq(A, B)
Definition: omassert.h:124
void read_root()
int add_kt(bool found)
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C...
Definition: chert_table.cc:846
Key key() const
Definition: chert_table.h:214
z_stream * inflate_zstream
Zlib state object for inflating.
Definition: chert_table.h:858
bool io_unlink(const std::string &filename)
Delete a file.
Definition: io_utils.cc:52
T get_address() const
Definition: chert_table.h:200
int length() const
Definition: chert_table.h:181
#define true
Definition: header.h:8
bool next()
Advance to the next key.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
int getint4(const unsigned char *p, int c)
Definition: chert_table.h:79
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
void io_read_block(int fd, char *p, size_t n, off_t b, off_t o)
Read block b size n bytes into buffer p from file descriptor fd, offset o.
Definition: io_utils.cc:180
const size_t BLOCK_CAPACITY
Even for items of at maximum size, it must be possible to get this number of items in a block...
Definition: chert_table.h:105
#define usual(COND)
Definition: config.h:563
const int C2
Definition: chert_table.h:123
bool io_sync(int fd)
Ensure all data previously written to file descriptor fd has been written to disk.
Definition: io_utils.h:73
void compact(uint8_t *p)
compact(p) compact the block at p by shuffling all the items up to the end.
Definition: chert_table.cc:469
void set_full_compaction(bool parity)
const char * tablename
The name of the table (used when writing changesets).
Definition: chert_table.h:716
const int I2
Definition: chert_table.h:121
Convert errno value to std::string, thread-safe if possible.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:479
bool faked_root_block
true if the root block is faked (not written to disk).
Definition: chert_table.h:751
int delete_kt()
Definition: chert_table.cc:914
int io_open_block_wr(const char *fname, bool anew)
Open a block-based file for writing.
Definition: io_utils.cc:67
Btree base file implementation.
uint4 root
the root block of the B-tree
Definition: chert_table.h:771
Item_wr kt
buffer of size block_size for making up key-tag items
Definition: chert_table.h:774
#define STRINGIZE(X)
The STRINGIZE macro converts its parameter into a string constant.
Definition: stringutils.h:36
bool io_readahead_block(int, size_t, off_t, off_t=0)
Readahead block b size n bytes from file descriptor fd.
Definition: io_utils.h:133
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
void append_chunk(std::string *tag) const
Definition: chert_table.h:215
#define BLK_UNUSED
Definition: chert_cursor.h:31
STL namespace.
void lazy_alloc_inflate_zstream() const
Allocate the zstream for inflating, if not already allocated.
Convert types to std::string.
chert_tablesize_t item_count
keeps a count of the number of items in the B-tree.
Definition: chert_table.h:728
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
void open()
Open the btree at the latest revision.
Utility functions for testing files.
Cursor C[BTREE_CURSOR_LEVELS]
Definition: chert_table.h:841
#define false
Definition: header.h:9
unsigned long cursor_version
Version count for tracking when cursors need to rebuild.
Definition: chert_table.h:813
#define rare(COND)
Definition: config.h:562
bool exists() const
Determine whether the btree exists on disk.
bool key_exists(const std::string &key) const
Check if a key exists in the Btree.
void create_and_open(unsigned int blocksize)
Create a new empty btree structure on disk and open it at the initial revision.
unsigned int chert_revision_number_t
A type used to store a revision number for a table.
Definition: chert_types.h:40
const int DIR_START
Definition: chert_table.h:155
void aligned_write4(unsigned char *ptr, T value)
Definition: wordaccess.h:170
chert_tablesize_t get_item_count() const
void set_have_fakeroot(bool have_fakeroot_)
uint8_t * split_p
Buffer used when splitting a block.
Definition: chert_table.h:848
ChertTable(const ChertTable &)
Copying not allowed.
Hierarchy of classes which Xapian can throw as exceptions.
bool readahead_key(const string &key) const
bool del(const std::string &key)
Delete an entry from the table.
static uint4 block_given_by(const uint8_t *p, int c)
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value...
Definition: pretty.h:45
bool prev_for_sequential(Cursor *C_, int dummy) const
~ChertTable()
Close the Btree.
bool next_for_sequential(Cursor *C_, int dummy) const
bool io_full_sync(int fd)
Definition: io_utils.h:88
void set_sequential(bool sequential_)
uint4 get_block_size() const
uint32_t uint4
Definition: internaltypes.h:32
bool next_default(Cursor *C_, int j) const
DatabaseModifiedError indicates a database was modified.
Definition: error.h:539
void cancel()
Cancel any outstanding changes.
functions for reading and writing different width words
uint4 get_level() const
void errno_to_string(int e, string &s)
int size() const
I in diagram above.
Definition: chert_table.h:202
void form_null_key(uint4 n)
Form an item with a null key and with block number n in the tag.
Definition: chert_table.h:286
void set_block_given_by(uint4 n)
Set this item&#39;s tag to point to block n (this block should not be at level 0).
Definition: chert_table.h:273
Interface to Btree cursors.
bool do_open_to_read(bool revision_supplied, chert_revision_number_t revision_)
Perform the opening operation to read.
std::string name
The path name of the B tree.
Definition: chert_table.h:783
bool rewrite
true if the block is not the same as on disk, and so needs rewriting
Definition: chert_cursor.h:58
void SET_LEVEL(uint8_t *b, int x)
Definition: chert_table.h:158
uint8_t * p
pointer to a block
Definition: chert_cursor.h:45
void io_write_block(int fd, const char *p, size_t n, off_t b, off_t o)
Write block b size n bytes from buffer p to file descriptor fd, offset o.
Definition: io_utils.cc:228
int GET_LEVEL(const uint8_t *b)
Definition: chert_table.h:151
bool do_open_to_write(bool revision_supplied, chert_revision_number_t revision_, bool create_db=false)
Perform the opening operation to write.
bool operator<(Key key2) const
Compares this key with key2.
chert_revision_number_t revision_number
revision number of the opened B-tree.
Definition: chert_table.h:725
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
void set_root(uint4 root_)
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void erase()
Erase this table from disk.
static int find_in_block(const uint8_t *p, Key key, bool leaf, int c)
find_in_block(p, key, leaf, c) searches for the key in the block at p.
Definition: chert_table.cc:386
bool io_tmp_rename(const std::string &tmp_file, const std::string &real_file)
Rename a temporary file to its final position.
Definition: io_utils.cc:271
void set_block_size(unsigned int block_size_)
Set the block size.
void SET_DIR_END(uint8_t *b, int x)
Definition: chert_table.h:161
bool writable
Set to true when the database is opened to write.
Definition: chert_table.h:807
const int K1
Definition: chert_table.h:120
bool find_changed_block(uint4 *n) const
Find the first changed block at or after position *n.
bool read(const std::string &name, char ch, bool read_bitmap, std::string &err_msg)
Read values from a base file.
void close(bool permanent=false)
Close the Btree.
bool really_empty() const
Return true if there are no entries in the table.
ChertCursor * cursor_get() const
Get a cursor for reading from the table.
void set_item_count(chert_tablesize_t item_count_)
bool basic_open(bool revision_supplied, chert_revision_number_t revision)
#define BYTE_PAIR_RANGE
Definition: chert_table.cc:166
void setD(unsigned char *p, int c, int x)
Definition: chert_table.h:128
void write_changed_blocks(int changes_fd)
Append the list of blocks changed to a changeset file.
bool sequential
true iff the data has been written in a single write in sequential order.
Definition: chert_table.h:756
#define AssertParanoid(COND)
Definition: omassert.h:129
void enter_key(int j, Key prevkey, Key newkey)
enter_key(j, prevkey, newkey) is called after a block split.
Definition: chert_table.cc:538
z_stream * deflate_zstream
Zlib state object for deflating.
Definition: chert_table.h:855
bool prev_default(Cursor *C_, int j) const
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:478
uint4 get_root() const
bool find(Cursor *) const
find(C_) searches for the key of B->kt in the B-tree.
Definition: chert_table.cc:433
uint4 changed_n
the last block to be changed by an addition
Definition: chert_table.h:791
void commit(chert_revision_number_t revision, int changes_fd=-1, const std::string *changes_tail=NULL)
Commit any outstanding changes to the table.
void lazy_alloc_deflate_zstream() const
Allocate the zstream for deflating, if not already allocated.
uint4 block_given_by() const
Get this item&#39;s tag as a block number (this block should not be at level 0).
Definition: chert_table.h:224
void set_block_size(uint4 block_size_)
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
void fake_root_item()
Definition: chert_table.h:314
int mid_point(uint8_t *p) const
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in ...
Definition: chert_table.cc:604
void block_to_cursor(Cursor *C_, int j, uint4 n) const
Definition: chert_table.cc:272
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
Definition: chert_table.cc:978
uint4 get_revision() const
bool get_compressed() const
Definition: chert_table.h:207
int MAX_FREE(const uint8_t *b)
Definition: chert_table.h:152
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
int seq_count
count of the number of successive instances of purely sequential addition, starting at SEQ_START_POIN...
Definition: chert_table.h:788
#define CHERT_DEFAULT_BLOCK_SIZE
The default block size to use in a B-tree table.
Definition: chert_types.h:57
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
Btree implementation.
uint4 n
block number
Definition: chert_cursor.h:56
void alter()
Btree::alter(); is called when the B-tree is to be altered.
Definition: chert_table.cc:338
char name[9]
Definition: dbcheck.cc:55
#define SEQ_START_POINT
Flip to sequential addition block-splitting after this number of observed sequential additions (in ne...
Definition: chert_table.cc:151
void pack_string(std::string &s, const std::string &value)
Append an encoded std::string to a string.
Definition: pack.h:477
bool Btree_modified
Set to true the first time the B-tree is modified.
Definition: chert_table.h:801
#define CHERT_BTREE_MAX_KEY_LEN
The largest possible value of a key_len.
Definition: chert_table.h:101
void delete_item(int j, bool repeatedly)
ChertTable::delete_item(j, repeatedly) is (almost) the converse of add_item.
Definition: chert_table.cc:771
const int DONT_COMPRESS
Definition: chert_table.h:94
int components_of() const
Definition: chert_table.h:211
char base_letter
the value &#39;A&#39; or &#39;B&#39; of the current base
Definition: chert_table.h:745
void write_to_file(const std::string &filename, char base_letter, const std::string &tablename, int changes_fd, const std::string *changes_tail)
Write the btree base file to disk.
chert_revision_number_t latest_revision_number
Revision number of the other base, or zero if there is only one base file.
Definition: chert_table.h:736
unsigned int block_size
block size of the B tree in bytes
Definition: chert_table.h:731
int changed_c
directory offset corresponding to last block to be changed by an addition
Definition: chert_table.h:795
Pack types into strings and unpack them again.
int handle
File descriptor of the table.
Definition: chert_table.h:765
const int D2
Definition: chert_table.h:122
Wrappers for low-level POSIX I/O routines.
const int BTREE_CURSOR_LEVELS
Definition: chert_table.h:325
Various handy helpers which std::string really should provide.
void form_key(const std::string &key) const
Definition: chert_table.cc:948
uint4 get_last_block() const
void read_block(uint4 n, uint8_t *p) const
read_block(n, p) reads block n of the DB file to address p.
Definition: chert_table.cc:170
void flush_db()
Flush any outstanding changes to the DB file of the table.
Definition: header.h:151
unsigned REVISION(const uint8_t *b)
Definition: chert_table.h:150
ChertTable_base base
For writing back as file baseA or baseB.
Definition: chert_table.h:780
bool operator==(Key key2) const
bool cursor_created_since_last_modification
Flag for tracking when cursors need to rebuild.
Definition: chert_table.h:810
void write_block(uint4 n, const uint8_t *p) const
write_block(n, p) writes block n in the DB file from address p.
Definition: chert_table.cc:199
Various assertion macros.
#define LOGLINE(a, b)
Definition: debuglog.h:483
void set_revision(uint4 revision_)
DatabaseError indicates some sort of database related error.
Definition: error.h:367
void split_root(uint4 split_n)
Btree needs to gain a new level to insert more items: so split root block and construct a new one...
Definition: chert_table.cc:493
const size_t COMPRESS_MIN
Definition: chert_table.cc:71
uint8_t * buffer
buffer of size block_size for reforming blocks
Definition: chert_table.h:777
static void throw_database_closed()
Throw an exception indicating that the database is closed.
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
void SET_REVISION(uint8_t *b, uint4 rev)
Definition: chert_table.h:157
int level
number of levels, counting from 0
Definition: chert_table.h:768
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
void SET_MAX_FREE(uint8_t *b, int x)
Definition: chert_table.h:159
bool lazy
If true, don&#39;t create the table until it&#39;s needed.
Definition: chert_table.h:861
void set_key_and_block(Key newkey, int truncate_size, uint4 n)
Definition: chert_table.h:250
Debug logging macros.
const uint8_t * p
Definition: chert_table.h:168
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
int TOTAL_FREE(const uint8_t *b)
Definition: chert_table.h:153
void add_item_to_block(uint8_t *p, Item_wr kt, int c)
add_item_to_block(p, kt_, c) adds item kt_ to the block at p.
Definition: chert_table.cc:636
#define C(X)
void SET_TOTAL_FREE(uint8_t *b, int x)
Definition: chert_table.h:160
bool read_tag(Cursor *C_, std::string *tag, bool keep_compressed) const
Read the tag value for the key pointed to by cursor C_.
const uint8_t * get_address() const
Definition: chert_table.h:171
int compress_strategy
DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY, Z_RLE.
Definition: chert_table.h:852