xapian-core  1.4.26
chert_table.cc
Go to the documentation of this file.
1 /* chert_table.cc: Btree implementation
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
6  * Copyright 2008 Lemur Consulting Ltd
7  * Copyright 2010 Richard Boulton
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "chert_table.h"
28 
29 #include <xapian/error.h>
30 
31 #include <cerrno>
32 
33 #include "errno_to_string.h"
34 #include "omassert.h"
35 #include "stringutils.h" // For STRINGIZE().
36 
37 // Define to use "dangerous" mode - in this mode we write modified btree
38 // blocks back in place. This is somewhat faster (especially once we're
39 // I/O bound) but the database can't be safely searched during indexing
40 // and if indexing is terminated uncleanly, the database may be corrupt.
41 //
42 // Despite the risks, it's useful for speeding up a full rebuild.
43 //
44 // FIXME: make this mode run-time selectable, and record that it is currently
45 // in use somewhere on disk, so readers can check and refuse to open the
46 // database.
47 //
48 // #define DANGEROUS
49 
50 #include <sys/types.h>
51 
52 #include <cstring> /* for memmove */
53 #include <climits> /* for CHAR_BIT */
54 
55 #include "chert_btreebase.h"
56 #include "chert_cursor.h"
57 
58 #include "filetests.h"
59 #include "io_utils.h"
60 #include "debuglog.h"
61 #include "pack.h"
62 #include "str.h"
63 #include "wordaccess.h"
64 
65 #include <algorithm> // for std::min()
66 #include <string>
67 
68 using namespace std;
69 
70 // Only try to compress tags longer than this many bytes.
71 const size_t COMPRESS_MIN = 4;
72 
73 //#define BTREE_DEBUG_FULL 1
74 #undef BTREE_DEBUG_FULL
75 
76 #ifdef BTREE_DEBUG_FULL
77 /*------debugging aids from here--------*/
78 
79 static void print_key(const uint8_t * p, int c, int j);
80 static void print_tag(const uint8_t * p, int c, int j);
81 
82 /*
83 static void report_cursor(int N, Btree * B, Cursor * C)
84 {
85  int i;
86  printf("%d)\n", N);
87  for (i = 0; i <= B->level; i++)
88  printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
89  C[i].p, C[i].c, C[i].n, C[i].rewrite);
90 }
91 */
92 
93 /*------to here--------*/
94 #endif /* BTREE_DEBUG_FULL */
95 
96 static inline uint8_t *zeroed_new(size_t size)
97 {
98  uint8_t *temp = new uint8_t[size];
99  memset(temp, 0, size);
100  return temp;
101 }
102 
103 /* A B-tree comprises (a) a base file, containing essential information (Block
104  size, number of the B-tree root block etc), (b) a bitmap, the Nth bit of the
105  bitmap being set if the Nth block of the B-tree file is in use, and (c) a
106  file DB containing the B-tree proper. The DB file is divided into a sequence
107  of equal sized blocks, numbered 0, 1, 2 ... some of which are free, some in
108  use. Those in use are arranged in a tree.
109 
110  Each block, b, has a structure like this:
111 
112  R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
113  <---------- D ----------> <-M->
114 
115  And then,
116 
117  R = REVISION(b) is the revision number the B-tree had when the block was
118  written into the DB file.
119  L = GET_LEVEL(b) is the level of the block, which is the number of levels
120  towards the root of the B-tree structure. So leaf blocks
121  have level 0 and the one root block has the highest level
122  equal to the number of levels in the B-tree.
123  M = MAX_FREE(b) is the size of the gap between the end of the directory and
124  the first item of data. (It is not necessarily the maximum
125  size among the bits of space that are free, but I can't
126  think of a better name.)
127  T = TOTAL_FREE(b)is the total amount of free space left in b.
128  D = DIR_END(b) gives the offset to the end of the directory.
129 
130  o1, o2 ... oN are a directory of offsets to the N items held in the block.
131  The items are key-tag pairs, and as they occur in the directory are ordered
132  by the keys.
133 
134  An item has this form:
135 
136  I K key x C tag
137  <--K-->
138  <------I------>
139 
140  A long tag presented through the API is split up into C tags small enough to
141  be accommodated in the blocks of the B-tree. The key is extended to include
142  a counter, x, which runs from 1 to C. The key is preceded by a length, K,
143  and the whole item with a length, I, as depicted above.
144 
145  Here are the corresponding definitions:
146 
147 */
148 
151 #define SEQ_START_POINT (-10)
152 
153 
154 /* There are two bit maps in bit_map0 and bit_map. The nth bit of bitmap is 0
155  if the nth block is free, otherwise 1. bit_map0 is the initial state of
156  bitmap at the start of the current transaction.
157 
158  Note use of the limits.h values:
159  UCHAR_MAX = 255, an unsigned with all bits set, and
160  CHAR_BIT = 8, the number of bits per byte
161 
162  BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
163  bytes -- 64K effectively.
164 */
165 
166 #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
167 
169 void
170 ChertTable::read_block(uint4 n, uint8_t * p) const
171 {
172  // Log the value of p, not the contents of the block it points to...
173  LOGCALL_VOID(DB, "ChertTable::read_block", n | (void*)p);
174  if (rare(handle == -2))
176 
177  /* Use the base bit_map_size not the bitmap's size, because
178  * the latter is uninitialised in readonly mode.
179  */
180  Assert(n / CHAR_BIT < base.get_bit_map_size());
181 
182  io_read_block(handle, reinterpret_cast<char *>(p), block_size, n);
183 
184  int dir_end = DIR_END(p);
185  if (rare(dir_end < DIR_START || unsigned(dir_end) > block_size)) {
186  string msg("dir_end invalid in block ");
187  msg += str(n);
188  throw Xapian::DatabaseCorruptError(msg);
189  }
190 }
191 
198 void
199 ChertTable::write_block(uint4 n, const uint8_t * p) const
200 {
201  LOGCALL_VOID(DB, "ChertTable::write_block", n | p);
202  Assert(writable);
203  /* Check that n is in range. */
204  Assert(n / CHAR_BIT < base.get_bit_map_size());
205 
206  /* don't write to non-free */;
207  AssertParanoid(base.block_free_at_start(n));
208 
209  /* write revision is okay */
210  AssertEqParanoid(REVISION(p), latest_revision_number + 1);
211 
212  if (both_bases) {
213  // Delete the old base before modifying the database.
214  //
215  // If the file is on NFS, then io_unlink() may return false even if
216  // the file was removed, so on balance throwing an exception in this
217  // case is unhelpful, since we wanted the file gone anyway! The
218  // likely explanation is that somebody moved, deleted, or changed a
219  // symlink to the database directory.
220  (void)io_unlink(name + "base" + other_base_letter());
221  both_bases = false;
222  latest_revision_number = revision_number;
223  }
224 
225  io_write_block(handle, reinterpret_cast<const char *>(p), block_size, n);
226 }
227 
228 
229 /* A note on cursors:
230 
231  Each B-tree level has a corresponding array element C[j] in a
232  cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
233  root block level. Within a level j,
234 
235  C[j].p addresses the block
236  C[j].c is the offset into the directory entry in the block
237  C[j].n is the number of the block at C[j].p
238 
239  A look up in the B-tree causes navigation of the blocks starting
240  from the root. In each block, p, we find an offset, c, to an item
241  which gives the number, n, of the block for the next level. This
242  leads to an array of values p,c,n which are held inside the cursor.
243 
244  Any Btree object B has a built-in cursor, at B->C. But other cursors may
245  be created. If BC is a created cursor, BC->C is the cursor in the
246  sense given above, and BC->B is the handle for the B-tree again.
247 */
248 
249 
250 void
252 {
253  LOGCALL_VOID(DB, "ChertTable::set_overwritten", NO_ARGS);
254  // If we're writable, there shouldn't be another writer who could cause
255  // overwritten to be flagged, so that's a DatabaseCorruptError.
256  if (writable)
257  throw Xapian::DatabaseCorruptError("Db block overwritten - are there multiple writers?");
258  throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
259 }
260 
261 /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
262  C, writing the block currently at C[j] back to disk if necessary.
263  Note that
264 
265  C[j].rewrite
266 
267  is true iff C[j].n is different from block n in file DB. If it is
268  false no rewriting is necessary.
269 */
270 
271 void
273 {
274  LOGCALL_VOID(DB, "ChertTable::block_to_cursor", (void*)C_ | j | n);
275  if (n == C_[j].n) return;
276  uint8_t * p = C_[j].p;
277  Assert(p);
278 
279  // FIXME: only needs to be done in write mode
280  if (C_[j].rewrite) {
281  Assert(writable);
282  Assert(C == C_);
283  write_block(C_[j].n, p);
284  C_[j].rewrite = false;
285  }
286 
287  // Check if the block is in the built-in cursor (potentially in
288  // modified form).
289  if (n == C[j].n) {
290  if (p != C[j].p)
291  memcpy(p, C[j].p, block_size);
292  } else {
293  read_block(n, p);
294  }
295 
296  C_[j].n = n;
297  if (j < level) {
298  /* unsigned comparison */
299  if (rare(REVISION(p) > REVISION(C_[j + 1].p))) {
300  set_overwritten();
301  return;
302  }
303  }
304 
305  if (rare(j != GET_LEVEL(p))) {
306  string msg = "Expected block ";
307  msg += str(n);
308  msg += " to be level ";
309  msg += str(j);
310  msg += ", not ";
311  msg += str(GET_LEVEL(p));
312  throw Xapian::DatabaseCorruptError(msg);
313  }
314 }
315 
337 void
339 {
340  LOGCALL_VOID(DB, "ChertTable::alter", NO_ARGS);
341  Assert(writable);
342 #ifdef DANGEROUS
343  C[0].rewrite = true;
344 #else
345  int j = 0;
346  uint8_t * p = C[j].p;
347  while (true) {
348  if (C[j].rewrite) return; /* all new, so return */
349  C[j].rewrite = true;
350 
351  uint4 n = C[j].n;
352  if (base.block_free_at_start(n)) {
353  Assert(REVISION(p) == latest_revision_number + 1);
354  return;
355  }
356  Assert(REVISION(p) < latest_revision_number + 1);
357  base.free_block(n);
358  n = base.next_free_block();
359  C[j].n = n;
360  SET_REVISION(p, latest_revision_number + 1);
361 
362  if (j == level) return;
363  j++;
364  p = C[j].p;
365  Item_wr(p, C[j].c).set_block_given_by(n);
366  }
367 #endif
368 }
369 
385 int
386 ChertTable::find_in_block(const uint8_t * p, Key key, bool leaf, int c)
387 {
388  LOGCALL_STATIC(DB, int, "ChertTable::find_in_block", (const void*)p | (const void *)key.get_address() | leaf | c);
389  // c should be odd (either -1, or an even offset from DIR_START).
390  Assert((c & 1) == 1);
391  int i = DIR_START;
392  if (leaf) i -= D2;
393  if (c != -1) {
394  AssertRel(i,<=,c);
395  }
396  int j = DIR_END(p);
397 
398  if (c != -1) {
399  if (c < j && i < c && Item(p, c).key() <= key)
400  i = c;
401  c += D2;
402  if (c < j && i < c && key < Item(p, c).key())
403  j = c;
404  }
405 
406  while (j - i > D2) {
407  int k = i + ((j - i)/(D2 * 2))*D2; /* mid way */
408  if (key < Item(p, k).key()) j = k; else i = k;
409  }
410  if (leaf) {
411  AssertRel(DIR_START - D2,<=,i);
412  } else {
413  AssertRel(DIR_START,<=,i);
414  }
415  AssertRel(i,<,DIR_END(p));
416  RETURN(i);
417 }
418 
432 bool
434 {
435  LOGCALL(DB, bool, "ChertTable::find", (void*)C_);
436  // Note: the parameter is needed when we're called by ChertCursor
437  const uint8_t * p;
438  int c;
439  Key key = kt.key();
440  for (int j = level; j > 0; --j) {
441  p = C_[j].p;
442  c = find_in_block(p, key, false, C_[j].c);
443 #ifdef BTREE_DEBUG_FULL
444  printf("Block in ChertTable:find - code position 1");
445  report_block_full(j, C_[j].n, p);
446 #endif /* BTREE_DEBUG_FULL */
447  C_[j].c = c;
448  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
449  }
450  p = C_[0].p;
451  c = find_in_block(p, key, true, C_[0].c);
452 #ifdef BTREE_DEBUG_FULL
453  printf("Block in ChertTable:find - code position 2");
454  report_block_full(0, C_[0].n, p);
455 #endif /* BTREE_DEBUG_FULL */
456  C_[0].c = c;
457  if (c < DIR_START) {
458  RETURN(false);
459  }
460  RETURN(Item(p, c).key() == key);
461 }
462 
468 void
470 {
471  LOGCALL_VOID(DB, "ChertTable::compact", (void*)p);
472  Assert(writable);
473  int e = block_size;
474  uint8_t * b = buffer;
475  int dir_end = DIR_END(p);
476  for (int c = DIR_START; c < dir_end; c += D2) {
477  Item item(p, c);
478  int l = item.size();
479  e -= l;
480  memmove(b + e, item.get_address(), l);
481  setD(p, c, e); /* reform in b */
482  }
483  memmove(p + e, b + e, block_size - e); /* copy back */
484  e -= dir_end;
485  SET_TOTAL_FREE(p, e);
486  SET_MAX_FREE(p, e);
487 }
488 
492 void
494 {
495  LOGCALL_VOID(DB, "ChertTable::split_root", split_n);
496  /* gain a level */
497  ++level;
498 
499  /* check level overflow - this isn't something that should ever happen
500  * but deserves more than an Assert()... */
501  if (level == BTREE_CURSOR_LEVELS) {
502  throw Xapian::DatabaseCorruptError("Btree has grown impossibly large (" STRINGIZE(BTREE_CURSOR_LEVELS) " levels)");
503  }
504 
505  uint8_t * q = zeroed_new(block_size);
506  C[level].p = q;
507  C[level].c = DIR_START;
508  C[level].n = base.next_free_block();
509  C[level].rewrite = true;
510  SET_REVISION(q, latest_revision_number + 1);
511  SET_LEVEL(q, level);
513  compact(q); /* to reset TOTAL_FREE, MAX_FREE */
514 
515  /* form a null key in b with a pointer to the old root */
516  uint8_t b[10]; /* 7 is exact */
517  Item_wr item(b);
518  item.form_null_key(split_n);
519  add_item(item, level);
520 }
521 
537 void
538 ChertTable::enter_key(int j, Key prevkey, Key newkey)
539 {
540  LOGCALL_VOID(DB, "ChertTable::enter_key", j | Literal("prevkey") | Literal("newkey"));
541  Assert(writable);
542  Assert(prevkey < newkey);
543  AssertRel(j,>=,1);
544 
545  uint4 blocknumber = C[j - 1].n;
546 
547  // FIXME update to use Key
548  // Keys are truncated here: but don't truncate the count at the end away.
549  const int newkey_len = newkey.length();
550  AssertRel(newkey_len,>,0);
551  int i;
552 
553  if (j == 1) {
554  // Truncate the key to the minimal key which differs from prevkey,
555  // the preceding key in the block.
556  i = 0;
557  const int min_len = min(newkey_len, prevkey.length());
558  while (i < min_len && prevkey[i] == newkey[i]) {
559  i++;
560  }
561 
562  // Want one byte of difference.
563  if (i < newkey_len) i++;
564  } else {
565  /* Can't truncate between branch levels, since the separated keys
566  * are in at the leaf level, and truncating again will change the
567  * branch point.
568  */
569  i = newkey_len;
570  }
571 
572  uint8_t b[UCHAR_MAX + 6];
573  Item_wr item(b);
574  Assert(i <= 256 - I2 - C2);
575  Assert(i <= (int)sizeof(b) - I2 - C2 - 4);
576  item.set_key_and_block(newkey, i, blocknumber);
577 
578  // When j > 1 we can make the first key of block p null. This is probably
579  // worthwhile as it trades a small amount of CPU and RAM use for a small
580  // saving in disk use. Other redundant keys will still creep in though.
581  if (j > 1) {
582  uint8_t * p = C[j - 1].p;
583  uint4 n = getint4(newkey.get_address(), newkey_len + K1 + C2);
584  int new_total_free = TOTAL_FREE(p) + newkey_len + C2;
585  // FIXME: incredibly icky going from key to item like this...
586  auto byte_addr = const_cast<uint8_t*>(newkey.get_address());
587  Item_wr(byte_addr - I2).form_null_key(n);
588  SET_TOTAL_FREE(p, new_total_free);
589  }
590 
591  // The split block gets inserted into the parent after the pointer to the
592  // current child.
593  AssertEq(C[j].c, find_in_block(C[j].p, item.key(), false, C[j].c));
594  C[j].c += D2;
595  C[j].rewrite = true; /* a subtle point: this *is* required. */
596  add_item(item, j);
597 }
598 
603 int
604 ChertTable::mid_point(uint8_t * p) const
605 {
606  LOGCALL(DB, int, "ChertTable::mid_point", (void*)p);
607  int n = 0;
608  int dir_end = DIR_END(p);
609  int size = block_size - TOTAL_FREE(p) - dir_end;
610  for (int c = DIR_START; c < dir_end; c += D2) {
611  int l = Item(p, c).size();
612  n += 2 * l;
613  if (n >= size) {
614  if (l < n - size) RETURN(c);
615  RETURN(c + D2);
616  }
617  }
618 
619  /* This shouldn't happen, as the sum of the item sizes should be the same
620  * as the value calculated in size, so assert but return a sane value just
621  * in case. */
622  Assert(false);
623  RETURN(dir_end);
624 }
625 
635 void
636 ChertTable::add_item_to_block(uint8_t * p, Item_wr kt_, int c)
637 {
638  LOGCALL_VOID(DB, "ChertTable::add_item_to_block", (void*)p | Literal("kt_") | c);
639  Assert(writable);
640  int dir_end = DIR_END(p);
641  int kt_len = kt_.size();
642  int needed = kt_len + D2;
643  int new_total = TOTAL_FREE(p) - needed;
644  int new_max = MAX_FREE(p) - needed;
645 
646  Assert(new_total >= 0);
647 
648  AssertRel(MAX_FREE(p),>=,needed);
649 
650  AssertRel(DIR_START,<=,c);
651  AssertRel(c,<=,dir_end);
652 
653  memmove(p + c + D2, p + c, dir_end - c);
654  dir_end += D2;
655  SET_DIR_END(p, dir_end);
656 
657  int o = dir_end + new_max;
658  setD(p, c, o);
659  memmove(p + o, kt_.get_address(), kt_len);
660 
661  SET_MAX_FREE(p, new_max);
662 
663  SET_TOTAL_FREE(p, new_total);
664 }
665 
671 void
673 {
674  LOGCALL_VOID(DB, "ChertTable::add_item", Literal("kt_") | j);
675  Assert(writable);
676  uint8_t * p = C[j].p;
677  int c = C[j].c;
678  uint4 n;
679 
680  int needed = kt_.size() + D2;
681  if (TOTAL_FREE(p) < needed) {
682  int m;
683  // Prepare to split p. After splitting, the block is in two halves, the
684  // lower half is split_p, the upper half p again. add_to_upper_half
685  // becomes true when the item gets added to p, false when it gets added
686  // to split_p.
687 
688  if (seq_count < 0) {
689  // If we're not in sequential mode, we split at the mid point
690  // of the node.
691  m = mid_point(p);
692  } else {
693  // During sequential addition, split at the insert point
694  AssertRel(c,>=,DIR_START);
695  m = c;
696  }
697 
698  uint4 split_n = C[j].n;
699  C[j].n = base.next_free_block();
700 
701  memcpy(split_p, p, block_size); // replicate the whole block in split_p
702  SET_DIR_END(split_p, m);
703  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
704 
705  {
706  int residue = DIR_END(p) - m;
707  int new_dir_end = DIR_START + residue;
708  memmove(p + DIR_START, p + m, residue);
709  SET_DIR_END(p, new_dir_end);
710  }
711 
712  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
713 
714  bool add_to_upper_half;
715  if (seq_count < 0) {
716  add_to_upper_half = (c >= m);
717  } else {
718  // And add item to lower half if split_p has room, otherwise upper
719  // half
720  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
721  }
722 
723  if (add_to_upper_half) {
724  c -= (m - DIR_START);
725  Assert(seq_count < 0 || c <= DIR_START + D2);
726  Assert(c >= DIR_START);
727  Assert(c <= DIR_END(p));
728  add_item_to_block(p, kt_, c);
729  n = C[j].n;
730  } else {
731  Assert(c >= DIR_START);
732  Assert(c <= DIR_END(split_p));
733  add_item_to_block(split_p, kt_, c);
734  n = split_n;
735  }
736  write_block(split_n, split_p);
737 
738  // Check if we're splitting the root block.
739  if (j == level) split_root(split_n);
740 
741  /* Enter a separating key at level j + 1 between */
742  /* the last key of block split_p, and the first key of block p */
743  enter_key(j + 1,
744  Item(split_p, DIR_END(split_p) - D2).key(),
745  Item(p, DIR_START).key());
746  } else {
747  AssertRel(TOTAL_FREE(p),>=,needed);
748 
749  if (MAX_FREE(p) < needed) {
750  compact(p);
751  AssertRel(MAX_FREE(p),>=,needed);
752  }
753 
754  add_item_to_block(p, kt_, c);
755  n = C[j].n;
756  }
757  if (j == 0) {
758  changed_n = n;
759  changed_c = c;
760  }
761 }
762 
770 void
771 ChertTable::delete_item(int j, bool repeatedly)
772 {
773  LOGCALL_VOID(DB, "ChertTable::delete_item", j | repeatedly);
774  Assert(writable);
775  uint8_t * p = C[j].p;
776  int c = C[j].c;
777  AssertRel(DIR_START,<=,c);
778  AssertRel(c,<,DIR_END(p));
779  int kt_len = Item(p, c).size(); /* size of the item to be deleted */
780  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
781 
782  memmove(p + c, p + c + D2, dir_end - c);
783  SET_DIR_END(p, dir_end);
784  SET_MAX_FREE(p, MAX_FREE(p) + D2);
785  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
786 
787  if (!repeatedly) return;
788  if (j < level) {
789  if (dir_end == DIR_START) {
790  base.free_block(C[j].n);
791  C[j].rewrite = false;
792  C[j].n = BLK_UNUSED;
793  C[j + 1].rewrite = true; /* *is* necessary */
794  delete_item(j + 1, true);
795  }
796  } else {
797  Assert(j == level);
798  while (dir_end == DIR_START + D2 && level > 0) {
799  /* single item in the root block, so lose a level */
800  uint4 new_root = Item(p, DIR_START).block_given_by();
801  delete [] p;
802  C[level].p = 0;
803  base.free_block(C[level].n);
804  C[level].rewrite = false;
805  C[level].n = BLK_UNUSED;
806  level--;
807 
808  block_to_cursor(C, level, new_root);
809 
810  p = C[level].p;
811  dir_end = DIR_END(p); /* prepare for the loop */
812  }
813  }
814 }
815 
816 /* debugging aid:
817 static addcount = 0;
818 */
819 
845 int
847 {
848  LOGCALL(DB, int, "ChertTable::add_kt", found);
849  Assert(writable);
850  int components = 0;
851 
852  /*
853  {
854  printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
855  print_bytes(kt[I2] - K1 - C2, kt + I2 + K1); putchar('\n');
856  }
857  */
858  alter();
859 
860  if (found) { /* replacement */
861  seq_count = SEQ_START_POINT;
862  sequential = false;
863 
864  uint8_t * p = C[0].p;
865  int c = C[0].c;
866  AssertRel(DIR_START,<=,c);
867  AssertRel(c,<,DIR_END(p));
868  Item item(p, c);
869  int kt_size = kt.size();
870  int needed = kt_size - item.size();
871 
872  components = item.components_of();
873 
874  if (needed <= 0) {
875  /* simple replacement */
876  memmove(const_cast<uint8_t *>(item.get_address()),
877  kt.get_address(), kt_size);
878  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
879  } else {
880  /* new item into the block's freespace */
881  int new_max = MAX_FREE(p) - kt_size;
882  if (new_max >= 0) {
883  int o = DIR_END(p) + new_max;
884  memmove(p + o, kt.get_address(), kt_size);
885  setD(p, c, o);
886  SET_MAX_FREE(p, new_max);
887  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
888  } else {
889  /* do it the long way */
890  delete_item(0, false);
891  add_item(kt, 0);
892  }
893  }
894  } else {
895  /* addition */
896  if (changed_n == C[0].n && changed_c == C[0].c) {
897  if (seq_count < 0) seq_count++;
898  } else {
899  seq_count = SEQ_START_POINT;
900  sequential = false;
901  }
902  C[0].c += D2;
903  add_item(kt, 0);
904  }
905  RETURN(components);
906 }
907 
908 /* delete_kt() corresponds to add_kt(found), but there are only
909  two cases: if the key is not found nothing is done, and if it is
910  found the corresponding item is deleted with delete_item.
911 */
912 
913 int
915 {
916  LOGCALL(DB, int, "ChertTable::delete_kt", NO_ARGS);
917  Assert(writable);
918 
919  bool found = find(C);
920 
921  int components = 0;
922  seq_count = SEQ_START_POINT;
923  sequential = false;
924 
925  /*
926  {
927  printf("%d) %s ", addcount++, (found ? "deleting " : "ignoring "));
928  print_bytes(B->kt[I2] - K1 - C2, B->kt + I2 + K1); putchar('\n');
929  }
930  */
931  if (found) {
932  components = Item(C[0].p, C[0].c).components_of();
933  alter();
934  delete_item(0, true);
935  }
936  RETURN(components);
937 }
938 
939 /* ChertTable::form_key(key) treats address kt as an item holder and fills in
940 the key part:
941 
942  (I) K key c (C tag)
943 
944 The bracketed parts are left blank. The key is filled in with key_len bytes and
945 K set accordingly. c is set to 1.
946 */
947 
948 void ChertTable::form_key(const string & key) const
949 {
950  LOGCALL_VOID(DB, "ChertTable::form_key", key);
951  kt.form_key(key);
952 }
953 
954 /* ChertTable::add(key, tag) adds the key/tag item to the
955  B-tree, replacing any existing item with the same key.
956 
957  For a long tag, we end up having to add m components, of the form
958 
959  key 1 m tag1
960  key 2 m tag2
961  ...
962  key m m tagm
963 
964  and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
965  n components of the form
966 
967  key 1 n TAG1
968  key 2 n TAG2
969  ...
970  key n n TAGn
971 
972  and n may be greater than, equal to, or less than m. These cases are dealt
973  with in the code below. If m < n for example, we end up with a series of
974  deletions.
975 */
976 
977 void
978 ChertTable::add(const string &key, string tag, bool already_compressed)
979 {
980  LOGCALL_VOID(DB, "ChertTable::add", key | tag | already_compressed);
981  Assert(writable);
982 
983  if (handle < 0) create_and_open(block_size);
984 
985  form_key(key);
986 
987  bool compressed = false;
988  if (already_compressed) {
989  compressed = true;
990  } else if (compress_strategy != DONT_COMPRESS && tag.size() > COMPRESS_MIN) {
991  static_assert(DONT_COMPRESS != Z_DEFAULT_STRATEGY,
992  "DONT_COMPRESS clashes with zlib constant");
993  static_assert(DONT_COMPRESS != Z_FILTERED,
994  "DONT_COMPRESS clashes with zlib constant");
995  static_assert(DONT_COMPRESS != Z_HUFFMAN_ONLY,
996  "DONT_COMPRESS clashes with zlib constant");
997 #ifdef Z_RLE
998  static_assert(DONT_COMPRESS != Z_RLE,
999  "DONT_COMPRESS clashes with zlib constant");
1000 #endif
1001 
1002  lazy_alloc_deflate_zstream();
1003 
1004  deflate_zstream->next_in =
1005  reinterpret_cast<Bytef *>(const_cast<char *>(tag.data()));
1006  deflate_zstream->avail_in = static_cast<uInt>(tag.size());
1007 
1008  // If compressed size is >= tag.size(), we don't want to compress.
1009  unsigned long blk_len = tag.size() - 1;
1010  unsigned char * blk = new unsigned char[blk_len];
1011  deflate_zstream->next_out = blk;
1012  deflate_zstream->avail_out = static_cast<uInt>(blk_len);
1013 
1014  int err = deflate(deflate_zstream, Z_FINISH);
1015  if (err == Z_STREAM_END) {
1016  // If deflate succeeded, then the output was at least one byte
1017  // smaller than the input.
1018  tag.assign(reinterpret_cast<const char *>(blk), deflate_zstream->total_out);
1019  compressed = true;
1020  } else {
1021  // Deflate failed - presumably the data wasn't compressible.
1022  }
1023 
1024  delete [] blk;
1025  }
1026 
1027  // sort of matching kt.append_chunk(), but setting the chunk
1028  const size_t cd = kt.key().length() + K1 + I2 + C2 + C2; // offset to the tag data
1029  const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
1030  size_t first_L = L; // - amount for tag1
1031  bool found = find(C);
1032  if (!found) {
1033  uint8_t * p = C[0].p;
1034  size_t n = TOTAL_FREE(p) % (max_item_size + D2);
1035  if (n > D2 + cd) {
1036  n -= (D2 + cd);
1037  // if n >= last then fully filling this block won't produce
1038  // an extra item, so we might as well do this even if
1039  // full_compaction isn't active.
1040  //
1041  // In the full_compaction case, it turns out we shouldn't always
1042  // try to fill every last byte. Doing so can actually increase the
1043  // total space required (I believe this effect is due to longer
1044  // dividing keys being required in the index blocks). Empirically,
1045  // n >= key.size() + K appears a good criterion for K ~= 34. This
1046  // seems to save about 0.2% in total database size over always
1047  // splitting the tag. It'll also give be slightly faster retrieval
1048  // as we can avoid reading an extra block occasionally.
1049  size_t last = tag.length() % L;
1050  if (n >= last || (full_compaction && n >= key.size() + 34))
1051  first_L = n;
1052  }
1053  }
1054 
1055  // a null tag must be added in of course
1056  int m = tag.empty() ? 1 : (tag.length() - first_L + L - 1) / L + 1;
1057  // there are m items to add
1058  /* FIXME: sort out this error higher up and turn this into
1059  * an assert.
1060  */
1061  if (m >= BYTE_PAIR_RANGE)
1062  throw Xapian::UnimplementedError("Can't handle insanely large tags");
1063 
1064  int n = 0; // initialise to shut off warning
1065  // - and there will be n to delete
1066  int o = 0; // Offset into the tag
1067  size_t residue = tag.length(); // Bytes of the tag remaining to add in
1068  int replacement = false; // Has there been a replacement ?
1069  int i;
1070  kt.set_components_of(m);
1071  for (i = 1; i <= m; i++) {
1072  size_t l = (i == m ? residue : (i == 1 ? first_L : L));
1073  Assert(cd + l <= block_size);
1074  Assert(string::size_type(o + l) <= tag.length());
1075  kt.set_tag(cd, tag.data() + o, l, compressed);
1076  kt.set_component_of(i);
1077 
1078  o += l;
1079  residue -= l;
1080 
1081  if (i > 1) found = find(C);
1082  n = add_kt(found);
1083  if (n > 0) replacement = true;
1084  }
1085  /* o == tag.length() here, and n may be zero */
1086  for (i = m + 1; i <= n; i++) {
1087  kt.set_component_of(i);
1088  delete_kt();
1089  }
1090  if (!replacement) ++item_count;
1091  Btree_modified = true;
1092  if (cursor_created_since_last_modification) {
1093  cursor_created_since_last_modification = false;
1094  ++cursor_version;
1095  }
1096 }
1097 
1098 /* ChertTable::del(key) returns false if the key is not in the B-tree,
1099  otherwise deletes it and returns true.
1100 
1101  Again, this is parallel to ChertTable::add, but simpler in form.
1102 */
1103 
1104 bool
1105 ChertTable::del(const string &key)
1106 {
1107  LOGCALL(DB, bool, "ChertTable::del", key);
1108  Assert(writable);
1109 
1110  if (handle < 0) {
1111  if (handle == -2) {
1113  }
1114  RETURN(false);
1115  }
1116 
1117  // We can't delete a key which we is too long for us to store.
1118  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1119 
1120  if (key.empty()) RETURN(false);
1121  form_key(key);
1122 
1123  int n = delete_kt(); /* there are n items to delete */
1124  if (n <= 0) RETURN(false);
1125 
1126  for (int i = 2; i <= n; i++) {
1127  kt.set_component_of(i);
1128  delete_kt();
1129  }
1130 
1131  item_count--;
1132  Btree_modified = true;
1133  if (cursor_created_since_last_modification) {
1134  cursor_created_since_last_modification = false;
1135  ++cursor_version;
1136  }
1137  RETURN(true);
1138 }
1139 
1140 bool
1141 ChertTable::readahead_key(const string &key) const
1142 {
1143  LOGCALL(DB, bool, "ChertTable::readahead_key", key);
1144  Assert(!key.empty());
1145 
1146  // Two cases:
1147  //
1148  // handle = -1: Lazy table which isn't yet open
1149  //
1150  // handle = -2: Table has been closed. Since the readahead is just a
1151  // hint, we can safely ignore it for a closed table.
1152  if (handle < 0)
1153  RETURN(false);
1154 
1155  // If the table only has one level, there are no branch blocks to preread.
1156  if (level == 0)
1157  RETURN(false);
1158 
1159  // An overlong key cannot be found.
1160  if (key.size() > CHERT_BTREE_MAX_KEY_LEN)
1161  RETURN(true);
1162 
1163  form_key(key);
1164  Key ktkey = kt.key();
1165 
1166  // We'll only readahead the first level, since descending the B-tree would
1167  // require actual reads that would likely hurt performance more than help.
1168  const uint8_t * p = C[level].p;
1169  int c = find_in_block(p, ktkey, false, C[level].c);
1170  uint4 n = Item(p, c).block_given_by();
1171  // Don't preread if it's the block we last preread or already in the
1172  // cursor.
1173  if (n != last_readahead && n != C[level - 1].n) {
1174  /* Use the base bit_map_size not the bitmap's size, because the latter
1175  * is uninitialised in readonly mode.
1176  */
1177  Assert(n / CHAR_BIT < base.get_bit_map_size());
1178 
1179  last_readahead = n;
1180  if (!io_readahead_block(handle, block_size, n))
1181  RETURN(false);
1182  }
1183  RETURN(true);
1184 }
1185 
1186 bool
1187 ChertTable::get_exact_entry(const string &key, string & tag) const
1188 {
1189  LOGCALL(DB, bool, "ChertTable::get_exact_entry", key | tag);
1190  Assert(!key.empty());
1191 
1192  if (handle < 0) {
1193  if (handle == -2) {
1195  }
1196  RETURN(false);
1197  }
1198 
1199  // An oversized key can't exist, so attempting to search for it should fail.
1200  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1201 
1202  form_key(key);
1203  if (!find(C)) RETURN(false);
1204 
1205  (void)read_tag(C, &tag, false);
1206  RETURN(true);
1207 }
1208 
1209 bool
1210 ChertTable::key_exists(const string &key) const
1211 {
1212  LOGCALL(DB, bool, "ChertTable::key_exists", key);
1213  Assert(!key.empty());
1214 
1215  // An oversized key can't exist, so attempting to search for it should fail.
1216  if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1217 
1218  form_key(key);
1219  RETURN(find(C));
1220 }
1221 
1222 bool
1223 ChertTable::read_tag(Cursor * C_, string *tag, bool keep_compressed) const
1224 {
1225  LOGCALL(DB, bool, "ChertTable::read_tag", Literal("C_") | tag | keep_compressed);
1226  Item item(C_[0].p, C_[0].c);
1227 
1228  /* n components to join */
1229  int n = item.components_of();
1230 
1231  tag->resize(0);
1232  // max_item_size also includes K1 + I2 + C2 + C2 bytes overhead and the key
1233  // (which is at least 1 byte long).
1234  if (n > 1) tag->reserve((max_item_size - (1 + K1 + I2 + C2 + C2)) * n);
1235 
1236  item.append_chunk(tag);
1237  bool compressed = item.get_compressed();
1238 
1239  for (int i = 2; i <= n; i++) {
1240  if (!next(C_, 0)) {
1241  throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
1242  }
1243  (void)Item(C_[0].p, C_[0].c).append_chunk(tag);
1244  }
1245  // At this point the cursor is on the last item - calling next will move
1246  // it to the next key (ChertCursor::get_tag() relies on this).
1247  if (!compressed || keep_compressed) RETURN(compressed);
1248 
1249  // FIXME: Perhaps we should decompress each chunk as we read it so we
1250  // don't need both the full compressed and uncompressed tags in memory
1251  // at once.
1252 
1253  string utag;
1254  // May not be enough for a compressed tag, but it's a reasonable guess.
1255  utag.reserve(tag->size() + tag->size() / 2);
1256 
1257  Bytef buf[8192];
1258 
1259  lazy_alloc_inflate_zstream();
1260 
1261  inflate_zstream->next_in =
1262  reinterpret_cast<Bytef*>(const_cast<char *>(tag->data()));
1263  inflate_zstream->avail_in = static_cast<uInt>(tag->size());
1264 
1265  int err = Z_OK;
1266  while (err != Z_STREAM_END) {
1267  inflate_zstream->next_out = buf;
1268  inflate_zstream->avail_out = static_cast<uInt>(sizeof(buf));
1269  err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1270  if (err == Z_BUF_ERROR && inflate_zstream->avail_in == 0) {
1271  LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << inflate_zstream->adler);
1272  Bytef header2[4];
1273  aligned_write4(header2, inflate_zstream->adler);
1274  inflate_zstream->next_in = header2;
1275  inflate_zstream->avail_in = 4;
1276  err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1277  if (err == Z_STREAM_END) break;
1278  }
1279 
1280  if (err != Z_OK && err != Z_STREAM_END) {
1281  if (err == Z_MEM_ERROR) throw std::bad_alloc();
1282  string msg = "inflate failed";
1283  if (inflate_zstream->msg) {
1284  msg += " (";
1285  msg += inflate_zstream->msg;
1286  msg += ')';
1287  }
1288  throw Xapian::DatabaseError(msg);
1289  }
1290 
1291  utag.append(reinterpret_cast<const char *>(buf),
1292  inflate_zstream->next_out - buf);
1293  }
1294  if (utag.size() != inflate_zstream->total_out) {
1295  string msg = "compressed tag didn't expand to the expected size: ";
1296  msg += str(utag.size());
1297  msg += " != ";
1298  // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
1299  msg += str(size_t(inflate_zstream->total_out));
1300  throw Xapian::DatabaseCorruptError(msg);
1301  }
1302 
1303  swap(*tag, utag);
1304 
1305  RETURN(false);
1306 }
1307 
1308 void
1310 {
1311  LOGCALL_VOID(DB, "ChertTable::set_full_compaction", parity);
1312  Assert(writable);
1313 
1314  if (parity) seq_count = 0;
1315  full_compaction = parity;
1316 }
1317 
1319  LOGCALL(DB, ChertCursor *, "ChertTable::cursor_get", NO_ARGS);
1320  if (handle < 0) {
1321  if (handle == -2) {
1323  }
1324  RETURN(NULL);
1325  }
1326  // FIXME Ick - casting away const is nasty
1327  RETURN(new ChertCursor(const_cast<ChertTable *>(this)));
1328 }
1329 
1330 /************ B-tree opening and closing ************/
1331 
1332 bool
1333 ChertTable::basic_open(bool revision_supplied, chert_revision_number_t revision_)
1334 {
1335  LOGCALL(DB, bool, "ChertTable::basic_open", revision_supplied | revision_);
1336  int ch = 'X'; /* will be 'A' or 'B' */
1337 
1338  {
1339  const size_t BTREE_BASES = 2;
1340  string err_msg;
1341  static const char basenames[BTREE_BASES] = { 'A', 'B' };
1342 
1343  ChertTable_base bases[BTREE_BASES];
1344  bool base_ok[BTREE_BASES];
1345 
1346  both_bases = true;
1347  bool valid_base = false;
1348  {
1349  for (size_t i = 0; i < BTREE_BASES; ++i) {
1350  bool ok = bases[i].read(name, basenames[i], writable, err_msg);
1351  base_ok[i] = ok;
1352  if (ok) {
1353  valid_base = true;
1354  } else {
1355  both_bases = false;
1356  }
1357  }
1358  }
1359 
1360  if (!valid_base) {
1361  if (handle >= 0) {
1362  ::close(handle);
1363  handle = -1;
1364  }
1365  string message = "Error opening table '";
1366  message += name;
1367  message += "':\n";
1368  message += err_msg;
1369  throw Xapian::DatabaseOpeningError(message);
1370  }
1371 
1372  if (revision_supplied) {
1373  bool found_revision = false;
1374  for (size_t i = 0; i < BTREE_BASES; ++i) {
1375  if (base_ok[i] && bases[i].get_revision() == revision_) {
1376  ch = basenames[i];
1377  found_revision = true;
1378  break;
1379  }
1380  }
1381  if (!found_revision) {
1382  /* Couldn't open the revision that was asked for.
1383  * This shouldn't throw an exception, but should just return
1384  * false to upper levels.
1385  */
1386  RETURN(false);
1387  }
1388  } else {
1389  chert_revision_number_t highest_revision = 0;
1390  for (size_t i = 0; i < BTREE_BASES; ++i) {
1391  if (base_ok[i] && bases[i].get_revision() >= highest_revision) {
1392  ch = basenames[i];
1393  highest_revision = bases[i].get_revision();
1394  }
1395  }
1396  }
1397 
1398  ChertTable_base *basep = 0;
1399  ChertTable_base *other_base = 0;
1400 
1401  for (size_t i = 0; i < BTREE_BASES; ++i) {
1402  LOGLINE(DB, "Checking (ch == " << ch << ") against "
1403  "basenames[" << i << "] == " << basenames[i]);
1404  LOGLINE(DB, "bases[" << i << "].get_revision() == " <<
1405  bases[i].get_revision());
1406  LOGLINE(DB, "base_ok[" << i << "] == " << base_ok[i]);
1407  if (ch == basenames[i]) {
1408  basep = &bases[i];
1409 
1410  // FIXME: assuming only two bases for other_base
1411  size_t otherbase_num = 1 - i;
1412  if (base_ok[otherbase_num]) {
1413  other_base = &bases[otherbase_num];
1414  }
1415  break;
1416  }
1417  }
1418  Assert(basep);
1419 
1420  /* basep now points to the most recent base block */
1421 
1422  /* Avoid copying the bitmap etc. - swap contents with the base
1423  * object in the vector, since it'll be destroyed anyway soon.
1424  */
1425  base.swap(*basep);
1426 
1427  revision_number = base.get_revision();
1428  block_size = base.get_block_size();
1429  root = base.get_root();
1430  level = base.get_level();
1431  if (rare(level >= BTREE_CURSOR_LEVELS))
1432  throw Xapian::DatabaseCorruptError("Impossibly many Btree levels");
1433  //bit_map_size = basep->get_bit_map_size();
1434  item_count = base.get_item_count();
1435  faked_root_block = base.get_have_fakeroot();
1436  sequential = base.get_sequential();
1437 
1438  if (other_base != 0) {
1439  latest_revision_number = other_base->get_revision();
1440  if (revision_number > latest_revision_number)
1441  latest_revision_number = revision_number;
1442  } else {
1443  latest_revision_number = revision_number;
1444  }
1445  }
1446 
1447  /* kt holds constructed items as well as keys */
1448  kt = Item_wr(zeroed_new(block_size));
1449 
1450  set_max_item_size(BLOCK_CAPACITY);
1451 
1452  base_letter = ch;
1453 
1454  if (cursor_created_since_last_modification) {
1455  cursor_created_since_last_modification = false;
1456  ++cursor_version;
1457  }
1458 
1459  /* ready to open the main file */
1460 
1461  RETURN(true);
1462 }
1463 
1464 void
1466 {
1467  LOGCALL_VOID(DB, "ChertTable::read_root", NO_ARGS);
1468  if (faked_root_block) {
1469  /* root block for an unmodified database. */
1470  uint8_t * p = C[0].p;
1471  Assert(p);
1472 
1473  /* clear block - shouldn't be necessary, but is a bit nicer,
1474  * and means that the same operations should always produce
1475  * the same database. */
1476  memset(p, 0, block_size);
1477 
1478  int o = block_size - I2 - K1 - C2 - C2;
1479  Item_wr(p + o).fake_root_item();
1480 
1481  setD(p, DIR_START, o); // its directory entry
1482  SET_DIR_END(p, DIR_START + D2);// the directory size
1483 
1484  o -= (DIR_START + D2);
1485  SET_MAX_FREE(p, o);
1486  SET_TOTAL_FREE(p, o);
1487  SET_LEVEL(p, 0);
1488 
1489  if (!writable) {
1490  /* reading - revision number doesn't matter as long as
1491  * it's not greater than the current one. */
1492  SET_REVISION(p, 0);
1493  C[0].n = 0;
1494  } else {
1495  /* writing - */
1496  SET_REVISION(p, latest_revision_number + 1);
1497  C[0].n = base.next_free_block();
1498  }
1499  } else {
1500  /* using a root block stored on disk */
1501  block_to_cursor(C, level, root);
1502 
1503  if (REVISION(C[level].p) > revision_number) set_overwritten();
1504  /* although this is unlikely */
1505  }
1506 }
1507 
1508 bool
1509 ChertTable::do_open_to_write(bool revision_supplied,
1510  chert_revision_number_t revision_,
1511  bool create_db)
1512 {
1513  LOGCALL(DB, bool, "ChertTable::do_open_to_write", revision_supplied | revision_ | create_db);
1514  if (handle == -2) {
1516  }
1517  handle = io_open_block_wr(name + "DB", create_db);
1518  if (handle < 0) {
1519  // lazy doesn't make a lot of sense with create_db anyway, but ENOENT
1520  // with O_CREAT means a parent directory doesn't exist.
1521  if (lazy && !create_db && errno == ENOENT) {
1522  revision_number = revision_;
1523  RETURN(true);
1524  }
1525  string message(create_db ? "Couldn't create " : "Couldn't open ");
1526  message += name;
1527  message += "DB read/write: ";
1528  errno_to_string(errno, message);
1529  throw Xapian::DatabaseOpeningError(message);
1530  }
1531 
1532  if (!basic_open(revision_supplied, revision_)) {
1533  ::close(handle);
1534  handle = -1;
1535  if (!revision_supplied) {
1536  throw Xapian::DatabaseOpeningError("Failed to open for writing");
1537  }
1538  /* When the revision is supplied, it's not an exceptional
1539  * case when open failed, so we just return false here.
1540  */
1541  RETURN(false);
1542  }
1543 
1544  writable = true;
1545 
1546  for (int j = 0; j <= level; j++) {
1547  C[j].n = BLK_UNUSED;
1548  C[j].p = new uint8_t[block_size];
1549  }
1550  split_p = new uint8_t[block_size];
1551  read_root();
1552 
1553  buffer = zeroed_new(block_size);
1554 
1555  changed_n = 0;
1556  changed_c = DIR_START;
1557  seq_count = SEQ_START_POINT;
1558 
1559  RETURN(true);
1560 }
1561 
1562 ChertTable::ChertTable(const char * tablename_, const string & path_,
1563  bool readonly_, int compress_strategy_, bool lazy_)
1564  : tablename(tablename_),
1565  revision_number(0),
1566  item_count(0),
1567  block_size(0),
1568  latest_revision_number(0),
1569  both_bases(false),
1570  base_letter('A'),
1571  faked_root_block(true),
1572  sequential(true),
1573  handle(-1),
1574  level(0),
1575  root(0),
1576  kt(0),
1577  buffer(0),
1578  base(),
1579  name(path_),
1580  seq_count(0),
1581  changed_n(0),
1582  changed_c(0),
1583  max_item_size(0),
1584  Btree_modified(false),
1585  full_compaction(false),
1586  writable(!readonly_),
1587  cursor_created_since_last_modification(false),
1588  cursor_version(0),
1589  split_p(0),
1590  compress_strategy(compress_strategy_),
1591  deflate_zstream(NULL),
1592  inflate_zstream(NULL),
1593  lazy(lazy_),
1594  last_readahead(BLK_UNUSED)
1595 {
1596  LOGCALL_CTOR(DB, "ChertTable", tablename_ | path_ | readonly_ | compress_strategy_ | lazy_);
1597 }
1598 
1599 bool
1601 {
1602  if (handle < 0) {
1603  if (handle == -2) {
1605  }
1606  return true;
1607  }
1608  ChertCursor cur(const_cast<ChertTable*>(this));
1609  cur.find_entry(string());
1610  return !cur.next();
1611 }
1612 
1613 void
1615  if (usual(deflate_zstream)) {
1616  if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
1617  // Try to recover by deleting the stream and starting from scratch.
1618  delete deflate_zstream;
1619  }
1620 
1621  deflate_zstream = new z_stream;
1622 
1623  deflate_zstream->zalloc = Z_NULL;
1624  deflate_zstream->zfree = Z_NULL;
1625  deflate_zstream->opaque = Z_NULL;
1626 
1627  // -15 means raw deflate with 32K LZ77 window (largest)
1628  // memLevel 9 is the highest (8 is default)
1629  int err;
1630  err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
1631  -15, 9, compress_strategy);
1632  if (rare(err != Z_OK)) {
1633  if (err == Z_MEM_ERROR) {
1634  delete deflate_zstream;
1635  deflate_zstream = 0;
1636  throw std::bad_alloc();
1637  }
1638  string msg = "deflateInit2 failed (";
1639  if (deflate_zstream->msg) {
1640  msg += deflate_zstream->msg;
1641  } else {
1642  msg += str(err);
1643  }
1644  msg += ')';
1645  delete deflate_zstream;
1646  deflate_zstream = 0;
1647  throw Xapian::DatabaseError(msg);
1648  }
1649 }
1650 
1651 void
1653  if (usual(inflate_zstream)) {
1654  if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
1655  // Try to recover by deleting the stream and starting from scratch.
1656  delete inflate_zstream;
1657  }
1658 
1659  inflate_zstream = new z_stream;
1660 
1661  inflate_zstream->zalloc = Z_NULL;
1662  inflate_zstream->zfree = Z_NULL;
1663  inflate_zstream->opaque = Z_NULL;
1664 
1665  inflate_zstream->next_in = Z_NULL;
1666  inflate_zstream->avail_in = 0;
1667 
1668  int err = inflateInit2(inflate_zstream, -15);
1669  if (rare(err != Z_OK)) {
1670  if (err == Z_MEM_ERROR) {
1671  delete inflate_zstream;
1672  inflate_zstream = 0;
1673  throw std::bad_alloc();
1674  }
1675  string msg = "inflateInit2 failed (";
1676  if (inflate_zstream->msg) {
1677  msg += inflate_zstream->msg;
1678  } else {
1679  msg += str(err);
1680  }
1681  msg += ')';
1682  delete inflate_zstream;
1683  inflate_zstream = 0;
1684  throw Xapian::DatabaseError(msg);
1685  }
1686 }
1687 
1688 bool
1690  LOGCALL(DB, bool, "ChertTable::exists", NO_ARGS);
1691  RETURN(file_exists(name + "DB") &&
1692  (file_exists(name + "baseA") || file_exists(name + "baseB")));
1693 }
1694 
1695 void
1697 {
1698  LOGCALL_VOID(DB, "ChertTable::erase", NO_ARGS);
1699  close();
1700 
1701  (void)io_unlink(name + "baseA");
1702  (void)io_unlink(name + "baseB");
1703  (void)io_unlink(name + "DB");
1704 }
1705 
1706 void
1707 ChertTable::set_block_size(unsigned int block_size_)
1708 {
1709  LOGCALL_VOID(DB, "ChertTable::set_block_size", block_size_);
1710  // Block size must in the range 2048..BYTE_PAIR_RANGE, and a power of two.
1711  if (block_size_ < 2048 || block_size_ > BYTE_PAIR_RANGE ||
1712  (block_size_ & (block_size_ - 1)) != 0) {
1713  block_size_ = CHERT_DEFAULT_BLOCK_SIZE;
1714  }
1715  block_size = block_size_;
1716 }
1717 
1718 void
1719 ChertTable::create_and_open(unsigned int block_size_)
1720 {
1721  LOGCALL_VOID(DB, "ChertTable::create_and_open", block_size_);
1722  if (handle == -2) {
1724  }
1725  Assert(writable);
1726  close();
1727 
1728  set_block_size(block_size_);
1729 
1730  // FIXME: it would be good to arrange that this works such that there's
1731  // always a valid table in place if you run create_and_open() on an
1732  // existing table.
1733 
1734  /* write initial values to files */
1735 
1736  /* create the base file */
1737  ChertTable_base base_;
1739  base_.set_block_size(block_size);
1740  base_.set_have_fakeroot(true);
1741  base_.set_sequential(true);
1742  // Doing a full sync here would be overly paranoid, as an empty table
1743  // contains no precious data and xapian-check can recreate lost base
1744  // files.
1745  base_.write_to_file(name + "baseA", 'A', string(), -1, NULL);
1746 
1747  /* remove the alternative base file, if any */
1748  (void)io_unlink(name + "baseB");
1749 
1750  // Any errors are thrown if revision_supplied is false.
1751  (void)do_open_to_write(false, 0, true);
1752 }
1753 
1755  LOGCALL_DTOR(DB, "ChertTable");
1757 
1758  if (deflate_zstream) {
1759  // Errors which we care about have already been handled, so just ignore
1760  // any which get returned here.
1761  (void) deflateEnd(deflate_zstream);
1762  delete deflate_zstream;
1763  }
1764 
1765  if (inflate_zstream) {
1766  // Errors which we care about have already been handled, so just ignore
1767  // any which get returned here.
1768  (void) inflateEnd(inflate_zstream);
1769  delete inflate_zstream;
1770  }
1771 }
1772 
1773 void ChertTable::close(bool permanent) {
1774  LOGCALL_VOID(DB, "ChertTable::close", permanent);
1775 
1776  if (handle >= 0) {
1777  // If an error occurs here, we just ignore it, since we're just
1778  // trying to free everything.
1779  (void)::close(handle);
1780  handle = -1;
1781  }
1782 
1783  if (permanent) {
1784  handle = -2;
1785  // Don't delete the resources in the table, since they may
1786  // still be used to look up cached content.
1787  return;
1788  }
1789  for (int j = level; j >= 0; j--) {
1790  delete [] C[j].p;
1791  C[j].p = 0;
1792  }
1793  delete [] split_p;
1794  split_p = 0;
1795 
1796  delete [] kt.get_address();
1797  kt = Item_wr(0);
1798  delete [] buffer;
1799  buffer = 0;
1800 }
1801 
1802 void
1804 {
1805  LOGCALL_VOID(DB, "ChertTable::flush_db", NO_ARGS);
1806  Assert(writable);
1807  if (handle < 0) {
1808  if (handle == -2) {
1810  }
1811  return;
1812  }
1813 
1814  for (int j = level; j >= 0; j--) {
1815  if (C[j].rewrite) {
1816  write_block(C[j].n, C[j].p);
1817  }
1818  }
1819 
1820  if (Btree_modified) {
1821  faked_root_block = false;
1822  }
1823 }
1824 
1825 void
1827  const string * changes_tail)
1828 {
1829  LOGCALL_VOID(DB, "ChertTable::commit", revision | changes_fd | changes_tail);
1830  Assert(writable);
1831 
1832  if (revision <= revision_number) {
1833  throw Xapian::DatabaseError("New revision too low");
1834  }
1835 
1836  if (handle < 0) {
1837  if (handle == -2) {
1839  }
1841  return;
1842  }
1843 
1844  try {
1845  if (faked_root_block) {
1846  /* We will use a dummy bitmap. */
1847  base.clear_bit_map();
1848  }
1849 
1850  base.set_revision(revision);
1851  base.set_root(C[level].n);
1852  base.set_level(level);
1856 
1858 
1859  both_bases = true;
1861  root = C[level].n;
1862 
1863  Btree_modified = false;
1864 
1865  for (int i = 0; i < BTREE_CURSOR_LEVELS; ++i) {
1866  C[i].n = BLK_UNUSED;
1867  C[i].c = -1;
1868  C[i].rewrite = false;
1869  }
1870 
1871  // Save to "<table>.tmp" and then rename to "<table>.base<letter>" so
1872  // that a reader can't try to read a partially written base file.
1873  string tmp = name;
1874  tmp += "tmp";
1875  string basefile = name;
1876  basefile += "base";
1877  basefile += char(base_letter);
1878  base.write_to_file(tmp, base_letter, tablename, changes_fd, changes_tail);
1879 
1880  // Do this as late as possible to allow maximum time for writes to
1881  // happen, and so the calls to io_sync() are adjacent which may be
1882  // more efficient, at least with some Linux kernel versions.
1883  if (changes_tail ? !io_full_sync(handle) : !io_sync(handle)) {
1884  (void)::close(handle);
1885  handle = -1;
1886  (void)unlink(tmp.c_str());
1887  throw Xapian::DatabaseError("Can't commit new revision - failed to flush DB to disk");
1888  }
1889 
1890  if (!io_tmp_rename(tmp, basefile)) {
1891  string msg("Couldn't update base file ");
1892  msg += basefile;
1893  throw Xapian::DatabaseError(msg, errno);
1894  }
1895  base.commit();
1896 
1897  read_root();
1898 
1899  changed_n = 0;
1900  changed_c = DIR_START;
1902  } catch (...) {
1904  throw;
1905  }
1906 }
1907 
1908 void
1910 {
1911  LOGCALL_VOID(DB, "ChertTable::write_changed_blocks", changes_fd);
1912  Assert(changes_fd >= 0);
1913  if (handle < 0) return;
1914  if (faked_root_block) return;
1915 
1916  string buf;
1917  pack_uint(buf, 2u); // Indicate the item is a list of blocks
1918  pack_string(buf, tablename);
1919  pack_uint(buf, block_size);
1920  io_write(changes_fd, buf.data(), buf.size());
1921 
1922  // Compare the old and new bitmaps to find blocks which have changed, and
1923  // write them to the file descriptor.
1924  uint4 n = 0;
1925  uint8_t * p = new uint8_t[block_size];
1926  try {
1928  while (base.find_changed_block(&n)) {
1929  buf.resize(0);
1930  pack_uint(buf, n + 1);
1931  io_write(changes_fd, buf.data(), buf.size());
1932 
1933  // Read block n.
1934  read_block(n, p);
1935 
1936  // Write block n to the file.
1937  io_write(changes_fd, reinterpret_cast<const char *>(p),
1938  block_size);
1939  ++n;
1940  }
1941  delete[] p;
1942  p = 0;
1943  } catch (...) {
1944  delete[] p;
1945  throw;
1946  }
1947  buf.resize(0);
1948  pack_uint(buf, 0u);
1949  io_write(changes_fd, buf.data(), buf.size());
1950 }
1951 
1952 void
1954 {
1955  LOGCALL_VOID(DB, "ChertTable::cancel", NO_ARGS);
1956  Assert(writable);
1957 
1958  if (handle < 0) {
1959  if (handle == -2) {
1961  }
1962  latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1963  return;
1964  }
1965 
1966  // This causes problems: if (!Btree_modified) return;
1967 
1968  string err_msg;
1969  if (!base.read(name, base_letter, writable, err_msg)) {
1970  throw Xapian::DatabaseCorruptError(string("Couldn't reread base ") + base_letter);
1971  }
1972 
1975  root = base.get_root();
1976  level = base.get_level();
1977  if (rare(level >= BTREE_CURSOR_LEVELS))
1978  throw Xapian::DatabaseCorruptError("Impossibly many Btree levels");
1979  //bit_map_size = basep->get_bit_map_size();
1983 
1984  latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1985 
1986  Btree_modified = false;
1987 
1988  for (int j = 0; j <= level; j++) {
1989  C[j].n = BLK_UNUSED;
1990  C[j].rewrite = false;
1991  }
1992  read_root();
1993 
1994  changed_n = 0;
1995  changed_c = DIR_START;
1997 
2000  ++cursor_version;
2001  }
2002 }
2003 
2004 /************ B-tree reading ************/
2005 
2006 bool
2007 ChertTable::do_open_to_read(bool revision_supplied, chert_revision_number_t revision_)
2008 {
2009  LOGCALL(DB, bool, "ChertTable::do_open_to_read", revision_supplied | revision_);
2010  if (handle == -2) {
2012  }
2013  handle = io_open_block_rd(name + "DB");
2014  if (handle < 0) {
2015  if (lazy) {
2016  // This table is optional when reading!
2017  revision_number = revision_;
2018  RETURN(true);
2019  }
2020  string message("Couldn't open ");
2021  message += name;
2022  message += "DB to read: ";
2023  errno_to_string(errno, message);
2024  throw Xapian::DatabaseOpeningError(message);
2025  }
2026 
2027  if (!basic_open(revision_supplied, revision_)) {
2028  ::close(handle);
2029  handle = -1;
2030  if (revision_supplied) {
2031  // The requested revision was not available.
2032  // This could be because the database was modified underneath us, or
2033  // because a base file is missing. Return false, and work out what
2034  // the problem was at a higher level.
2035  RETURN(false);
2036  }
2037  throw Xapian::DatabaseOpeningError("Failed to open table for reading");
2038  }
2039 
2040  for (int j = 0; j <= level; j++) {
2041  C[j].n = BLK_UNUSED;
2042  C[j].p = new uint8_t[block_size];
2043  }
2044 
2045  read_root();
2046  RETURN(true);
2047 }
2048 
2049 void
2051 {
2052  LOGCALL_VOID(DB, "ChertTable::open", NO_ARGS);
2053  LOGLINE(DB, "opening at path " << name);
2054  close();
2055 
2056  if (!writable) {
2057  // Any errors are thrown if revision_supplied is false
2058  (void)do_open_to_read(false, 0);
2059  return;
2060  }
2061 
2062  // Any errors are thrown if revision_supplied is false.
2063  (void)do_open_to_write(false, 0);
2064 }
2065 
2066 bool
2068 {
2069  LOGCALL(DB, bool, "ChertTable::open", revision);
2070  LOGLINE(DB, "opening for particular revision at path " << name);
2071  close();
2072 
2073  if (!writable) {
2074  if (do_open_to_read(true, revision)) {
2075  AssertEq(revision_number, revision);
2076  RETURN(true);
2077  } else {
2078  close();
2079  RETURN(false);
2080  }
2081  }
2082 
2083  if (!do_open_to_write(true, revision)) {
2084  // Can't open at the requested revision.
2085  close();
2086  RETURN(false);
2087  }
2088 
2089  AssertEq(revision_number, revision);
2090  RETURN(true);
2091 }
2092 
2093 bool
2094 ChertTable::prev_for_sequential(Cursor * C_, int /*dummy*/) const
2095 {
2096  LOGCALL(DB, bool, "ChertTable::prev_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2097  int c = C_[0].c;
2098  AssertRel(DIR_START,<=,c);
2099  AssertRel(c,<,DIR_END(C_[0].p));
2100  if (c == DIR_START) {
2101  uint8_t * p = C_[0].p;
2102  Assert(p);
2103  uint4 n = C_[0].n;
2104  while (true) {
2105  if (n == 0) RETURN(false);
2106  n--;
2107  if (writable) {
2108  if (n == C[0].n) {
2109  // Block is a leaf block in the built-in cursor
2110  // (potentially in modified form).
2111  memcpy(p, C[0].p, block_size);
2112  } else {
2113  // Blocks in the built-in cursor may not have been written
2114  // to disk yet, so we have to check that the block number
2115  // isn't in the built-in cursor or we'll read an
2116  // uninitialised block (for which GET_LEVEL(p) will
2117  // probably return 0).
2118  int j;
2119  for (j = 1; j <= level; ++j) {
2120  if (n == C[j].n) break;
2121  }
2122  if (j <= level) continue;
2123 
2124  // Block isn't in the built-in cursor, so the form on disk
2125  // is valid, so read it to check if it's the next level 0
2126  // block.
2127  read_block(n, p);
2128  }
2129  } else {
2130  read_block(n, p);
2131  }
2133  if (REVISION(p) > revision_number + writable) {
2134  set_overwritten();
2135  RETURN(false);
2136  }
2137  if (GET_LEVEL(p) == 0) break;
2138  }
2139  c = DIR_END(p);
2140  C_[0].n = n;
2141  AssertRel(DIR_START,<,c);
2142  }
2143  c -= D2;
2144  C_[0].c = c;
2145  RETURN(true);
2146 }
2147 
2148 bool
2149 ChertTable::next_for_sequential(Cursor * C_, int /*dummy*/) const
2150 {
2151  LOGCALL(DB, bool, "ChertTable::next_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2152  uint8_t * p = C_[0].p;
2153  Assert(p);
2154  int c = C_[0].c;
2155  AssertRel(c,<,DIR_END(p));
2156  c += D2;
2157  Assert((unsigned)c < block_size);
2158  if (c == DIR_END(p)) {
2159  uint4 n = C_[0].n;
2160  while (true) {
2161  n++;
2162  if (n > base.get_last_block()) RETURN(false);
2163  if (writable) {
2164  if (n == C[0].n) {
2165  // Block is a leaf block in the built-in cursor
2166  // (potentially in modified form).
2167  memcpy(p, C[0].p, block_size);
2168  } else {
2169  // Blocks in the built-in cursor may not have been written
2170  // to disk yet, so we have to check that the block number
2171  // isn't in the built-in cursor or we'll read an
2172  // uninitialised block (for which GET_LEVEL(p) will
2173  // probably return 0).
2174  int j;
2175  for (j = 1; j <= level; ++j) {
2176  if (n == C[j].n) break;
2177  }
2178  if (j <= level) continue;
2179 
2180  // Block isn't in the built-in cursor, so the form on disk
2181  // is valid, so read it to check if it's the next level 0
2182  // block.
2183  read_block(n, p);
2184  }
2185  } else {
2186  read_block(n, p);
2187  }
2189  if (REVISION(p) > revision_number + writable) {
2190  set_overwritten();
2191  RETURN(false);
2192  }
2193  if (GET_LEVEL(p) == 0) break;
2194  }
2195  c = DIR_START;
2196  C_[0].n = n;
2197  }
2198  C_[0].c = c;
2199  RETURN(true);
2200 }
2201 
2202 bool
2204 {
2205  LOGCALL(DB, bool, "ChertTable::prev_default", Literal("C_") | j);
2206  uint8_t * p = C_[j].p;
2207  int c = C_[j].c;
2208  AssertRel(DIR_START,<=,c);
2209  AssertRel(c,<,DIR_END(p));
2210  AssertRel((unsigned)DIR_END(p),<=,block_size);
2211  if (c == DIR_START) {
2212  if (j == level) RETURN(false);
2213  if (!prev_default(C_, j + 1)) RETURN(false);
2214  c = DIR_END(p);
2215  AssertRel(DIR_START,<,c);
2216  }
2217  c -= D2;
2218  C_[j].c = c;
2219  if (j > 0) {
2220  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2221  }
2222  RETURN(true);
2223 }
2224 
2225 bool
2227 {
2228  LOGCALL(DB, bool, "ChertTable::next_default", Literal("C_") | j);
2229  uint8_t * p = C_[j].p;
2230  int c = C_[j].c;
2231  AssertRel(c,<,DIR_END(p));
2232  AssertRel((unsigned)DIR_END(p),<=,block_size);
2233  c += D2;
2234  if (j > 0) {
2235  AssertRel(DIR_START,<,c);
2236  } else {
2237  AssertRel(DIR_START,<=,c);
2238  }
2239  // Sometimes c can be DIR_END(p) + 2 here it appears...
2240  if (c >= DIR_END(p)) {
2241  if (j == level) RETURN(false);
2242  if (!next_default(C_, j + 1)) RETURN(false);
2243  c = DIR_START;
2244  }
2245  C_[j].c = c;
2246  if (j > 0) {
2247  block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2248 #ifdef BTREE_DEBUG_FULL
2249  printf("Block in ChertTable:next_default");
2250  report_block_full(j - 1, C_[j - 1].n, C_[j - 1].p);
2251 #endif /* BTREE_DEBUG_FULL */
2252  }
2253  RETURN(true);
2254 }
2255 
2256 void
2258 {
2259  throw Xapian::DatabaseClosedError("Database has been closed");
2260 }
2261 
2277 bool Key::operator<(Key key2) const
2278 {
2279  LOGCALL(DB, bool, "Key::operator<", static_cast<const void*>(key2.p));
2280  int key1_len = length();
2281  int key2_len = key2.length();
2282  if (key1_len == key2_len) {
2283  // The keys are the same length, so we can compare the counts
2284  // in the same operation since they're stored as 2 byte
2285  // bigendian numbers.
2286  RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) < 0);
2287  }
2288 
2289  int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
2290 
2291  // Compare the common part of the keys
2292  int diff = memcmp(p + K1, key2.p + K1, k_smaller);
2293  if (diff != 0) RETURN(diff < 0);
2294 
2295  // We dealt with the "same length" case above so we never need to check
2296  // the count here.
2297  RETURN(key1_len < key2_len);
2298 }
2299 
2300 bool Key::operator==(Key key2) const
2301 {
2302  LOGCALL(DB, bool, "Key::operator==", static_cast<const void*>(key2.p));
2303  int key1_len = length();
2304  if (key1_len != key2.length()) RETURN(false);
2305  // The keys are the same length, so we can compare the counts
2306  // in the same operation since they're stored as 2 byte
2307  // bigendian numbers.
2308  RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) == 0);
2309 }
bool get_sequential() const
int close(FD &fd)
Definition: fd.h:63
void set_overwritten() const
Definition: chert_table.cc:251
static uint8_t * zeroed_new(size_t size)
Definition: chert_table.cc:96
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:491
#define RETURN(A)
Definition: debuglog.h:493
bool get_have_fakeroot() const
#define Assert(COND)
Definition: omassert.h:122
char other_base_letter() const
Definition: chert_table.h:711
int io_open_block_rd(const char *fname)
Open a block-based file for reading.
Definition: io_utils.h:38
void set_level(uint4 level_)
int DIR_END(const uint8_t *b)
Definition: chert_table.h:154
bool both_bases
set to true if baseA and baseB both exist as valid bases.
Definition: chert_table.h:742
int c
offset in the block&#39;s directory
Definition: chert_cursor.h:47
void io_write(int fd, const char *p, size_t n)
Write n bytes from block pointed to by p to file descriptor fd.
Definition: io_utils.cc:145
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: chert_cursor.h:66
Indicates an attempt to access a closed database.
Definition: error.h:1097
void add_item(Item_wr kt, int j)
ChertTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
Definition: chert_table.cc:672
#define AssertEq(A, B)
Definition: omassert.h:124
void read_root()
int add_kt(bool found)
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C...
Definition: chert_table.cc:846
Key key() const
Definition: chert_table.h:214
z_stream * inflate_zstream
Zlib state object for inflating.
Definition: chert_table.h:858
bool io_unlink(const std::string &filename)
Delete a file.
Definition: io_utils.cc:52
T get_address() const
Definition: chert_table.h:200
int length() const
Definition: chert_table.h:181
#define true
Definition: header.h:8
bool next()
Advance to the next key.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
int getint4(const unsigned char *p, int c)
Definition: chert_table.h:79
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
void io_read_block(int fd, char *p, size_t n, off_t b, off_t o)
Read block b size n bytes into buffer p from file descriptor fd, offset o.
Definition: io_utils.cc:180
const size_t BLOCK_CAPACITY
Even for items of at maximum size, it must be possible to get this number of items in a block...
Definition: chert_table.h:105
#define usual(COND)
Definition: config.h:576
const int C2
Definition: chert_table.h:123
bool io_sync(int fd)
Ensure all data previously written to file descriptor fd has been written to disk.
Definition: io_utils.h:73
void compact(uint8_t *p)
compact(p) compact the block at p by shuffling all the items up to the end.
Definition: chert_table.cc:469
void set_full_compaction(bool parity)
const char * tablename
The name of the table (used when writing changesets).
Definition: chert_table.h:716
const int I2
Definition: chert_table.h:121
Convert errno value to std::string, thread-safe if possible.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
bool faked_root_block
true if the root block is faked (not written to disk).
Definition: chert_table.h:751
int delete_kt()
Definition: chert_table.cc:914
int io_open_block_wr(const char *fname, bool anew)
Open a block-based file for writing.
Definition: io_utils.cc:67
Btree base file implementation.
uint4 root
the root block of the B-tree
Definition: chert_table.h:771
Item_wr kt
buffer of size block_size for making up key-tag items
Definition: chert_table.h:774
#define STRINGIZE(X)
The STRINGIZE macro converts its parameter into a string constant.
Definition: stringutils.h:36
bool io_readahead_block(int, size_t, off_t, off_t=0)
Readahead block b size n bytes from file descriptor fd.
Definition: io_utils.h:133
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
void append_chunk(std::string *tag) const
Definition: chert_table.h:215
#define BLK_UNUSED
Definition: chert_cursor.h:31
STL namespace.
void lazy_alloc_inflate_zstream() const
Allocate the zstream for inflating, if not already allocated.
Convert types to std::string.
chert_tablesize_t item_count
keeps a count of the number of items in the B-tree.
Definition: chert_table.h:728
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
void open()
Open the btree at the latest revision.
Utility functions for testing files.
Cursor C[BTREE_CURSOR_LEVELS]
Definition: chert_table.h:841
#define false
Definition: header.h:9
unsigned long cursor_version
Version count for tracking when cursors need to rebuild.
Definition: chert_table.h:813
#define rare(COND)
Definition: config.h:575
bool exists() const
Determine whether the btree exists on disk.
bool key_exists(const std::string &key) const
Check if a key exists in the Btree.
void create_and_open(unsigned int blocksize)
Create a new empty btree structure on disk and open it at the initial revision.
unsigned int chert_revision_number_t
A type used to store a revision number for a table.
Definition: chert_types.h:40
const int DIR_START
Definition: chert_table.h:155
void aligned_write4(unsigned char *ptr, T value)
Definition: wordaccess.h:170
chert_tablesize_t get_item_count() const
void set_have_fakeroot(bool have_fakeroot_)
uint8_t * split_p
Buffer used when splitting a block.
Definition: chert_table.h:848
ChertTable(const ChertTable &)
Copying not allowed.
Hierarchy of classes which Xapian can throw as exceptions.
bool readahead_key(const string &key) const
bool del(const std::string &key)
Delete an entry from the table.
static uint4 block_given_by(const uint8_t *p, int c)
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value...
Definition: pretty.h:45
bool prev_for_sequential(Cursor *C_, int dummy) const
~ChertTable()
Close the Btree.
bool next_for_sequential(Cursor *C_, int dummy) const
bool io_full_sync(int fd)
Definition: io_utils.h:88
void set_sequential(bool sequential_)
uint4 get_block_size() const
uint32_t uint4
Definition: internaltypes.h:32
bool next_default(Cursor *C_, int j) const
DatabaseModifiedError indicates a database was modified.
Definition: error.h:539
void cancel()
Cancel any outstanding changes.
functions for reading and writing different width words
uint4 get_level() const
void errno_to_string(int e, string &s)
int size() const
I in diagram above.
Definition: chert_table.h:202
void form_null_key(uint4 n)
Form an item with a null key and with block number n in the tag.
Definition: chert_table.h:286
void set_block_given_by(uint4 n)
Set this item&#39;s tag to point to block n (this block should not be at level 0).
Definition: chert_table.h:273
Interface to Btree cursors.
bool do_open_to_read(bool revision_supplied, chert_revision_number_t revision_)
Perform the opening operation to read.
std::string name
The path name of the B tree.
Definition: chert_table.h:783
bool rewrite
true if the block is not the same as on disk, and so needs rewriting
Definition: chert_cursor.h:58
void SET_LEVEL(uint8_t *b, int x)
Definition: chert_table.h:158
uint8_t * p
pointer to a block
Definition: chert_cursor.h:45
void io_write_block(int fd, const char *p, size_t n, off_t b, off_t o)
Write block b size n bytes from buffer p to file descriptor fd, offset o.
Definition: io_utils.cc:228
int GET_LEVEL(const uint8_t *b)
Definition: chert_table.h:151
bool do_open_to_write(bool revision_supplied, chert_revision_number_t revision_, bool create_db=false)
Perform the opening operation to write.
bool operator<(Key key2) const
Compares this key with key2.
chert_revision_number_t revision_number
revision number of the opened B-tree.
Definition: chert_table.h:725
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
void set_root(uint4 root_)
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void erase()
Erase this table from disk.
static int find_in_block(const uint8_t *p, Key key, bool leaf, int c)
find_in_block(p, key, leaf, c) searches for the key in the block at p.
Definition: chert_table.cc:386
bool io_tmp_rename(const std::string &tmp_file, const std::string &real_file)
Rename a temporary file to its final position.
Definition: io_utils.cc:271
void set_block_size(unsigned int block_size_)
Set the block size.
void SET_DIR_END(uint8_t *b, int x)
Definition: chert_table.h:161
bool writable
Set to true when the database is opened to write.
Definition: chert_table.h:807
const int K1
Definition: chert_table.h:120
bool find_changed_block(uint4 *n) const
Find the first changed block at or after position *n.
bool read(const std::string &name, char ch, bool read_bitmap, std::string &err_msg)
Read values from a base file.
void close(bool permanent=false)
Close the Btree.
bool really_empty() const
Return true if there are no entries in the table.
ChertCursor * cursor_get() const
Get a cursor for reading from the table.
void set_item_count(chert_tablesize_t item_count_)
bool basic_open(bool revision_supplied, chert_revision_number_t revision)
#define BYTE_PAIR_RANGE
Definition: chert_table.cc:166
void setD(unsigned char *p, int c, int x)
Definition: chert_table.h:128
void write_changed_blocks(int changes_fd)
Append the list of blocks changed to a changeset file.
bool sequential
true iff the data has been written in a single write in sequential order.
Definition: chert_table.h:756
#define AssertParanoid(COND)
Definition: omassert.h:129
void enter_key(int j, Key prevkey, Key newkey)
enter_key(j, prevkey, newkey) is called after a block split.
Definition: chert_table.cc:538
z_stream * deflate_zstream
Zlib state object for deflating.
Definition: chert_table.h:855
bool prev_default(Cursor *C_, int j) const
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:489
uint4 get_root() const
bool find(Cursor *) const
find(C_) searches for the key of B->kt in the B-tree.
Definition: chert_table.cc:433
uint4 changed_n
the last block to be changed by an addition
Definition: chert_table.h:791
void commit(chert_revision_number_t revision, int changes_fd=-1, const std::string *changes_tail=NULL)
Commit any outstanding changes to the table.
void lazy_alloc_deflate_zstream() const
Allocate the zstream for deflating, if not already allocated.
uint4 block_given_by() const
Get this item&#39;s tag as a block number (this block should not be at level 0).
Definition: chert_table.h:224
void set_block_size(uint4 block_size_)
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
void fake_root_item()
Definition: chert_table.h:314
int mid_point(uint8_t *p) const
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in ...
Definition: chert_table.cc:604
void block_to_cursor(Cursor *C_, int j, uint4 n) const
Definition: chert_table.cc:272
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
Definition: chert_table.cc:978
uint4 get_revision() const
bool get_compressed() const
Definition: chert_table.h:207
int MAX_FREE(const uint8_t *b)
Definition: chert_table.h:152
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
int seq_count
count of the number of successive instances of purely sequential addition, starting at SEQ_START_POIN...
Definition: chert_table.h:788
#define CHERT_DEFAULT_BLOCK_SIZE
The default block size to use in a B-tree table.
Definition: chert_types.h:57
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
Btree implementation.
uint4 n
block number
Definition: chert_cursor.h:56
void alter()
Btree::alter(); is called when the B-tree is to be altered.
Definition: chert_table.cc:338
char name[9]
Definition: dbcheck.cc:55
#define SEQ_START_POINT
Flip to sequential addition block-splitting after this number of observed sequential additions (in ne...
Definition: chert_table.cc:151
void pack_string(std::string &s, const std::string &value)
Append an encoded std::string to a string.
Definition: pack.h:477
bool Btree_modified
Set to true the first time the B-tree is modified.
Definition: chert_table.h:801
#define CHERT_BTREE_MAX_KEY_LEN
The largest possible value of a key_len.
Definition: chert_table.h:101
void delete_item(int j, bool repeatedly)
ChertTable::delete_item(j, repeatedly) is (almost) the converse of add_item.
Definition: chert_table.cc:771
const int DONT_COMPRESS
Definition: chert_table.h:94
int components_of() const
Definition: chert_table.h:211
char base_letter
the value &#39;A&#39; or &#39;B&#39; of the current base
Definition: chert_table.h:745
void write_to_file(const std::string &filename, char base_letter, const std::string &tablename, int changes_fd, const std::string *changes_tail)
Write the btree base file to disk.
chert_revision_number_t latest_revision_number
Revision number of the other base, or zero if there is only one base file.
Definition: chert_table.h:736
unsigned int block_size
block size of the B tree in bytes
Definition: chert_table.h:731
int changed_c
directory offset corresponding to last block to be changed by an addition
Definition: chert_table.h:795
Pack types into strings and unpack them again.
int handle
File descriptor of the table.
Definition: chert_table.h:765
const int D2
Definition: chert_table.h:122
Wrappers for low-level POSIX I/O routines.
const int BTREE_CURSOR_LEVELS
Definition: chert_table.h:325
Various handy helpers which std::string really should provide.
void form_key(const std::string &key) const
Definition: chert_table.cc:948
uint4 get_last_block() const
void read_block(uint4 n, uint8_t *p) const
read_block(n, p) reads block n of the DB file to address p.
Definition: chert_table.cc:170
void flush_db()
Flush any outstanding changes to the DB file of the table.
Definition: header.h:151
unsigned REVISION(const uint8_t *b)
Definition: chert_table.h:150
ChertTable_base base
For writing back as file baseA or baseB.
Definition: chert_table.h:780
bool operator==(Key key2) const
bool cursor_created_since_last_modification
Flag for tracking when cursors need to rebuild.
Definition: chert_table.h:810
void write_block(uint4 n, const uint8_t *p) const
write_block(n, p) writes block n in the DB file from address p.
Definition: chert_table.cc:199
Various assertion macros.
#define LOGLINE(a, b)
Definition: debuglog.h:494
void set_revision(uint4 revision_)
DatabaseError indicates some sort of database related error.
Definition: error.h:367
void split_root(uint4 split_n)
Btree needs to gain a new level to insert more items: so split root block and construct a new one...
Definition: chert_table.cc:493
const size_t COMPRESS_MIN
Definition: chert_table.cc:71
uint8_t * buffer
buffer of size block_size for reforming blocks
Definition: chert_table.h:777
static void throw_database_closed()
Throw an exception indicating that the database is closed.
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
void SET_REVISION(uint8_t *b, uint4 rev)
Definition: chert_table.h:157
int level
number of levels, counting from 0
Definition: chert_table.h:768
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
void SET_MAX_FREE(uint8_t *b, int x)
Definition: chert_table.h:159
bool lazy
If true, don&#39;t create the table until it&#39;s needed.
Definition: chert_table.h:861
void set_key_and_block(Key newkey, int truncate_size, uint4 n)
Definition: chert_table.h:250
Debug logging macros.
const uint8_t * p
Definition: chert_table.h:168
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
int TOTAL_FREE(const uint8_t *b)
Definition: chert_table.h:153
void add_item_to_block(uint8_t *p, Item_wr kt, int c)
add_item_to_block(p, kt_, c) adds item kt_ to the block at p.
Definition: chert_table.cc:636
#define C(X)
void SET_TOTAL_FREE(uint8_t *b, int x)
Definition: chert_table.h:160
bool read_tag(Cursor *C_, std::string *tag, bool keep_compressed) const
Read the tag value for the key pointed to by cursor C_.
const uint8_t * get_address() const
Definition: chert_table.h:171
int compress_strategy
DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY, Z_RLE.
Definition: chert_table.h:852