xapian-core  1.4.29
glass_table.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002-2025 Olly Betts
7  * Copyright 2008 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "glass_table.h"
28 
29 #include <xapian/error.h>
30 
31 #include "omassert.h"
32 #include "posixy_wrapper.h"
33 #include "str.h"
34 #include "stringutils.h" // For STRINGIZE().
35 
36 #include <sys/types.h>
37 
38 #include <cerrno>
39 #include <cstring> /* for memmove */
40 #include <climits> /* for CHAR_BIT */
41 
42 #include "glass_freelist.h"
43 #include "glass_changes.h"
44 #include "glass_cursor.h"
45 #include "glass_defs.h"
46 #include "glass_version.h"
47 
48 #include "debuglog.h"
49 #include "filetests.h"
50 #include "io_utils.h"
51 #include "pack.h"
52 #include "wordaccess.h"
53 
54 #include <algorithm> // for std::min()
55 #include <string>
56 
57 #include "xapian/constants.h"
58 
59 using namespace Glass;
60 using namespace std;
61 
62 //#define BTREE_DEBUG_FULL 1
63 #undef BTREE_DEBUG_FULL
64 
65 #ifdef BTREE_DEBUG_FULL
66 /*------debugging aids from here--------*/
67 
68 static void print_key(const uint8_t * p, int c, int j);
69 static void print_tag(const uint8_t * p, int c, int j);
70 
71 /*
72 static void report_cursor(int N, Btree * B, Glass::Cursor * C)
73 {
74  int i;
75  printf("%d)\n", N);
76  for (i = 0; i <= B->level; i++)
77  printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
78  C[i].p, C[i].c, C[i].n, C[i].rewrite);
79 }
80 */
81 
82 /*------to here--------*/
83 #endif /* BTREE_DEBUG_FULL */
84 
85 static inline uint8_t *zeroed_new(size_t size)
86 {
87  uint8_t *temp = new uint8_t[size];
88  memset(temp, 0, size);
89  return temp;
90 }
91 
92 /* A B-tree consists of a file with extension GLASS_TABLE_EXTENSION divided
93  into a sequence of equal sized blocks, numbered 0, 1, 2 ... some of which
94  are free, some in use. Those in use are arranged in a tree. Lists of
95  currently unused blocks are stored in a freelist which is itself stored in
96  unused blocks.
97 
98  Some "root info" is needed to locate a particular revision of the B-tree
99  - this is stored in the "version file" (we store this data for all tables
100  at a particular revision together).
101 
102  Each block, b, has a structure like this:
103 
104  R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
105  <---------- D ----------> <-M->
106 
107  And then,
108 
109  R = REVISION(b) is the revision number the B-tree had when the block was
110  written into the DB file.
111  L = GET_LEVEL(b) is the level of the block, which is the number of levels
112  towards the root of the B-tree structure. So leaf blocks
113  have level 0 and the one root block has the highest level
114  equal to the number of levels in the B-tree. For blocks
115  storing the freelist, the level is set to 254.
116  M = MAX_FREE(b) is the size of the gap between the end of the directory and
117  the first item of data. (It is not necessarily the maximum
118  size among the bits of space that are free, but I can't
119  think of a better name.)
120  T = TOTAL_FREE(b)is the total amount of free space left in b.
121  D = DIR_END(b) gives the offset to the end of the directory.
122 
123  o1, o2 ... oN are a directory of offsets to the N items held in the block.
124  The items are key-tag pairs, and as they occur in the directory are ordered
125  by the keys.
126 
127  An item has this form:
128 
129  I K key X tag
130  ←K→
131  <------I---->
132 
133  A long tag presented through the API is split up into C pieces small enough
134  to be accommodated in the blocks of the B-tree. The key is extended to
135  include a counter, x, which runs from 1 to C. The key is preceded by a
136  length, K, and the whole item with a length, I, as depicted above. The
137  upper bits of I encode a flag indicating if this item is compressed, and a
138  flag saying if this is the last piece of a tag (i.e. if x == C).
139 
140  Here are the corresponding definitions:
141 
142 */
143 
146 #define SEQ_START_POINT (-10)
147 
148 /* Note use of the limits.h values:
149  UCHAR_MAX = 255, an unsigned with all bits set, and
150  CHAR_BIT = 8, the number of bits per byte
151 
152  BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
153  bytes -- 64K effectively.
154 */
155 
156 #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
157 
159 void
160 GlassTable::read_block(uint4 n, uint8_t * p) const
161 {
162  // Log the value of p, not the contents of the block it points to...
163  LOGCALL_VOID(DB, "GlassTable::read_block", n | (void*)p);
164  if (rare(handle == -2))
166  AssertRel(n,<,free_list.get_first_unused_block());
167 
168  io_read_block(handle, reinterpret_cast<char *>(p), block_size, n, offset);
169 
170  if (GET_LEVEL(p) != LEVEL_FREELIST) {
171  int dir_end = DIR_END(p);
172  if (rare(dir_end < DIR_START || unsigned(dir_end) > block_size)) {
173  string msg("dir_end invalid in block ");
174  msg += str(n);
175  throw Xapian::DatabaseCorruptError(msg);
176  }
177  }
178 }
179 
190 void
191 GlassTable::write_block(uint4 n, const uint8_t * p, bool appending) const
192 {
193  LOGCALL_VOID(DB, "GlassTable::write_block", n | p | appending);
194  Assert(writable);
195  /* Check that n is in range. */
196  AssertRel(n,<,free_list.get_first_unused_block());
197 
198  /* don't write to non-free */
199  // FIXME: We can no longer check this easily.
200  // AssertParanoid(free_list.block_free_at_start(block_size, n));
201 
202  /* write revision is okay */
203  AssertEqParanoid(REVISION(p), revision_number + 1);
204 
205  if (appending) {
206  // In the case of the freelist, new entries can safely be written into
207  // the space at the end of the latest freelist block, as that's unused
208  // by previous revisions. It is also safe to write into a block which
209  // wasn't used in the previous revision.
210  //
211  // It's only when we start a new freelist block that we need to worry
212  // about invalidating old revisions.
213  } else if (flags & Xapian::DB_DANGEROUS) {
214  // FIXME: We should somehow flag this to prevent readers opening the
215  // database. Removing "iamglass" or setting a flag in it doesn't deal
216  // with any existing readers though. Perhaps we want to have readers
217  // read lock and try to take an exclusive lock here?
218  }
219 
220  const char * p_char = reinterpret_cast<const char *>(p);
221  io_write_block(handle, p_char, block_size, n, offset);
222 
223  if (!changes_obj) return;
224 
225  unsigned char v;
226  // FIXME: track table_type in this class?
227  if (strcmp(tablename, "position") == 0) {
228  v = int(Glass::POSITION);
229  } else if (strcmp(tablename, "postlist") == 0) {
230  v = int(Glass::POSTLIST);
231  } else if (strcmp(tablename, "docdata") == 0) {
232  v = int(Glass::DOCDATA);
233  } else if (strcmp(tablename, "spelling") == 0) {
234  v = int(Glass::SPELLING);
235  } else if (strcmp(tablename, "synonym") == 0) {
236  v = int(Glass::SYNONYM);
237  } else if (strcmp(tablename, "termlist") == 0) {
238  v = int(Glass::TERMLIST);
239  } else {
240  return; // FIXME
241  }
242 
243  if (block_size == 2048) {
244  v |= 0 << 3;
245  } else if (block_size == 4096) {
246  v |= 1 << 3;
247  } else if (block_size == 8192) {
248  v |= 2 << 3;
249  } else if (block_size == 16384) {
250  v |= 3 << 3;
251  } else if (block_size == 32768) {
252  v |= 4 << 3;
253  } else if (block_size == 65536) {
254  v |= 5 << 3;
255  } else {
256  return; // FIXME
257  }
258 
259  string buf;
260  buf += char(v);
261  // Write the block number to the file
262  pack_uint(buf, n);
263 
264  changes_obj->write_block(buf);
265  changes_obj->write_block(reinterpret_cast<const char *>(p), block_size);
266 }
267 
268 /* A note on cursors:
269 
270  Each B-tree level has a corresponding array element C[j] in a
271  cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
272  root block level. Within a level j,
273 
274  C[j].p addresses the block
275  C[j].c is the offset into the directory entry in the block
276  C[j].n is the number of the block at C[j].p
277 
278  A look up in the B-tree causes navigation of the blocks starting
279  from the root. In each block, p, we find an offset, c, to an item
280  which gives the number, n, of the block for the next level. This
281  leads to an array of values p,c,n which are held inside the cursor.
282 
283  Any Btree object B has a built-in cursor, at B->C. But other cursors may
284  be created. If BC is a created cursor, BC->C is the cursor in the
285  sense given above, and BC->B is the handle for the B-tree again.
286 */
287 
288 void
290 {
291  LOGCALL_VOID(DB, "GlassTable::set_overwritten", NO_ARGS);
292  // If we're writable, there shouldn't be another writer who could cause
293  // overwritten to be flagged, so that's a DatabaseCorruptError.
294  if (writable)
295  throw Xapian::DatabaseCorruptError("Block overwritten - run xapian-check on this database");
296  throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
297 }
298 
299 /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
300  C, writing the block currently at C[j] back to disk if necessary.
301  Note that
302 
303  C[j].rewrite
304 
305  is true iff C[j].n is different from block n in file DB. If it is
306  false no rewriting is necessary.
307 */
308 
309 void
311 {
312  LOGCALL_VOID(DB, "GlassTable::block_to_cursor", (void*)C_ | j | n);
313  if (n == C_[j].get_n()) return;
314 
315  if (writable && C_[j].rewrite) {
316  Assert(C == C_);
317  write_block(C_[j].get_n(), C_[j].get_p());
318  C_[j].rewrite = false;
319  }
320 
321  // Check if the block is in the built-in cursor (potentially in
322  // modified form).
323  const uint8_t * p;
324  if (n == C[j].get_n()) {
325  p = C_[j].clone(C[j]);
326  } else {
327  uint8_t * q = C_[j].init(block_size);
328  read_block(n, q);
329  p = q;
330  C_[j].set_n(n);
331  }
332 
333  if (j < level) {
334  /* unsigned comparison */
335  if (rare(REVISION(p) > REVISION(C_[j + 1].get_p()))) {
336  set_overwritten();
337  return;
338  }
339  }
340 
341  if (rare(j != GET_LEVEL(p))) {
342  string msg = "Expected block ";
343  msg += str(n);
344  msg += " to be level ";
345  msg += str(j);
346  msg += ", not ";
347  msg += str(GET_LEVEL(p));
348  throw Xapian::DatabaseCorruptError(msg);
349  }
350 }
351 
373 void
375 {
376  LOGCALL_VOID(DB, "GlassTable::alter", NO_ARGS);
377  Assert(writable);
378  if (flags & Xapian::DB_DANGEROUS) {
379  C[0].rewrite = true;
380  return;
381  }
382  int j = 0;
383  while (true) {
384  if (C[j].rewrite) return; /* all new, so return */
385  C[j].rewrite = true;
386 
387  glass_revision_number_t rev = REVISION(C[j].get_p());
388  if (rev == revision_number + 1) {
389  return;
390  }
391  Assert(rev < revision_number + 1);
392  uint4 n = C[j].get_n();
393  free_list.mark_block_unused(this, block_size, n);
394  SET_REVISION(C[j].get_modifiable_p(block_size), revision_number + 1);
395  n = free_list.get_block(this, block_size);
396  C[j].set_n(n);
397 
398  if (j == level) return;
399  j++;
400  BItem_wr(C[j].get_modifiable_p(block_size), C[j].c).set_block_given_by(n);
401  }
402 }
403 
417 int
418 GlassTable::find_in_leaf(const uint8_t * p, LeafItem item, int c, bool& exact)
419 {
420  LOGCALL_STATIC(DB, int, "GlassTable::find_in_leaf", (const void*)p | (const void *)item.get_address() | c | Literal("bool&"));
421  // c should be odd (either -1, or an even offset from DIR_START).
422  Assert((c & 1) == 1);
423  int i = DIR_START;
424  i -= D2;
425  if (c != -1) {
426  AssertRel(i,<=,c);
427  }
428  int j = DIR_END(p);
429 
430  if (c != -1) {
431  if (c < j && i < c) {
432  int r = compare(LeafItem(p, c), item);
433  if (r == 0) {
434  exact = true;
435  return c;
436  }
437  if (r < 0) i = c;
438  }
439  c += D2;
440  if (c < j && i < c) {
441  int r = compare(item, LeafItem(p, c));
442  if (r == 0) {
443  exact = true;
444  return c;
445  }
446  if (r < 0) j = c;
447  }
448  }
449 
450  while (j - i > D2) {
451  int k = i + ((j - i) / (D2 * 2)) * D2; /* mid way */
452  int r = compare(item, LeafItem(p, k));
453  if (r < 0) {
454  j = k;
455  } else {
456  i = k;
457  if (r == 0) {
458  exact = true;
459  break;
460  }
461  }
462  }
463  AssertRel(DIR_START - D2,<=,i);
464  AssertRel(i,<,DIR_END(p));
465  RETURN(i);
466 }
467 
468 template<typename ITEM> int
469 find_in_branch_(const uint8_t * p, ITEM item, int c)
470 {
471  // c should be odd (either -1, or an even offset from DIR_START).
472  Assert((c & 1) == 1);
473  int i = DIR_START;
474  if (c != -1) {
475  AssertRel(i,<=,c);
476  }
477  int j = DIR_END(p);
478 
479  if (c != -1) {
480  if (c < j && i < c) {
481  int r = compare(BItem(p, c), item);
482  if (r == 0) return c;
483  if (r < 0) i = c;
484  }
485  c += D2;
486  if (c < j && i < c) {
487  int r = compare(item, BItem(p, c));
488  if (r == 0) return c;
489  if (r < 0) j = c;
490  }
491  }
492 
493  while (j - i > D2) {
494  int k = i + ((j - i) / (D2 * 2)) * D2; /* mid way */
495  int r = compare(item, BItem(p, k));
496  if (r < 0) {
497  j = k;
498  } else {
499  i = k;
500  if (r == 0) break;
501  }
502  }
503  AssertRel(DIR_START,<=,i);
504  AssertRel(i,<,DIR_END(p));
505  return i;
506 }
507 
508 int
509 GlassTable::find_in_branch(const uint8_t * p, LeafItem item, int c)
510 {
511  LOGCALL_STATIC(DB, int, "GlassTable::find_in_branch", (const void*)p | (const void *)item.get_address() | c);
512  RETURN(find_in_branch_(p, item, c));
513 }
514 
515 int
516 GlassTable::find_in_branch(const uint8_t * p, BItem item, int c)
517 {
518  LOGCALL_STATIC(DB, int, "GlassTable::find_in_branch", (const void*)p | (const void *)item.get_address() | c);
519  RETURN(find_in_branch_(p, item, c));
520 }
521 
529 bool
531 {
532  LOGCALL(DB, bool, "GlassTable::find", (void*)C_);
533  // Note: the parameter is needed when we're called by GlassCursor
534  const uint8_t * p;
535  int c;
536  for (int j = level; j > 0; --j) {
537  p = C_[j].get_p();
538  c = find_in_branch(p, kt, C_[j].c);
539 #ifdef BTREE_DEBUG_FULL
540  printf("Block in GlassTable:find - code position 1");
541  report_block_full(j, C_[j].get_n(), p);
542 #endif /* BTREE_DEBUG_FULL */
543  C_[j].c = c;
544  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
545  }
546  p = C_[0].get_p();
547  bool exact = false;
548  c = find_in_leaf(p, kt, C_[0].c, exact);
549 #ifdef BTREE_DEBUG_FULL
550  printf("Block in GlassTable:find - code position 2");
551  report_block_full(0, C_[0].get_n(), p);
552 #endif /* BTREE_DEBUG_FULL */
553  C_[0].c = c;
554  RETURN(exact);
555 }
556 
562 void
564 {
565  LOGCALL_VOID(DB, "GlassTable::compact", (void*)p);
566  Assert(p != buffer);
567  Assert(writable);
568  int e = block_size;
569  uint8_t * b = buffer;
570  int dir_end = DIR_END(p);
571  if (GET_LEVEL(p) == 0) {
572  // Leaf.
573  for (int c = DIR_START; c < dir_end; c += D2) {
574  LeafItem item(p, c);
575  int l = item.size();
576  e -= l;
577  memcpy(b + e, item.get_address(), l);
578  LeafItem_wr::setD(p, c, e); /* reform in b */
579  }
580  } else {
581  // Branch.
582  for (int c = DIR_START; c < dir_end; c += D2) {
583  BItem item(p, c);
584  int l = item.size();
585  e -= l;
586  memcpy(b + e, item.get_address(), l);
587  BItem_wr::setD(p, c, e); /* reform in b */
588  }
589  }
590  memcpy(p + e, b + e, block_size - e); /* copy back */
591  e -= dir_end;
592  SET_TOTAL_FREE(p, e);
593  SET_MAX_FREE(p, e);
594 }
595 
599 void
601 {
602  LOGCALL_VOID(DB, "GlassTable::split_root", split_n);
603  /* gain a level */
604  ++level;
605 
606  /* check level overflow - this isn't something that should ever happen
607  * but deserves more than an Assert()... */
608  if (level == GLASS_BTREE_CURSOR_LEVELS) {
609  throw Xapian::DatabaseCorruptError("Btree has grown impossibly large ("
611  " levels)");
612  }
613 
614  uint8_t * q = C[level].init(block_size);
615  memset(q, 0, block_size);
616  C[level].c = DIR_START;
617  C[level].set_n(free_list.get_block(this, block_size));
618  C[level].rewrite = true;
619  SET_REVISION(q, revision_number + 1);
620  SET_LEVEL(q, level);
622  compact(q); /* to reset TOTAL_FREE, MAX_FREE */
623 
624  /* form a null key in b with a pointer to the old root */
625  uint8_t b[10]; /* 7 is exact */
626  BItem_wr item(b);
627  item.form_null_key(split_n);
628  add_branch_item(item, level);
629 }
630 
646 void
648 {
649  LOGCALL_VOID(DB, "GlassTable::enter_key_above_leaf", Literal("previtem") | Literal("newitem"));
650  Assert(writable);
651  Assert(compare(previtem, newitem) < 0);
652 
653  Key prevkey = previtem.key();
654  Key newkey = newitem.key();
655  int new_comp = newitem.component_of();
656 
657  uint4 blocknumber = C[0].get_n();
658 
659  // FIXME update to use Key
660  // Keys are truncated here: but don't truncate the count at the end away.
661  const int newkey_len = newkey.length();
662  AssertRel(newkey_len,>,0);
663 
664  // Truncate the key to the minimal key which differs from prevkey,
665  // the preceding key in the block.
666  int i = 0;
667  const int min_len = min(newkey_len, prevkey.length());
668  while (i < min_len && prevkey[i] == newkey[i]) {
669  i++;
670  }
671 
672  // Want one byte of difference.
673  if (i < newkey_len) i++;
674 
675  // Enough space for a branch item with maximum length key.
676  uint8_t b[BYTES_PER_BLOCK_NUMBER + K1 + 255 + X2];
677  BItem_wr item(b);
678  AssertRel(i, <=, 255);
679  item.set_truncated_key_and_block(newkey, new_comp, i, blocknumber);
680 
681  // The split block gets inserted into the parent after the pointer to the
682  // current child.
683  AssertEq(C[1].c, find_in_branch(C[1].get_p(), item, C[1].c));
684  C[1].c += D2;
685  C[1].rewrite = true; /* a subtle point: this *is* required. */
686  add_branch_item(item, 1);
687 }
688 
702 void
704 {
705  LOGCALL_VOID(DB, "GlassTable::enter_key_above_branch", j | Literal("newitem"));
706  Assert(writable);
707  AssertRel(j,>,1);
708 
709  /* Can't truncate between branch levels, since the separated keys
710  * are in at the leaf level, and truncating again will change the
711  * branch point.
712  */
713 
714  uint4 blocknumber = C[j - 1].get_n();
715 
716  // Enough space for a branch item with maximum length key.
717  uint8_t b[BYTES_PER_BLOCK_NUMBER + K1 + 255 + X2];
718  BItem_wr item(b);
719  item.set_key_and_block(newitem.key(), blocknumber);
720 
721  // The split block gets inserted into the parent after the pointer to the
722  // current child.
723  AssertEq(C[j].c, find_in_branch(C[j].get_p(), item, C[j].c));
724  C[j].c += D2;
725  C[j].rewrite = true; /* a subtle point: this *is* required. */
726  add_branch_item(item, j);
727 }
728 
733 int
734 GlassTable::mid_point(uint8_t * p) const
735 {
736  LOGCALL(DB, int, "GlassTable::mid_point", (void*)p);
737  int n = 0;
738  int dir_end = DIR_END(p);
739  int size = block_size - TOTAL_FREE(p) - dir_end;
740  for (int c = DIR_START; c < dir_end; c += D2) {
741  int l;
742  if (GET_LEVEL(p) == 0) {
743  l = LeafItem(p, c).size();
744  } else {
745  l = BItem(p, c).size();
746  }
747  n += 2 * l;
748  if (n >= size) {
749  if (l < n - size) RETURN(c);
750  RETURN(c + D2);
751  }
752  }
753 
754  /* This shouldn't happen, as the sum of the item sizes should be the same
755  * as the value calculated in size, so assert but return a sane value just
756  * in case. */
757  Assert(false);
758  RETURN(dir_end);
759 }
760 
770 void
771 GlassTable::add_item_to_leaf(uint8_t * p, LeafItem kt_, int c)
772 {
773  LOGCALL_VOID(DB, "GlassTable::add_item_to_leaf", (void*)p | Literal("kt_") | c);
774  Assert(writable);
775  int dir_end = DIR_END(p);
776  int kt_len = kt_.size();
777  int needed = kt_len + D2;
778  int new_total = TOTAL_FREE(p) - needed;
779  int new_max = MAX_FREE(p) - needed;
780 
781  Assert(new_total >= 0);
782 
783  AssertRel(MAX_FREE(p),>=,needed);
784 
785  AssertRel(DIR_START,<=,c);
786  AssertRel(c,<=,dir_end);
787 
788  memmove(p + c + D2, p + c, dir_end - c);
789  dir_end += D2;
790  SET_DIR_END(p, dir_end);
791 
792  int o = dir_end + new_max;
793  LeafItem_wr::setD(p, c, o);
794  memmove(p + o, kt_.get_address(), kt_len);
795 
796  SET_MAX_FREE(p, new_max);
797 
798  SET_TOTAL_FREE(p, new_total);
799 }
800 
810 void
811 GlassTable::add_item_to_branch(uint8_t * p, BItem kt_, int c)
812 {
813  LOGCALL_VOID(DB, "GlassTable::add_item_to_branch", (void*)p | Literal("kt_") | c);
814  Assert(writable);
815  int dir_end = DIR_END(p);
816  int kt_len = kt_.size();
817  int needed = kt_len + D2;
818  int new_total = TOTAL_FREE(p) - needed;
819  int new_max = MAX_FREE(p) - needed;
820 
821  Assert(new_total >= 0);
822 
823  AssertRel(MAX_FREE(p),>=,needed);
824 
825  AssertRel(DIR_START,<=,c);
826  AssertRel(c,<=,dir_end);
827 
828  memmove(p + c + D2, p + c, dir_end - c);
829  dir_end += D2;
830  SET_DIR_END(p, dir_end);
831 
832  int o = dir_end + new_max;
833  BItem_wr::setD(p, c, o);
834  memmove(p + o, kt_.get_address(), kt_len);
835 
836  SET_MAX_FREE(p, new_max);
837 
838  SET_TOTAL_FREE(p, new_total);
839 }
840 
846 void
848 {
849  LOGCALL_VOID(DB, "GlassTable::add_leaf_item", Literal("kt_"));
850  Assert(writable);
851  uint8_t * p = C[0].get_modifiable_p(block_size);
852  int c = C[0].c;
853  uint4 n;
854 
855  int needed = kt_.size() + D2;
856  if (TOTAL_FREE(p) < needed) {
857  int m;
858  // Prepare to split p. After splitting, the block is in two halves, the
859  // lower half is split_p, the upper half p again. add_to_upper_half
860  // becomes true when the item gets added to p, false when it gets added
861  // to split_p.
862 
863  if (seq_count < 0) {
864  // If we're not in sequential mode, we split at the mid point
865  // of the node.
866  m = mid_point(p);
867  } else {
868  // During sequential addition, split at the insert point
869  AssertRel(c,>=,DIR_START);
870  m = c;
871  }
872 
873  uint4 split_n = C[0].get_n();
874  C[0].set_n(free_list.get_block(this, block_size));
875 
876  memcpy(split_p, p, block_size); // replicate the whole block in split_p
877  SET_DIR_END(split_p, m);
878  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
879 
880  {
881  int residue = DIR_END(p) - m;
882  int new_dir_end = DIR_START + residue;
883  memmove(p + DIR_START, p + m, residue);
884  SET_DIR_END(p, new_dir_end);
885  }
886 
887  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
888 
889  bool add_to_upper_half;
890  if (seq_count < 0) {
891  add_to_upper_half = (c >= m);
892  } else {
893  // And add item to lower half if split_p has room, otherwise upper
894  // half
895  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
896  }
897 
898  if (add_to_upper_half) {
899  c -= (m - DIR_START);
900  Assert(seq_count < 0 || c <= DIR_START + D2);
901  Assert(c >= DIR_START);
902  Assert(c <= DIR_END(p));
903  add_item_to_leaf(p, kt_, c);
904  n = C[0].get_n();
905  } else {
906  Assert(c >= DIR_START);
907  Assert(c <= DIR_END(split_p));
908  add_item_to_leaf(split_p, kt_, c);
909  n = split_n;
910  }
911  write_block(split_n, split_p);
912 
913  // Check if we're splitting the root block.
914  if (0 == level) split_root(split_n);
915 
916  /* Enter a separating key at level 1 between */
917  /* the last key of block split_p, and the first key of block p */
918  enter_key_above_leaf(LeafItem(split_p, DIR_END(split_p) - D2),
919  LeafItem(p, DIR_START));
920  } else {
921  AssertRel(TOTAL_FREE(p),>=,needed);
922 
923  if (MAX_FREE(p) < needed) {
924  compact(p);
925  AssertRel(MAX_FREE(p),>=,needed);
926  }
927 
928  add_item_to_leaf(p, kt_, c);
929  n = C[0].get_n();
930  }
931 
932  changed_n = n;
933  changed_c = c;
934 }
935 
941 void
943 {
944  LOGCALL_VOID(DB, "GlassTable::add_branch_item", Literal("kt_") | j);
945  Assert(writable);
946  uint8_t * p = C[j].get_modifiable_p(block_size);
947  int c = C[j].c;
948 
949  int needed = kt_.size() + D2;
950  if (TOTAL_FREE(p) < needed) {
951  int m;
952  // Prepare to split p. After splitting, the block is in two halves, the
953  // lower half is split_p, the upper half p again. add_to_upper_half
954  // becomes true when the item gets added to p, false when it gets added
955  // to split_p.
956 
957  if (seq_count < 0) {
958  // If we're not in sequential mode, we split at the mid point
959  // of the node.
960  m = mid_point(p);
961  } else {
962  // During sequential addition, split at the insert point
963  AssertRel(c,>=,DIR_START);
964  m = c;
965  }
966 
967  uint4 split_n = C[j].get_n();
968  C[j].set_n(free_list.get_block(this, block_size));
969 
970  memcpy(split_p, p, block_size); // replicate the whole block in split_p
971  SET_DIR_END(split_p, m);
972  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
973 
974  {
975  int residue = DIR_END(p) - m;
976  int new_dir_end = DIR_START + residue;
977  memmove(p + DIR_START, p + m, residue);
978  SET_DIR_END(p, new_dir_end);
979  }
980 
981  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
982 
983  bool add_to_upper_half;
984  if (seq_count < 0) {
985  add_to_upper_half = (c >= m);
986  } else {
987  // And add item to lower half if split_p has room, otherwise upper
988  // half
989  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
990  }
991 
992  if (add_to_upper_half) {
993  c -= (m - DIR_START);
994  Assert(seq_count < 0 || c <= DIR_START + D2);
995  Assert(c >= DIR_START);
996  Assert(c <= DIR_END(p));
997  add_item_to_branch(p, kt_, c);
998  } else {
999  Assert(c >= DIR_START);
1000  Assert(c <= DIR_END(split_p));
1001  add_item_to_branch(split_p, kt_, c);
1002  }
1003  write_block(split_n, split_p);
1004 
1005  // Check if we're splitting the root block.
1006  if (j == level) split_root(split_n);
1007 
1008  /* Enter a separating key at level j + 1 between */
1009  /* the last key of block split_p, and the first key of block p */
1010  enter_key_above_branch(j + 1, BItem(p, DIR_START));
1011 
1012  // In branch levels, we can make the first key of block p null and
1013  // save a bit of disk space. Other redundant keys will still creep
1014  // in though.
1015  BItem_wr item(p, DIR_START);
1016  int new_total_free = TOTAL_FREE(p) + item.key().length();
1017  item.form_null_key(item.block_given_by());
1018  SET_TOTAL_FREE(p, new_total_free);
1019  } else {
1020  AssertRel(TOTAL_FREE(p),>=,needed);
1021 
1022  if (MAX_FREE(p) < needed) {
1023  compact(p);
1024  AssertRel(MAX_FREE(p),>=,needed);
1025  }
1026 
1027  add_item_to_branch(p, kt_, c);
1028  }
1029 }
1030 
1038 void
1040 {
1041  LOGCALL_VOID(DB, "GlassTable::delete_leaf_item", repeatedly);
1042  Assert(writable);
1043  uint8_t * p = C[0].get_modifiable_p(block_size);
1044  int c = C[0].c;
1045  AssertRel(DIR_START,<=,c);
1046  AssertRel(c,<,DIR_END(p));
1047  int kt_len = LeafItem(p, c).size(); /* size of the item to be deleted */
1048  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
1049 
1050  memmove(p + c, p + c + D2, dir_end - c);
1051  SET_DIR_END(p, dir_end);
1052  SET_MAX_FREE(p, MAX_FREE(p) + D2);
1053  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
1054 
1055  if (!repeatedly) return;
1056  if (0 < level) {
1057  if (dir_end == DIR_START) {
1058  free_list.mark_block_unused(this, block_size, C[0].get_n());
1059  C[0].rewrite = false;
1060  C[0].set_n(BLK_UNUSED);
1061  C[1].rewrite = true; /* *is* necessary */
1062  delete_branch_item(1);
1063  }
1064  }
1065 }
1066 
1073 void
1075 {
1076  LOGCALL_VOID(DB, "GlassTable::delete_branch_item", j);
1077  Assert(writable);
1078  uint8_t * p = C[j].get_modifiable_p(block_size);
1079  int c = C[j].c;
1080  AssertRel(DIR_START,<=,c);
1081  AssertRel(c,<,DIR_END(p));
1082  int kt_len = BItem(p, c).size(); /* size of the item to be deleted */
1083  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
1084 
1085  memmove(p + c, p + c + D2, dir_end - c);
1086  SET_DIR_END(p, dir_end);
1087  SET_MAX_FREE(p, MAX_FREE(p) + D2);
1088  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
1089 
1090  if (j < level) {
1091  if (dir_end == DIR_START) {
1092  free_list.mark_block_unused(this, block_size, C[j].get_n());
1093  C[j].rewrite = false;
1094  C[j].set_n(BLK_UNUSED);
1095  C[j + 1].rewrite = true; /* *is* necessary */
1096  delete_branch_item(j + 1);
1097  }
1098  } else {
1099  Assert(j == level);
1100  while (dir_end == DIR_START + D2 && level > 0) {
1101  /* single item in the root block, so lose a level */
1102  uint4 new_root = BItem(C[level].get_p(), DIR_START).block_given_by();
1103  free_list.mark_block_unused(this, block_size, C[level].get_n());
1104  C[level].destroy();
1105  level--;
1106 
1107  block_to_cursor(C, level, new_root);
1108 
1109  dir_end = DIR_END(C[level].get_p()); /* prepare for the loop */
1110  }
1111  }
1112 }
1113 
1114 /* debugging aid:
1115 static addcount = 0;
1116 */
1117 
1148 int
1150 {
1151  LOGCALL(DB, int, "GlassTable::add_kt", found);
1152  Assert(writable);
1153 
1154  /*
1155  {
1156  printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
1157  print_bytes(kt[I2], kt + I2 + K1); putchar('\n');
1158  }
1159  */
1160  alter();
1161 
1162  int result = 0;
1163  if (found) { /* replacement */
1164  seq_count = SEQ_START_POINT;
1165  sequential = false;
1166 
1167  uint8_t * p = C[0].get_modifiable_p(block_size);
1168  int c = C[0].c;
1169  AssertRel(DIR_START,<=,c);
1170  AssertRel(c,<,DIR_END(p));
1171  LeafItem item(p, c);
1172  int kt_size = kt.size();
1173  int needed = kt_size - item.size();
1174 
1175  result = item.last_component() ? 2 : 1;
1176 
1177  if (needed <= 0) {
1178  /* simple replacement */
1179  memmove(const_cast<uint8_t *>(item.get_address()),
1180  kt.get_address(), kt_size);
1181  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
1182  } else {
1183  /* new item into the block's freespace */
1184  int new_max = MAX_FREE(p) - kt_size;
1185  if (new_max >= 0) {
1186  int o = DIR_END(p) + new_max;
1187  memmove(p + o, kt.get_address(), kt_size);
1188  LeafItem_wr::setD(p, c, o);
1189  SET_MAX_FREE(p, new_max);
1190  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
1191  } else {
1192  /* do it the long way */
1193  delete_leaf_item(false);
1194  add_leaf_item(kt);
1195  }
1196  }
1197  } else {
1198  /* addition */
1199  if (changed_n == C[0].get_n() && changed_c == C[0].c) {
1200  if (seq_count < 0) seq_count++;
1201  } else {
1202  seq_count = SEQ_START_POINT;
1203  sequential = false;
1204  }
1205  C[0].c += D2;
1206  add_leaf_item(kt);
1207  }
1208  RETURN(result);
1209 }
1210 
1211 /* delete_kt() corresponds to add_kt(found), but there are only
1212  two cases: if the key is not found nothing is done, and if it is
1213  found the corresponding item is deleted with delete_leaf_item.
1214 
1215  Returns:
1216  0 : nothing to delete
1217  1 : deleted kt
1218  2 : deleted kt and it was the final one
1219 */
1220 
1221 int
1223 {
1224  LOGCALL(DB, int, "GlassTable::delete_kt", NO_ARGS);
1225  Assert(writable);
1226 
1227  seq_count = SEQ_START_POINT;
1228  sequential = false;
1229 
1230  if (!find(C))
1231  return 0;
1232 
1233  int result = LeafItem(C[0].get_p(), C[0].c).last_component() ? 2 : 1;
1234  alter();
1235  delete_leaf_item(true);
1236 
1237  RETURN(result);
1238 }
1239 
1240 /* GlassTable::form_key(key) treats address kt as an item holder and fills in
1241 the key part:
1242 
1243  (I) K key c (C tag)
1244 
1245 The bracketed parts are left blank. The key is filled in with key_len bytes and
1246 K set accordingly. c is set to 1.
1247 */
1248 
1249 void GlassTable::form_key(const string & key) const
1250 {
1251  LOGCALL_VOID(DB, "GlassTable::form_key", key);
1252  kt.form_key(key);
1253 }
1254 
1255 /* GlassTable::add(key, tag) adds the key/tag item to the
1256  B-tree, replacing any existing item with the same key.
1257 
1258  For a long tag, we end up having to add m components, of the form
1259 
1260  key 1 m tag1
1261  key 2 m tag2
1262  ...
1263  key m m tagm
1264 
1265  and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
1266  n components of the form
1267 
1268  key 1 n TAG1
1269  key 2 n TAG2
1270  ...
1271  key n n TAGn
1272 
1273  and n may be greater than, equal to, or less than m. These cases are dealt
1274  with in the code below. If m < n for example, we end up with a series of
1275  deletions.
1276 */
1277 
1278 void
1279 GlassTable::add(const string& key, const string& tag, bool already_compressed)
1280 {
1281  LOGCALL_VOID(DB, "GlassTable::add", key | tag | already_compressed);
1282  Assert(writable);
1283 
1284  if (handle < 0) {
1285  if (handle == -2) {
1287  }
1288  RootInfo root_info;
1289  root_info.init(block_size, compress_min);
1290  do_open_to_write(&root_info);
1291  }
1292 
1293  form_key(key);
1294 
1295  const char* tag_data = tag.data();
1296  size_t tag_size = tag.size();
1297 
1298  bool compressed = false;
1299  if (already_compressed) {
1300  compressed = true;
1301  } else if (compress_min > 0 && tag_size > compress_min) {
1302  const char * res = comp_stream.compress(tag_data, &tag_size);
1303  if (res) {
1304  compressed = true;
1305  tag_data = res;
1306  }
1307  }
1308 
1309  // sort of matching kt.append_chunk(), but setting the chunk
1310  const size_t cd = kt.key().length() + K1 + I2 + X2; // offset to the tag data
1311  const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
1312  size_t first_L = L + X2; // - amount for tag1 (we don't store X there)
1313  bool found = find(C);
1314  if (tag_size <= first_L) {
1315  // The whole tag clearly fits in one item, so no need to make this
1316  // complicated.
1317  first_L = tag_size;
1318  } else if (!found) {
1319  const uint8_t * p = C[0].get_p();
1320  size_t n = TOTAL_FREE(p) % (max_item_size + D2);
1321  if (n > D2 + cd) {
1322  n -= (D2 + cd);
1323  // if n >= last then fully filling this block won't produce
1324  // an extra item, so we might as well do this even if
1325  // full_compaction isn't active.
1326  //
1327  // In the full_compaction case, it turns out we shouldn't always
1328  // try to fill every last byte. Doing so can actually increase the
1329  // total space required (I believe this effect is due to longer
1330  // dividing keys being required in the index blocks). Empirically,
1331  // n >= key.size() + K appears a good criterion for K ~= 34. This
1332  // seems to save about 0.2% in total database size over always
1333  // splitting the tag. It'll also give be slightly faster retrieval
1334  // as we can avoid reading an extra block occasionally.
1335  size_t last = (tag_size - X2) % L;
1336  if (n >= last || (full_compaction && n >= key.size() + 34)) {
1337  // first_L < max_item_size + D2 - D2 - cd
1338  // Total size of first item = cd + first_L < max_item_size
1339  first_L = n + X2;
1340  }
1341  }
1342  }
1343 
1344  // There are m items to add.
1345  int m = (tag_size - first_L + L - 1) / L + 1;
1346  /* FIXME: sort out this error higher up and turn this into
1347  * an assert.
1348  */
1349  if (m >= BYTE_PAIR_RANGE) {
1350  string message = "Btree tag entry of size ";
1351  message += str(tag_size);
1352  message += " is too large to store - "
1353  "increase the block size to raise this limit";
1354  throw Xapian::UnimplementedError(message);
1355  }
1356 
1357  size_t o = 0; // Offset into the tag
1358  size_t residue = tag_size; // Bytes of the tag remaining to add in
1359  bool replacement = false; // Has there been a replacement?
1360  bool components_to_del = false; // Are there components to delete?
1361  int i;
1362  for (i = 1; i <= m; ++i) {
1363  size_t l = (i == m ? residue : (i == 1 ? first_L : L));
1364  size_t this_cd = (i == 1 ? cd - X2 : cd);
1365  Assert(this_cd + l <= block_size);
1366  Assert(o + l <= tag_size);
1367  kt.set_tag(this_cd, tag_data + o, l, compressed, i, m);
1368 
1369  o += l;
1370  residue -= l;
1371 
1372  if (i > 1) found = find(C);
1373  int result = add_kt(found);
1374  if (result) replacement = true;
1375  components_to_del = (result == 1);
1376  }
1377  AssertEq(o, tag_size);
1378  if (components_to_del) {
1379  i = m;
1380  do {
1381  kt.set_component_of(++i);
1382  } while (delete_kt() == 1);
1383  }
1384  if (!replacement) ++item_count;
1385  Btree_modified = true;
1386  if (cursor_created_since_last_modification) {
1387  cursor_created_since_last_modification = false;
1388  ++cursor_version;
1389  }
1390 }
1391 
1392 /* GlassTable::del(key) returns false if the key is not in the B-tree,
1393  otherwise deletes it and returns true.
1394 
1395  Again, this is parallel to GlassTable::add, but simpler in form.
1396 */
1397 
1398 bool
1399 GlassTable::del(const string &key)
1400 {
1401  LOGCALL(DB, bool, "GlassTable::del", key);
1402  Assert(writable);
1403 
1404  if (handle < 0) {
1405  if (handle == -2) {
1407  }
1408  RETURN(false);
1409  }
1410 
1411  // We can't delete a key which we is too long for us to store.
1412  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1413 
1414  if (key.empty()) RETURN(false);
1415  form_key(key);
1416 
1417  int r = delete_kt();
1418  if (r == 0) RETURN(false);
1419  int i = 1;
1420  while (r == 1) {
1421  kt.set_component_of(++i);
1422  r = delete_kt();
1423  }
1424 
1425  item_count--;
1426  Btree_modified = true;
1427  if (cursor_created_since_last_modification) {
1428  cursor_created_since_last_modification = false;
1429  ++cursor_version;
1430  }
1431  RETURN(true);
1432 }
1433 
1434 bool
1435 GlassTable::readahead_key(const string &key) const
1436 {
1437  LOGCALL(DB, bool, "GlassTable::readahead_key", key);
1438  Assert(!key.empty());
1439 
1440  // Three cases:
1441  //
1442  // handle == -1: Lazy table in a multi-file database which isn't yet open.
1443  //
1444  // handle == -2: Table has been closed. Since the readahead is just a
1445  // hint, we can safely ignore it for a closed table.
1446  //
1447  // handle <= -3: Lazy table in a single-file database which isn't yet
1448  // open.
1449  if (handle < 0)
1450  RETURN(false);
1451 
1452  // If the table only has one level, there are no branch blocks to preread.
1453  if (level == 0)
1454  RETURN(false);
1455 
1456  // An overlong key cannot be found.
1457  if (key.size() > GLASS_BTREE_MAX_KEY_LEN)
1458  RETURN(true);
1459 
1460  form_key(key);
1461 
1462  // We'll only readahead the first level, since descending the B-tree would
1463  // require actual reads that would likely hurt performance more than help.
1464  const uint8_t * p = C[level].get_p();
1465  int c = find_in_branch(p, kt, C[level].c);
1466  uint4 n = BItem(p, c).block_given_by();
1467  // Don't preread if it's the block we last preread or already in the
1468  // cursor.
1469  if (n != last_readahead && n != C[level - 1].get_n()) {
1470  last_readahead = n;
1471  if (!io_readahead_block(handle, block_size, n, offset))
1472  RETURN(false);
1473  }
1474  RETURN(true);
1475 }
1476 
1477 bool
1478 GlassTable::get_exact_entry(const string &key, string & tag) const
1479 {
1480  LOGCALL(DB, bool, "GlassTable::get_exact_entry", key | tag);
1481  Assert(!key.empty());
1482 
1483  if (handle < 0) {
1484  if (handle == -2) {
1486  }
1487  RETURN(false);
1488  }
1489 
1490  // An oversized key can't exist, so attempting to search for it should fail.
1491  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1492 
1493  form_key(key);
1494  if (!find(C)) RETURN(false);
1495 
1496  (void)read_tag(C, &tag, false);
1497  RETURN(true);
1498 }
1499 
1500 bool
1501 GlassTable::key_exists(const string &key) const
1502 {
1503  LOGCALL(DB, bool, "GlassTable::key_exists", key);
1504  Assert(!key.empty());
1505 
1506  // An oversized key can't exist, so attempting to search for it should fail.
1507  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1508 
1509  form_key(key);
1510  RETURN(find(C));
1511 }
1512 
1513 bool
1514 GlassTable::read_tag(Glass::Cursor * C_, string *tag, bool keep_compressed) const
1515 {
1516  LOGCALL(DB, bool, "GlassTable::read_tag", Literal("C_") | tag | keep_compressed);
1517 
1518  tag->resize(0);
1519 
1520  LeafItem item(C_[0].get_p(), C_[0].c);
1521  bool compressed = item.get_compressed();
1522  bool decompress = false;
1523  if (compressed && !keep_compressed) {
1524  comp_stream.decompress_start();
1525  decompress = true;
1526  }
1527 
1528  while (true) {
1529  bool last = item.last_component();
1530  if (decompress) {
1531  // Decompress each chunk as we read it so we don't need both the
1532  // full compressed and uncompressed tags in memory at once.
1533  bool done = item.decompress_chunk(comp_stream, *tag);
1534  if (done != last) {
1535  throw Xapian::DatabaseCorruptError(done ?
1536  "Too many chunks of compressed data" :
1537  "Too few chunks of compressed data");
1538  }
1539  } else {
1540  item.append_chunk(tag);
1541  }
1542  if (last) break;
1543  if (!next(C_, 0)) {
1544  throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
1545  }
1546  item.init(C_[0].get_p(), C_[0].c);
1547  }
1548  // At this point the cursor is on the last item - calling next will move
1549  // it to the next key (GlassCursor::read_tag() relies on this).
1550 
1551  RETURN(compressed && keep_compressed);
1552 }
1553 
1554 void
1556 {
1557  LOGCALL_VOID(DB, "GlassTable::set_full_compaction", parity);
1558  Assert(writable);
1559 
1560  if (parity) seq_count = 0;
1561  full_compaction = parity;
1562 }
1563 
1565  LOGCALL(DB, GlassCursor *, "GlassTable::cursor_get", NO_ARGS);
1566  if (handle < 0) {
1567  if (handle == -2) {
1569  }
1570  RETURN(NULL);
1571  }
1572  // FIXME Ick - casting away const is nasty
1573  RETURN(new GlassCursor(const_cast<GlassTable *>(this)));
1574 }
1575 
1576 /************ B-tree opening and closing ************/
1577 
1578 void
1580 {
1581  LOGCALL_VOID(DB, "GlassTable::basic_open", root_info|rev);
1582  revision_number = rev;
1583  root = root_info->get_root();
1584  level = root_info->get_level();
1585  item_count = root_info->get_num_entries();
1586  faked_root_block = root_info->get_root_is_fake();
1587  sequential = root_info->get_sequential();
1588  const string & fl_serialised = root_info->get_free_list();
1589  if (!fl_serialised.empty()) {
1590  if (!free_list.unpack(fl_serialised))
1591  throw Xapian::DatabaseCorruptError("Bad freelist metadata");
1592  } else {
1593  free_list.reset();
1594  }
1595 
1596  compress_min = root_info->get_compress_min();
1597 
1598  /* kt holds constructed items as well as keys */
1599  kt = LeafItem_wr(zeroed_new(block_size));
1600 
1601  set_max_item_size(BLOCK_CAPACITY);
1602 
1603  for (int j = 0; j <= level; ++j) {
1604  C[j].init(block_size);
1605  }
1606 
1607  read_root();
1608 
1609  if (cursor_created_since_last_modification) {
1610  cursor_created_since_last_modification = false;
1611  ++cursor_version;
1612  }
1613 }
1614 
1615 void
1617 {
1618  LOGCALL_VOID(DB, "GlassTable::read_root", NO_ARGS);
1619  if (faked_root_block) {
1620  /* root block for an unmodified database. */
1621  uint8_t * p = C[0].init(block_size);
1622  Assert(p);
1623 
1624  /* clear block - shouldn't be necessary, but is a bit nicer,
1625  * and means that the same operations should always produce
1626  * the same database. */
1627  memset(p, 0, block_size);
1628 
1629  int o = block_size - I2 - K1;
1630  LeafItem_wr(p + o).fake_root_item();
1631 
1632  LeafItem_wr::setD(p, DIR_START, o); // its directory entry
1633  SET_DIR_END(p, DIR_START + D2);// the directory size
1634 
1635  o -= (DIR_START + D2);
1636  SET_MAX_FREE(p, o);
1637  SET_TOTAL_FREE(p, o);
1638  SET_LEVEL(p, 0);
1639 
1640  if (!writable) {
1641  /* reading - revision number doesn't matter as long as
1642  * it's not greater than the current one. */
1643  SET_REVISION(p, 0);
1644  C[0].set_n(0);
1645  } else {
1646  /* writing - */
1647  SET_REVISION(p, revision_number + 1);
1648  C[0].set_n(free_list.get_block(this, block_size));
1649  C[0].rewrite = true;
1650  }
1651  } else {
1652  /* using a root block stored on disk */
1653  block_to_cursor(C, level, root);
1654 
1655  if (REVISION(C[level].get_p()) > revision_number) set_overwritten();
1656  /* although this is unlikely */
1657  }
1658 }
1659 
1660 void
1663 {
1664  LOGCALL_VOID(DB, "GlassTable::do_open_to_write", root_info|rev);
1665  if (handle == -2) {
1667  }
1668  if (handle <= -2) {
1669  // Single file database.
1670  handle = -3 - handle;
1671  } else {
1672  handle = io_open_block_wr(name + GLASS_TABLE_EXTENSION, (rev == 0));
1673  if (handle < 0) {
1674  // lazy doesn't make a lot of sense when we're creating a DB (which
1675  // is the case when rev==0), but ENOENT with O_CREAT means a parent
1676  // directory doesn't exist.
1677  if (lazy && rev && errno == ENOENT) {
1678  revision_number = rev;
1679  return;
1680  }
1681  string message((rev == 0) ? "Couldn't create " : "Couldn't open ");
1682  message += name;
1683  message += GLASS_TABLE_EXTENSION" read/write";
1684  throw Xapian::DatabaseOpeningError(message, errno);
1685  }
1686  }
1687 
1688  writable = true;
1689  basic_open(root_info, rev);
1690 
1691  split_p = new uint8_t[block_size];
1692 
1693  buffer = zeroed_new(block_size);
1694 
1695  changed_n = 0;
1696  changed_c = DIR_START;
1697  seq_count = SEQ_START_POINT;
1698 }
1699 
1700 GlassTable::GlassTable(const char * tablename_, const string & path_,
1701  bool readonly_, bool lazy_)
1702  : tablename(tablename_),
1703  revision_number(0),
1704  item_count(0),
1705  block_size(0),
1706  faked_root_block(true),
1707  sequential(true),
1708  handle(-1),
1709  level(0),
1710  root(0),
1711  kt(0),
1712  buffer(0),
1713  free_list(),
1714  name(path_),
1715  seq_count(0),
1716  changed_n(0),
1717  changed_c(0),
1718  max_item_size(0),
1719  Btree_modified(false),
1720  full_compaction(false),
1721  writable(!readonly_),
1722  cursor_created_since_last_modification(false),
1723  cursor_version(0),
1724  changes_obj(NULL),
1725  split_p(0),
1726  compress_min(0),
1727  comp_stream(Z_DEFAULT_STRATEGY),
1728  lazy(lazy_),
1729  last_readahead(BLK_UNUSED),
1730  offset(0)
1731 {
1732  LOGCALL_CTOR(DB, "GlassTable", tablename_ | path_ | readonly_ | lazy_);
1733 }
1734 
1735 GlassTable::GlassTable(const char * tablename_, int fd, off_t offset_,
1736  bool readonly_, bool lazy_)
1737  : tablename(tablename_),
1738  revision_number(0),
1739  item_count(0),
1740  block_size(0),
1742  sequential(true),
1743  handle(-3 - fd),
1744  level(0),
1745  root(0),
1746  kt(0),
1747  buffer(0),
1748  free_list(),
1749  name(),
1750  seq_count(0),
1751  changed_n(0),
1752  changed_c(0),
1753  max_item_size(0),
1756  writable(!readonly_),
1758  cursor_version(0),
1759  changes_obj(NULL),
1760  split_p(0),
1761  compress_min(0),
1762  comp_stream(Z_DEFAULT_STRATEGY),
1763  lazy(lazy_),
1765  offset(offset_)
1766 {
1767  LOGCALL_CTOR(DB, "GlassTable", tablename_ | fd | offset_ | readonly_ | lazy_);
1768 }
1769 
1770 bool
1772  LOGCALL(DB, bool, "GlassTable::exists", NO_ARGS);
1773  // We know a single-file database exists, since we have an fd open on it!
1775 }
1776 
1777 void
1778 GlassTable::create_and_open(int flags_, const RootInfo & root_info)
1779 {
1780  LOGCALL_VOID(DB, "GlassTable::create_and_open", flags_|root_info);
1781  if (handle == -2) {
1783  }
1784  Assert(writable);
1785  close();
1786 
1787  unsigned int block_size_ = root_info.get_blocksize();
1788  Assert(block_size_ >= 2048);
1789  Assert(block_size_ <= BYTE_PAIR_RANGE);
1790  // Must be a power of two.
1791  Assert((block_size_ & (block_size_ - 1)) == 0);
1792 
1793  flags = flags_;
1794  block_size = block_size_;
1795 
1796  if (lazy) {
1797  close();
1799  compress_min = root_info.get_compress_min();
1800  } else {
1801  // FIXME: it would be good to arrange that this works such that there's
1802  // always a valid table in place if you run create_and_open() on an
1803  // existing table.
1804 
1805  do_open_to_write(&root_info);
1806  }
1807 }
1808 
1810  LOGCALL_DTOR(DB, "GlassTable");
1812 }
1813 
1814 void GlassTable::close(bool permanent) {
1815  LOGCALL_VOID(DB, "GlassTable::close", permanent);
1816 
1817  if (handle >= 0) {
1818  if (single_file()) {
1819  handle = -3 - handle;
1820  } else {
1821  // If an error occurs here, we just ignore it, since we're just
1822  // trying to free everything.
1823  (void)::close(handle);
1824  handle = -1;
1825  }
1826  }
1827 
1828  if (permanent) {
1829  handle = -2;
1830  // Don't delete the resources in the table, since they may
1831  // still be used to look up cached content.
1832  return;
1833  }
1834  for (int j = level; j >= 0; --j) {
1835  C[j].destroy();
1836  }
1837  delete [] split_p;
1838  split_p = 0;
1839 
1840  delete [] kt.get_address();
1841  kt = LeafItem_wr(0);
1842  delete [] buffer;
1843  buffer = 0;
1844 }
1845 
1846 void
1848 {
1849  LOGCALL_VOID(DB, "GlassTable::flush_db", NO_ARGS);
1850  Assert(writable);
1851  if (handle < 0) {
1852  if (handle == -2) {
1854  }
1855  return;
1856  }
1857 
1858  for (int j = level; j >= 0; --j) {
1859  if (C[j].rewrite) {
1860  write_block(C[j].get_n(), C[j].get_p());
1861  }
1862  }
1863 
1864  faked_root_block = false;
1865 }
1866 
1867 void
1869 {
1870  LOGCALL_VOID(DB, "GlassTable::commit", revision|root_info);
1871  Assert(writable);
1872 
1873  if (revision <= revision_number) {
1874  throw Xapian::DatabaseError("New revision too low");
1875  }
1876 
1877  if (handle < 0) {
1878  if (handle == -2) {
1880  }
1882  root_info->set_blocksize(block_size);
1883  root_info->set_level(0);
1884  root_info->set_num_entries(0);
1885  root_info->set_root_is_fake(true);
1886  root_info->set_sequential(true);
1887  root_info->set_root(0);
1888  return;
1889  }
1890 
1891  try {
1892  root = C[level].get_n();
1893 
1894  root_info->set_blocksize(block_size);
1895  root_info->set_level(level);
1896  root_info->set_num_entries(item_count);
1897  root_info->set_root_is_fake(faked_root_block);
1898  root_info->set_sequential(sequential);
1899  root_info->set_root(root);
1900 
1901  Btree_modified = false;
1902 
1903  for (int i = 0; i <= level; ++i) {
1904  C[i].init(block_size);
1905  }
1906 
1907  free_list.set_revision(revision);
1908  free_list.commit(this, block_size);
1909 
1910  // Save the freelist details into the root_info.
1911  string serialised;
1912  free_list.pack(serialised);
1913  root_info->set_free_list(serialised);
1914 
1916 
1917  read_root();
1918 
1919  changed_n = 0;
1920  changed_c = DIR_START;
1922  } catch (...) {
1924  throw;
1925  }
1926 }
1927 
1928 void
1930 {
1931  LOGCALL_VOID(DB, "GlassTable::cancel", root_info|rev);
1932  Assert(writable);
1933 
1934  if (handle < 0) {
1935  if (handle == -2) {
1937  }
1938  return;
1939  }
1940 
1941  // This causes problems: if (!Btree_modified) return;
1942 
1944  throw Xapian::InvalidOperationError("cancel() not supported under Xapian::DB_DANGEROUS");
1945 
1946  revision_number = rev;
1947  block_size = root_info.get_blocksize();
1948  root = root_info.get_root();
1949  level = root_info.get_level();
1950  item_count = root_info.get_num_entries();
1951  faked_root_block = root_info.get_root_is_fake();
1952  sequential = root_info.get_sequential();
1953  const string & fl_serialised = root_info.get_free_list();
1954  if (!fl_serialised.empty()) {
1955  if (!free_list.unpack(fl_serialised))
1956  throw Xapian::DatabaseCorruptError("Bad freelist metadata");
1957  } else {
1958  free_list.reset();
1959  }
1960 
1961  Btree_modified = false;
1962 
1963  for (int j = 0; j <= level; ++j) {
1964  C[j].init(block_size);
1965  C[j].rewrite = false;
1966  }
1967  read_root();
1968 
1969  changed_n = 0;
1970  changed_c = DIR_START;
1972 
1975  ++cursor_version;
1976  }
1977 }
1978 
1979 /************ B-tree reading ************/
1980 
1981 void
1984 {
1985  LOGCALL(DB, bool, "GlassTable::do_open_to_read", root_info|rev);
1986  if (handle == -2) {
1988  }
1989  if (single_file()) {
1990  handle = -3 - handle;
1991  } else {
1993  if (handle < 0) {
1994  if (lazy) {
1995  // This table is optional when reading!
1996  revision_number = rev;
1997  return;
1998  }
1999  string message("Couldn't open ");
2000  message += name;
2001  message += GLASS_TABLE_EXTENSION" to read";
2002  throw Xapian::DatabaseOpeningError(message, errno);
2003  }
2004  }
2005 
2006  basic_open(root_info, rev);
2007 
2008  read_root();
2009 }
2010 
2011 void
2012 GlassTable::open(int flags_, const RootInfo & root_info,
2014 {
2015  LOGCALL_VOID(DB, "GlassTable::open", flags_|root_info|rev);
2016  close();
2017 
2018  flags = flags_;
2019  block_size = root_info.get_blocksize();
2020  root = root_info.get_root();
2021 
2022  if (!writable) {
2023  do_open_to_read(&root_info, rev);
2024  return;
2025  }
2026 
2027  do_open_to_write(&root_info, rev);
2028 }
2029 
2030 bool
2032 {
2033  LOGCALL(DB, bool, "GlassTable::prev_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2034  int c = C_[0].c;
2035  AssertRel(DIR_START,<=,c);
2036  AssertRel(c,<,DIR_END(C_[0].get_p()));
2037  if (c == DIR_START) {
2038  uint4 n = C_[0].get_n();
2039  const uint8_t * p;
2040  while (true) {
2041  if (n == 0) RETURN(false);
2042  n--;
2043  if (n == C[0].get_n()) {
2044  // Block is a leaf block in the built-in cursor (potentially in
2045  // modified form if the table is writable).
2046  p = C_[0].clone(C[0]);
2047  } else {
2048  if (writable) {
2049  // Blocks in the built-in cursor may not have been written
2050  // to disk yet, so we have to check that the block number
2051  // isn't in the built-in cursor or we'll read an
2052  // uninitialised block (for which GET_LEVEL(p) will
2053  // probably return 0).
2054  int j;
2055  for (j = 1; j <= level; ++j) {
2056  if (n == C[j].get_n()) break;
2057  }
2058  if (j <= level) continue;
2059  }
2060 
2061  // Block isn't in the built-in cursor, so the form on disk
2062  // is valid, so read it to check if it's the next level 0
2063  // block.
2064  uint8_t * q = C_[0].init(block_size);
2065  read_block(n, q);
2066  p = q;
2067  C_[0].set_n(n);
2068  }
2069  if (REVISION(p) > revision_number + writable) {
2070  set_overwritten();
2071  RETURN(false);
2072  }
2073  if (GET_LEVEL(p) == 0) break;
2074  }
2075  c = DIR_END(p);
2076  AssertRel(DIR_START,<,c);
2077  }
2078  c -= D2;
2079  C_[0].c = c;
2080  RETURN(true);
2081 }
2082 
2083 bool
2085 {
2086  LOGCALL(DB, bool, "GlassTable::next_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2087  const uint8_t * p = C_[0].get_p();
2088  Assert(p);
2089  int c = C_[0].c;
2090  AssertRel(c,<,DIR_END(p));
2091  c += D2;
2092  Assert(unsigned(c) < block_size);
2093  if (c == DIR_END(p)) {
2094  uint4 n = C_[0].get_n();
2095  while (true) {
2096  n++;
2097  if (n >= free_list.get_first_unused_block()) RETURN(false);
2098  if (writable) {
2099  if (n == C[0].get_n()) {
2100  // Block is a leaf block in the built-in cursor
2101  // (potentially in modified form).
2102  p = C_[0].clone(C[0]);
2103  } else {
2104  // Blocks in the built-in cursor may not have been written
2105  // to disk yet, so we have to check that the block number
2106  // isn't in the built-in cursor or we'll read an
2107  // uninitialised block (for which GET_LEVEL(p) will
2108  // probably return 0).
2109  int j;
2110  for (j = 1; j <= level; ++j) {
2111  if (n == C[j].get_n()) break;
2112  }
2113  if (j <= level) continue;
2114 
2115  // Block isn't in the built-in cursor, so the form on disk
2116  // is valid, so read it to check if it's the next level 0
2117  // block.
2118  uint8_t * q = C_[0].init(block_size);
2119  read_block(n, q);
2120  p = q;
2121  }
2122  } else {
2123  uint8_t * q = C_[0].init(block_size);
2124  read_block(n, q);
2125  p = q;
2126  }
2127  if (REVISION(p) > revision_number + writable) {
2128  set_overwritten();
2129  RETURN(false);
2130  }
2131  if (GET_LEVEL(p) == 0) break;
2132  }
2133  c = DIR_START;
2134  C_[0].set_n(n);
2135  }
2136  C_[0].c = c;
2137  RETURN(true);
2138 }
2139 
2140 bool
2142 {
2143  LOGCALL(DB, bool, "GlassTable::prev_default", Literal("C_") | j);
2144  const uint8_t * p = C_[j].get_p();
2145  int c = C_[j].c;
2146  AssertRel(DIR_START,<=,c);
2147  AssertRel(c,<,DIR_END(p));
2148  AssertRel(unsigned(DIR_END(p)),<=,block_size);
2149  if (c == DIR_START) {
2150  if (j == level) RETURN(false);
2151  if (!prev_default(C_, j + 1)) RETURN(false);
2152  p = C_[j].get_p();
2153  c = DIR_END(p);
2154  AssertRel(DIR_START,<,c);
2155  }
2156  c -= D2;
2157  C_[j].c = c;
2158  if (j > 0) {
2159  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
2160  }
2161  RETURN(true);
2162 }
2163 
2164 bool
2166 {
2167  LOGCALL(DB, bool, "GlassTable::next_default", Literal("C_") | j);
2168  const uint8_t * p = C_[j].get_p();
2169  int c = C_[j].c;
2170  AssertRel(c,<,DIR_END(p));
2171  AssertRel(unsigned(DIR_END(p)),<=,block_size);
2172  c += D2;
2173  if (j > 0) {
2174  AssertRel(DIR_START,<,c);
2175  } else {
2176  AssertRel(DIR_START,<=,c);
2177  }
2178  // Sometimes c can be DIR_END(p) + 2 here it appears...
2179  if (c >= DIR_END(p)) {
2180  if (j == level) RETURN(false);
2181  if (!next_default(C_, j + 1)) RETURN(false);
2182  p = C_[j].get_p();
2183  c = DIR_START;
2184  }
2185  C_[j].c = c;
2186  if (j > 0) {
2187  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
2188 #ifdef BTREE_DEBUG_FULL
2189  printf("Block in GlassTable:next_default");
2190  report_block_full(j - 1, C_[j - 1].get_n(), C_[j - 1].get_p());
2191 #endif /* BTREE_DEBUG_FULL */
2192  }
2193  RETURN(true);
2194 }
2195 
2196 void
2198 {
2199  throw Xapian::DatabaseClosedError("Database has been closed");
2200 }
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:491
#define RETURN(A)
Definition: debuglog.h:493
uint4 get_compress_min() const
Definition: glass_version.h:68
#define Assert(COND)
Definition: omassert.h:122
bool key_exists(const std::string &key) const
Check if a key exists in the Btree.
int io_open_block_rd(const char *fname)
Open a block-based file for reading.
Definition: io_utils.h:38
GlassVersion class.
int DIR_END(const uint8_t *b)
Definition: chert_table.h:154
static void setD(uint8_t *q, int c, int x)
Definition: glass_table.h:395
int component_of() const
Definition: glass_table.h:188
off_t offset
offset to start of table in file.
Definition: glass_table.h:909
bool get_sequential() const
Definition: glass_version.h:63
Indicates an attempt to access a closed database.
Definition: error.h:1097
Glass::LeafItem_wr kt
buffer of size block_size for making up key-tag items
Definition: glass_table.h:803
void init(T p_, int c)
Definition: glass_table.h:177
#define AssertEq(A, B)
Definition: omassert.h:124
void create_and_open(int flags_, const RootInfo &root_info)
Create a new empty btree structure on disk and open it at the initial revision.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
const size_t BLOCK_CAPACITY
Even for items of at maximum size, it must be possible to get this number of items in a block...
Definition: glass_table.h:50
bool io_unlink(const std::string &filename)
Delete a file.
Definition: io_utils.cc:52
bool find(Glass::Cursor *) const
find(C_) searches for the key of B->kt in the B-tree.
Definition: glass_table.cc:530
#define true
Definition: header.h:8
void commit(const GlassTable *B, uint4 block_size)
#define AssertRel(A, REL, B)
Definition: omassert.h:123
#define GLASS_BTREE_MAX_KEY_LEN
The largest possible value of a key_len.
Definition: glass_table.h:57
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
int TOTAL_FREE(const uint8_t *b)
Definition: glass_table.h:113
static int find_in_branch(const uint8_t *p, Glass::LeafItem item, int c)
Definition: glass_table.cc:509
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
void io_read_block(int fd, char *p, size_t n, off_t b, off_t o)
Read block b size n bytes into buffer p from file descriptor fd, offset o.
Definition: io_utils.cc:180
int changed_c
directory offset corresponding to last block to be changed by an addition
Definition: glass_table.h:827
bool readahead_key(const string &key) const
int find_in_branch_(const uint8_t *p, ITEM item, int c)
Definition: glass_table.cc:469
uint4 REVISION(const uint8_t *b)
Definition: glass_table.h:110
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
Provides wrappers with POSIXy semantics.
uint8_t * split_p
Buffer used when splitting a block.
Definition: glass_table.h:895
unsigned long cursor_version
Version count for tracking when cursors need to rebuild.
Definition: glass_table.h:845
void set_block_given_by(uint4 n)
Set this item&#39;s tag to point to block n (this block should not be at level 0).
Definition: glass_table.h:384
void set_blocksize(unsigned b)
Definition: glass_version.h:76
Constants in the Xapian namespace.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
int io_open_block_wr(const char *fname, bool anew)
Open a block-based file for writing.
Definition: io_utils.cc:67
bool writable
Set to true when the database is opened to write.
Definition: glass_table.h:839
#define STRINGIZE(X)
The STRINGIZE macro converts its parameter into a string constant.
Definition: stringutils.h:36
bool io_readahead_block(int, size_t, off_t, off_t=0)
Readahead block b size n bytes from file descriptor fd.
Definition: io_utils.h:133
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
void set_truncated_key_and_block(Key newkey, int new_comp, int truncate_size, uint4 n)
Definition: glass_table.h:367
#define BLK_UNUSED
Definition: chert_cursor.h:31
STL namespace.
Definitions, types, etc for use inside glass.
Convert types to std::string.
int handle
File descriptor of the table.
Definition: glass_table.h:794
void set_key_and_block(Key newkey, uint4 n)
Definition: glass_table.h:359
void add(const std::string &key, const std::string &tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
Utility functions for testing files.
void compact(uint8_t *p)
compact(p) compact the block at p by shuffling all the items up to the end.
Definition: glass_table.cc:563
bool lazy
If true, don&#39;t create the table until it&#39;s needed.
Definition: glass_table.h:903
#define false
Definition: header.h:9
void SET_MAX_FREE(uint8_t *b, int x)
Definition: glass_table.h:119
void write_block(uint4 n, const uint8_t *p, bool appending=false) const
write_block(n, p, appending) writes block n in the DB file from address p.
Definition: glass_table.cc:191
void set_level(int level_)
Definition: glass_version.h:71
bool prev_default(Glass::Cursor *C_, int j) const
#define rare(COND)
Definition: config.h:575
int flags
Flags like DB_NO_SYNC and DB_DANGEROUS.
Definition: glass_table.h:774
~GlassTable()
Close the Btree.
bool full_compaction
set to true when full compaction is to be achieved
Definition: glass_table.h:836
int size() const
SIZE in diagram above.
Definition: glass_table.h:182
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27
const int DIR_START
Definition: chert_table.h:155
uint4 changed_n
the last block to be changed by an addition
Definition: glass_table.h:823
void set_n(uint4 n)
Definition: glass_cursor.h:106
int MAX_FREE(const uint8_t *b)
Definition: glass_table.h:112
int compare(ITEM1 a, ITEM2 b)
Compare two items by their keys.
Definition: glass_table.h:934
Hierarchy of classes which Xapian can throw as exceptions.
void add_item_to_leaf(uint8_t *p, Glass::LeafItem kt, int c)
add_item_to_leaf(p, kt_, c) adds item kt_ to the leaf block at p.
Definition: glass_table.cc:771
void set_num_entries(glass_tablesize_t n)
Definition: glass_version.h:72
void form_key(const std::string &key) const
const int K1
Definition: glass_table.h:72
Definition: pretty.h:45
void flush_db()
Flush any outstanding changes to the DB file of the table.
void add_item_to_branch(uint8_t *p, Glass::BItem kt, int c)
add_item_to_branch(p, kt_, c) adds item kt_ to the branch block at p.
Definition: glass_table.cc:811
void read_root()
void enter_key_above_branch(int j, Glass::BItem newitem)
enter_key_above_branch(j, newkey) is called after a branch block split.
Definition: glass_table.cc:703
const char * tablename
The name of the table (used when writing changesets).
Definition: glass_table.h:762
void do_open_to_read(const RootInfo *root_info, glass_revision_number_t rev)
Perform the opening operation to read.
uint32_t uint4
Definition: internaltypes.h:32
glass_tablesize_t item_count
keeps a count of the number of items in the B-tree.
Definition: glass_table.h:768
DatabaseModifiedError indicates a database was modified.
Definition: error.h:539
functions for reading and writing different width words
Glass::Cursor C[GLASS_BTREE_CURSOR_LEVELS]
Definition: glass_table.h:888
int GET_LEVEL(const uint8_t *b)
Definition: glass_table.h:111
void close(bool permanent=false)
Close the Btree.
bool prev_for_sequential(Glass::Cursor *C_, int dummy) const
int add_kt(bool found)
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C...
void split_root(uint4 split_n)
Btree needs to gain a new level to insert more items: so split root block and construct a new one...
Definition: glass_table.cc:600
static uint8_t * zeroed_new(size_t size)
Definition: glass_table.cc:85
Glass freelist.
static int find_in_leaf(const uint8_t *p, Glass::LeafItem item, int c, bool &exact)
find_in_leaf(p, key, c, exact) searches for the key in the leaf block at p.
Definition: glass_table.cc:418
void commit(glass_revision_number_t revision, RootInfo *root_info)
Commit any outstanding changes to the table.
bool decompress_chunk(CompressionStream &comp_stream, string &tag) const
Definition: glass_table.h:202
Glass changesets.
bool next_for_sequential(Glass::Cursor *C_, int dummy) const
GlassChanges * changes_obj
The GlassChanges object to write block changes to.
Definition: glass_table.h:851
uint4 last_readahead
Last block readahead_key() preread.
Definition: glass_table.h:906
uint4 get_first_unused_block() const
void io_write_block(int fd, const char *p, size_t n, off_t b, off_t o)
Write block b size n bytes from buffer p to file descriptor fd, offset o.
Definition: io_utils.cc:228
int GET_LEVEL(const uint8_t *b)
Definition: chert_table.h:151
void set_revision(uint4 revision_)
int length() const
Definition: glass_table.h:149
bool single_file() const
Definition: glass_table.h:853
GlassTable(const GlassTable &)
Copying not allowed.
const int I2
Definition: glass_table.h:73
const uint8_t * clone(const Cursor &o)
Definition: glass_cursor.h:68
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void alter()
Btree::alter(); is called when the B-tree is to be altered.
Definition: glass_table.cc:374
void add_branch_item(Glass::BItem kt, int j)
GlassTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
Definition: glass_table.cc:942
void SET_LEVEL(uint8_t *b, int x)
Definition: glass_table.h:118
bool get_compressed() const
Definition: glass_table.h:185
const int D2
Definition: glass_table.h:74
bool unpack(const char **pstart, const char *end)
glass_tablesize_t get_num_entries() const
Definition: glass_version.h:61
bool last_component() const
Definition: glass_table.h:187
CompressionStream comp_stream
Definition: glass_table.h:900
bool get_root_is_fake() const
Definition: glass_version.h:62
int DIR_END(const uint8_t *b)
Definition: glass_table.h:114
static void throw_database_closed()
Throw an exception indicating that the database is closed.
Btree implementation.
const int DB_DANGEROUS
Update the database in-place.
Definition: constants.h:103
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:489
int seq_count
count of the number of successive instances of purely sequential addition, starting at SEQ_START_POIN...
Definition: glass_table.h:820
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:147
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
const int DIR_START
Definition: glass_table.h:115
#define GLASS_BTREE_CURSOR_LEVELS
Allow for this many levels in the B-tree.
Definition: glass_defs.h:43
int size() const
SIZE in diagram above.
Definition: glass_table.h:323
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
std::string name
The path name of the B tree.
Definition: glass_table.h:815
static void setD(uint8_t *q, int c, int x)
Definition: glass_table.h:282
bool Btree_modified
Set to true the first time the B-tree is modified.
Definition: glass_table.h:833
int mid_point(uint8_t *p) const
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in ...
Definition: glass_table.cc:734
unsigned int block_size
block size of the B tree in bytes
Definition: glass_table.h:771
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
void init(unsigned blocksize_, uint4 compress_min_)
Key key() const
Definition: glass_table.h:326
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
void read_block(uint4 n, uint8_t *p) const
read_block(n, p) reads block n of the DB file to address p.
Definition: glass_table.cc:160
const int LEVEL_FREELIST
Freelist blocks have their level set to LEVEL_FREELIST.
Definition: glass_table.h:136
bool read_tag(Glass::Cursor *C_, std::string *tag, bool keep_compressed) const
Read the tag value for the key pointed to by cursor C_.
void set_overwritten() const
Definition: glass_table.cc:289
uint8_t * buffer
buffer of size block_size for reforming blocks
Definition: glass_table.h:806
void append_chunk(std::string *tag) const
Definition: glass_table.h:193
unsigned get_blocksize() const
Definition: glass_version.h:64
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
char name[9]
Definition: dbcheck.cc:55
Interface to Btree cursors.
glass_block_t get_root() const
Definition: glass_version.h:59
void enter_key_above_leaf(Glass::LeafItem previtem, Glass::LeafItem newitem)
enter_key_above_leaf(previtem, newitem) is called after a leaf block split.
Definition: glass_table.cc:647
bool faked_root_block
true if the root block is faked (not written to disk).
Definition: glass_table.h:780
int delete_kt()
int c
offset in the block&#39;s directory
Definition: glass_cursor.h:134
size_t max_item_size
maximum size of an item (key-tag pair)
Definition: glass_table.h:830
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
Pack types into strings and unpack them again.
const int X2
Definition: glass_table.h:75
void basic_open(const RootInfo *root_info, glass_revision_number_t rev)
const int D2
Definition: chert_table.h:122
Wrappers for low-level POSIX I/O routines.
void set_root_is_fake(bool f)
Definition: glass_version.h:73
void SET_DIR_END(uint8_t *b, int x)
Definition: glass_table.h:121
Various handy helpers which std::string really should provide.
void SET_REVISION(uint8_t *b, uint4 rev)
Definition: glass_table.h:117
void delete_branch_item(int j)
GlassTable::delete_branch_item(j, repeatedly) is (almost) the converse of add_branch_item.
GlassFreeList free_list
List of free blocks.
Definition: glass_table.h:809
void block_to_cursor(Glass::Cursor *C_, int j, uint4 n) const
Definition: glass_table.cc:310
void add_leaf_item(Glass::LeafItem kt)
GlassTable::add_leaf_item(kt_) adds item kt_ to the leaf block.
Definition: glass_table.cc:847
uint4 compress_min
Minimum size tag to try compressing (0 for no compression).
Definition: glass_table.h:898
Definition: header.h:151
unsigned REVISION(const uint8_t *b)
Definition: chert_table.h:150
void pack(std::string &buf)
const int BYTES_PER_BLOCK_NUMBER
Definition: glass_table.h:60
#define BYTE_PAIR_RANGE
Definition: glass_table.cc:156
void cancel(const RootInfo &root_info, glass_revision_number_t rev)
Cancel any outstanding changes.
T get_address() const
Definition: glass_table.h:321
const uint8_t * get_p() const
Get pointer to block.
Definition: glass_cursor.h:116
Various assertion macros.
glass_revision_number_t revision_number
revision number of the opened B-tree.
Definition: glass_table.h:765
bool next_default(Glass::Cursor *C_, int j) const
DatabaseError indicates some sort of database related error.
Definition: error.h:367
static uint4 block_given_by(const uint8_t *p, int c)
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value...
void set_root(glass_block_t root_)
Definition: glass_version.h:75
bool exists() const
Determine whether the btree exists on disk.
void set_sequential(bool f)
Definition: glass_version.h:74
uint4 root
the root block of the B-tree
Definition: glass_table.h:800
void do_open_to_write(const RootInfo *root_info, glass_revision_number_t rev=0)
Perform the opening operation to write.
void form_null_key(uint4 n)
Form an item with a null key and with block number n in the tag.
Definition: glass_table.h:389
int level
number of levels, counting from 0
Definition: glass_table.h:797
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
void delete_leaf_item(bool repeatedly)
GlassTable::delete_leaf_item(repeatedly) is (almost) the converse of add_leaf_item.
const std::string & get_free_list() const
Definition: glass_version.h:69
bool sequential
true iff the data has been written in a single write in sequential order.
Definition: glass_table.h:785
void SET_TOTAL_FREE(uint8_t *b, int x)
Definition: glass_table.h:120
void set_free_list(const std::string &s)
Definition: glass_version.h:80
int get_level() const
Definition: glass_version.h:60
bool cursor_created_since_last_modification
Flag for tracking when cursors need to rebuild.
Definition: glass_table.h:842
uint4 block_given_by() const
Get this item&#39;s tag as a block number (this block should not be at level 0).
Definition: glass_table.h:330
bool del(const std::string &key)
Delete an entry from the table.
uint4 get_n() const
Get the block number.
Definition: glass_cursor.h:101
uint8_t * init(unsigned block_size)
Definition: glass_cursor.h:54
bool rewrite
true if the block is not the same as on disk, and so needs rewriting
Definition: glass_cursor.h:137
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
#define SEQ_START_POINT
Flip to sequential addition block-splitting after this number of observed sequential additions (in ne...
Definition: glass_table.cc:146
void destroy()
Definition: glass_cursor.h:83
void set_full_compaction(bool parity)
#define C(X)