xapian-core  1.4.19
glass_table.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
7  * Copyright 2008 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "glass_table.h"
28 
29 #include <xapian/error.h>
30 
31 #include "omassert.h"
32 #include "posixy_wrapper.h"
33 #include "str.h"
34 #include "stringutils.h" // For STRINGIZE().
35 
36 #include <sys/types.h>
37 
38 #include <cerrno>
39 #include <cstring> /* for memmove */
40 #include <climits> /* for CHAR_BIT */
41 
42 #include "glass_freelist.h"
43 #include "glass_changes.h"
44 #include "glass_cursor.h"
45 #include "glass_defs.h"
46 #include "glass_version.h"
47 
48 #include "debuglog.h"
49 #include "filetests.h"
50 #include "io_utils.h"
51 #include "pack.h"
52 #include "wordaccess.h"
53 
54 #include <algorithm> // for std::min()
55 #include <string>
56 
57 #include "xapian/constants.h"
58 
59 using namespace Glass;
60 using namespace std;
61 
62 //#define BTREE_DEBUG_FULL 1
63 #undef BTREE_DEBUG_FULL
64 
65 #ifdef BTREE_DEBUG_FULL
66 /*------debugging aids from here--------*/
67 
68 static void print_key(const uint8_t * p, int c, int j);
69 static void print_tag(const uint8_t * p, int c, int j);
70 
71 /*
72 static void report_cursor(int N, Btree * B, Glass::Cursor * C)
73 {
74  int i;
75  printf("%d)\n", N);
76  for (i = 0; i <= B->level; i++)
77  printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
78  C[i].p, C[i].c, C[i].n, C[i].rewrite);
79 }
80 */
81 
82 /*------to here--------*/
83 #endif /* BTREE_DEBUG_FULL */
84 
85 static inline uint8_t *zeroed_new(size_t size)
86 {
87  uint8_t *temp = new uint8_t[size];
88  memset(temp, 0, size);
89  return temp;
90 }
91 
92 /* A B-tree consists of a file with extension GLASS_TABLE_EXTENSION divided
93  into a sequence of equal sized blocks, numbered 0, 1, 2 ... some of which
94  are free, some in use. Those in use are arranged in a tree. Lists of
95  currently unused blocks are stored in a freelist which is itself stored in
96  unused blocks.
97 
98  Some "root info" is needed to locate a particular revision of the B-tree
99  - this is stored in the "version file" (we store this data for all tables
100  at a particular revision together).
101 
102  Each block, b, has a structure like this:
103 
104  R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
105  <---------- D ----------> <-M->
106 
107  And then,
108 
109  R = REVISION(b) is the revision number the B-tree had when the block was
110  written into the DB file.
111  L = GET_LEVEL(b) is the level of the block, which is the number of levels
112  towards the root of the B-tree structure. So leaf blocks
113  have level 0 and the one root block has the highest level
114  equal to the number of levels in the B-tree. For blocks
115  storing the freelist, the level is set to 254.
116  M = MAX_FREE(b) is the size of the gap between the end of the directory and
117  the first item of data. (It is not necessarily the maximum
118  size among the bits of space that are free, but I can't
119  think of a better name.)
120  T = TOTAL_FREE(b)is the total amount of free space left in b.
121  D = DIR_END(b) gives the offset to the end of the directory.
122 
123  o1, o2 ... oN are a directory of offsets to the N items held in the block.
124  The items are key-tag pairs, and as they occur in the directory are ordered
125  by the keys.
126 
127  An item has this form:
128 
129  I K key X tag
130  ←K→
131  <------I---->
132 
133  A long tag presented through the API is split up into C pieces small enough
134  to be accommodated in the blocks of the B-tree. The key is extended to
135  include a counter, x, which runs from 1 to C. The key is preceded by a
136  length, K, and the whole item with a length, I, as depicted above. The
137  upper bits of I encode a flag indicating if this item is compressed, and a
138  flag saying if this is the last piece of a tag (i.e. if x == C).
139 
140  Here are the corresponding definitions:
141 
142 */
143 
146 #define SEQ_START_POINT (-10)
147 
148 /* Note use of the limits.h values:
149  UCHAR_MAX = 255, an unsigned with all bits set, and
150  CHAR_BIT = 8, the number of bits per byte
151 
152  BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
153  bytes -- 64K effectively.
154 */
155 
156 #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
157 
159 void
160 GlassTable::read_block(uint4 n, uint8_t * p) const
161 {
162  // Log the value of p, not the contents of the block it points to...
163  LOGCALL_VOID(DB, "GlassTable::read_block", n | (void*)p);
164  if (rare(handle == -2))
166  AssertRel(n,<,free_list.get_first_unused_block());
167 
168  io_read_block(handle, reinterpret_cast<char *>(p), block_size, n, offset);
169 
170  if (GET_LEVEL(p) != LEVEL_FREELIST) {
171  int dir_end = DIR_END(p);
172  if (rare(dir_end < DIR_START || unsigned(dir_end) > block_size)) {
173  string msg("dir_end invalid in block ");
174  msg += str(n);
175  throw Xapian::DatabaseCorruptError(msg);
176  }
177  }
178 }
179 
190 void
191 GlassTable::write_block(uint4 n, const uint8_t * p, bool appending) const
192 {
193  LOGCALL_VOID(DB, "GlassTable::write_block", n | p | appending);
194  Assert(writable);
195  /* Check that n is in range. */
196  AssertRel(n,<,free_list.get_first_unused_block());
197 
198  /* don't write to non-free */
199  // FIXME: We can no longer check this easily.
200  // AssertParanoid(free_list.block_free_at_start(block_size, n));
201 
202  /* write revision is okay */
203  AssertEqParanoid(REVISION(p), revision_number + 1);
204 
205  if (appending) {
206  // In the case of the freelist, new entries can safely be written into
207  // the space at the end of the latest freelist block, as that's unused
208  // by previous revisions. It is also safe to write into a block which
209  // wasn't used in the previous revision.
210  //
211  // It's only when we start a new freelist block that we need to worry
212  // about invalidating old revisions.
213  } else if (flags & Xapian::DB_DANGEROUS) {
214  // FIXME: We should somehow flag this to prevent readers opening the
215  // database. Removing "iamglass" or setting a flag in it doesn't deal
216  // with any existing readers though. Perhaps we want to have readers
217  // read lock and try to take an exclusive lock here?
218  }
219 
220  const char * p_char = reinterpret_cast<const char *>(p);
221  io_write_block(handle, p_char, block_size, n, offset);
222 
223  if (!changes_obj) return;
224 
225  unsigned char v;
226  // FIXME: track table_type in this class?
227  if (strcmp(tablename, "position") == 0) {
228  v = int(Glass::POSITION);
229  } else if (strcmp(tablename, "postlist") == 0) {
230  v = int(Glass::POSTLIST);
231  } else if (strcmp(tablename, "docdata") == 0) {
232  v = int(Glass::DOCDATA);
233  } else if (strcmp(tablename, "spelling") == 0) {
234  v = int(Glass::SPELLING);
235  } else if (strcmp(tablename, "synonym") == 0) {
236  v = int(Glass::SYNONYM);
237  } else if (strcmp(tablename, "termlist") == 0) {
238  v = int(Glass::TERMLIST);
239  } else {
240  return; // FIXME
241  }
242 
243  if (block_size == 2048) {
244  v |= 0 << 3;
245  } else if (block_size == 4096) {
246  v |= 1 << 3;
247  } else if (block_size == 8192) {
248  v |= 2 << 3;
249  } else if (block_size == 16384) {
250  v |= 3 << 3;
251  } else if (block_size == 32768) {
252  v |= 4 << 3;
253  } else if (block_size == 65536) {
254  v |= 5 << 3;
255  } else {
256  return; // FIXME
257  }
258 
259  string buf;
260  buf += char(v);
261  // Write the block number to the file
262  pack_uint(buf, n);
263 
264  changes_obj->write_block(buf);
265  changes_obj->write_block(reinterpret_cast<const char *>(p), block_size);
266 }
267 
268 /* A note on cursors:
269 
270  Each B-tree level has a corresponding array element C[j] in a
271  cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
272  root block level. Within a level j,
273 
274  C[j].p addresses the block
275  C[j].c is the offset into the directory entry in the block
276  C[j].n is the number of the block at C[j].p
277 
278  A look up in the B-tree causes navigation of the blocks starting
279  from the root. In each block, p, we find an offset, c, to an item
280  which gives the number, n, of the block for the next level. This
281  leads to an array of values p,c,n which are held inside the cursor.
282 
283  Any Btree object B has a built-in cursor, at B->C. But other cursors may
284  be created. If BC is a created cursor, BC->C is the cursor in the
285  sense given above, and BC->B is the handle for the B-tree again.
286 */
287 
288 void
290 {
291  LOGCALL_VOID(DB, "GlassTable::set_overwritten", NO_ARGS);
292  // If we're writable, there shouldn't be another writer who could cause
293  // overwritten to be flagged, so that's a DatabaseCorruptError.
294  if (writable)
295  throw Xapian::DatabaseCorruptError("Db block overwritten - are there multiple writers?");
296  throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
297 }
298 
299 /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
300  C, writing the block currently at C[j] back to disk if necessary.
301  Note that
302 
303  C[j].rewrite
304 
305  is true iff C[j].n is different from block n in file DB. If it is
306  false no rewriting is necessary.
307 */
308 
309 void
311 {
312  LOGCALL_VOID(DB, "GlassTable::block_to_cursor", (void*)C_ | j | n);
313  if (n == C_[j].get_n()) return;
314 
315  if (writable && C_[j].rewrite) {
316  Assert(C == C_);
317  write_block(C_[j].get_n(), C_[j].get_p());
318  C_[j].rewrite = false;
319  }
320 
321  // Check if the block is in the built-in cursor (potentially in
322  // modified form).
323  const uint8_t * p;
324  if (n == C[j].get_n()) {
325  p = C_[j].clone(C[j]);
326  } else {
327  uint8_t * q = C_[j].init(block_size);
328  read_block(n, q);
329  p = q;
330  C_[j].set_n(n);
331  }
332 
333  if (j < level) {
334  /* unsigned comparison */
335  if (rare(REVISION(p) > REVISION(C_[j + 1].get_p()))) {
336  set_overwritten();
337  return;
338  }
339  }
340 
341  if (rare(j != GET_LEVEL(p))) {
342  string msg = "Expected block ";
343  msg += str(n);
344  msg += " to be level ";
345  msg += str(j);
346  msg += ", not ";
347  msg += str(GET_LEVEL(p));
348  throw Xapian::DatabaseCorruptError(msg);
349  }
350 }
351 
373 void
375 {
376  LOGCALL_VOID(DB, "GlassTable::alter", NO_ARGS);
377  Assert(writable);
378  if (flags & Xapian::DB_DANGEROUS) {
379  C[0].rewrite = true;
380  return;
381  }
382  int j = 0;
383  while (true) {
384  if (C[j].rewrite) return; /* all new, so return */
385  C[j].rewrite = true;
386 
387  glass_revision_number_t rev = REVISION(C[j].get_p());
388  if (rev == revision_number + 1) {
389  return;
390  }
391  Assert(rev < revision_number + 1);
392  uint4 n = C[j].get_n();
393  free_list.mark_block_unused(this, block_size, n);
394  SET_REVISION(C[j].get_modifiable_p(block_size), revision_number + 1);
395  n = free_list.get_block(this, block_size);
396  C[j].set_n(n);
397 
398  if (j == level) return;
399  j++;
400  BItem_wr(C[j].get_modifiable_p(block_size), C[j].c).set_block_given_by(n);
401  }
402 }
403 
417 int
418 GlassTable::find_in_leaf(const uint8_t * p, LeafItem item, int c, bool& exact)
419 {
420  LOGCALL_STATIC(DB, int, "GlassTable::find_in_leaf", (const void*)p | (const void *)item.get_address() | c | Literal("bool&"));
421  // c should be odd (either -1, or an even offset from DIR_START).
422  Assert((c & 1) == 1);
423  int i = DIR_START;
424  i -= D2;
425  if (c != -1) {
426  AssertRel(i,<=,c);
427  }
428  int j = DIR_END(p);
429 
430  if (c != -1) {
431  if (c < j && i < c) {
432  int r = compare(LeafItem(p, c), item);
433  if (r == 0) {
434  exact = true;
435  return c;
436  }
437  if (r < 0) i = c;
438  }
439  c += D2;
440  if (c < j && i < c) {
441  int r = compare(item, LeafItem(p, c));
442  if (r == 0) {
443  exact = true;
444  return c;
445  }
446  if (r < 0) j = c;
447  }
448  }
449 
450  while (j - i > D2) {
451  int k = i + ((j - i) / (D2 * 2)) * D2; /* mid way */
452  int r = compare(item, LeafItem(p, k));
453  if (r < 0) {
454  j = k;
455  } else {
456  i = k;
457  if (r == 0) {
458  exact = true;
459  break;
460  }
461  }
462  }
463  AssertRel(DIR_START - D2,<=,i);
464  AssertRel(i,<,DIR_END(p));
465  RETURN(i);
466 }
467 
468 template<typename ITEM> int
469 find_in_branch_(const uint8_t * p, ITEM item, int c)
470 {
471  // c should be odd (either -1, or an even offset from DIR_START).
472  Assert((c & 1) == 1);
473  int i = DIR_START;
474  if (c != -1) {
475  AssertRel(i,<=,c);
476  }
477  int j = DIR_END(p);
478 
479  if (c != -1) {
480  if (c < j && i < c) {
481  int r = compare(BItem(p, c), item);
482  if (r == 0) return c;
483  if (r < 0) i = c;
484  }
485  c += D2;
486  if (c < j && i < c) {
487  int r = compare(item, BItem(p, c));
488  if (r == 0) return c;
489  if (r < 0) j = c;
490  }
491  }
492 
493  while (j - i > D2) {
494  int k = i + ((j - i) / (D2 * 2)) * D2; /* mid way */
495  int r = compare(item, BItem(p, k));
496  if (r < 0) {
497  j = k;
498  } else {
499  i = k;
500  if (r == 0) break;
501  }
502  }
503  AssertRel(DIR_START,<=,i);
504  AssertRel(i,<,DIR_END(p));
505  return i;
506 }
507 
508 int
509 GlassTable::find_in_branch(const uint8_t * p, LeafItem item, int c)
510 {
511  LOGCALL_STATIC(DB, int, "GlassTable::find_in_branch", (const void*)p | (const void *)item.get_address() | c);
512  RETURN(find_in_branch_(p, item, c));
513 }
514 
515 int
516 GlassTable::find_in_branch(const uint8_t * p, BItem item, int c)
517 {
518  LOGCALL_STATIC(DB, int, "GlassTable::find_in_branch", (const void*)p | (const void *)item.get_address() | c);
519  RETURN(find_in_branch_(p, item, c));
520 }
521 
529 bool
531 {
532  LOGCALL(DB, bool, "GlassTable::find", (void*)C_);
533  // Note: the parameter is needed when we're called by GlassCursor
534  const uint8_t * p;
535  int c;
536  for (int j = level; j > 0; --j) {
537  p = C_[j].get_p();
538  c = find_in_branch(p, kt, C_[j].c);
539 #ifdef BTREE_DEBUG_FULL
540  printf("Block in GlassTable:find - code position 1");
541  report_block_full(j, C_[j].get_n(), p);
542 #endif /* BTREE_DEBUG_FULL */
543  C_[j].c = c;
544  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
545  }
546  p = C_[0].get_p();
547  bool exact = false;
548  c = find_in_leaf(p, kt, C_[0].c, exact);
549 #ifdef BTREE_DEBUG_FULL
550  printf("Block in GlassTable:find - code position 2");
551  report_block_full(0, C_[0].get_n(), p);
552 #endif /* BTREE_DEBUG_FULL */
553  C_[0].c = c;
554  RETURN(exact);
555 }
556 
562 void
564 {
565  LOGCALL_VOID(DB, "GlassTable::compact", (void*)p);
566  Assert(p != buffer);
567  Assert(writable);
568  int e = block_size;
569  uint8_t * b = buffer;
570  int dir_end = DIR_END(p);
571  if (GET_LEVEL(p) == 0) {
572  // Leaf.
573  for (int c = DIR_START; c < dir_end; c += D2) {
574  LeafItem item(p, c);
575  int l = item.size();
576  e -= l;
577  memcpy(b + e, item.get_address(), l);
578  LeafItem_wr::setD(p, c, e); /* reform in b */
579  }
580  } else {
581  // Branch.
582  for (int c = DIR_START; c < dir_end; c += D2) {
583  BItem item(p, c);
584  int l = item.size();
585  e -= l;
586  memcpy(b + e, item.get_address(), l);
587  BItem_wr::setD(p, c, e); /* reform in b */
588  }
589  }
590  memcpy(p + e, b + e, block_size - e); /* copy back */
591  e -= dir_end;
592  SET_TOTAL_FREE(p, e);
593  SET_MAX_FREE(p, e);
594 }
595 
599 void
601 {
602  LOGCALL_VOID(DB, "GlassTable::split_root", split_n);
603  /* gain a level */
604  ++level;
605 
606  /* check level overflow - this isn't something that should ever happen
607  * but deserves more than an Assert()... */
608  if (level == BTREE_CURSOR_LEVELS) {
609  throw Xapian::DatabaseCorruptError("Btree has grown impossibly large (" STRINGIZE(BTREE_CURSOR_LEVELS) " levels)");
610  }
611 
612  uint8_t * q = C[level].init(block_size);
613  memset(q, 0, block_size);
614  C[level].c = DIR_START;
615  C[level].set_n(free_list.get_block(this, block_size));
616  C[level].rewrite = true;
617  SET_REVISION(q, revision_number + 1);
618  SET_LEVEL(q, level);
620  compact(q); /* to reset TOTAL_FREE, MAX_FREE */
621 
622  /* form a null key in b with a pointer to the old root */
623  uint8_t b[10]; /* 7 is exact */
624  BItem_wr item(b);
625  item.form_null_key(split_n);
626  add_branch_item(item, level);
627 }
628 
644 void
646 {
647  LOGCALL_VOID(DB, "GlassTable::enter_key_above_leaf", Literal("previtem") | Literal("newitem"));
648  Assert(writable);
649  Assert(compare(previtem, newitem) < 0);
650 
651  Key prevkey = previtem.key();
652  Key newkey = newitem.key();
653  int new_comp = newitem.component_of();
654 
655  uint4 blocknumber = C[0].get_n();
656 
657  // FIXME update to use Key
658  // Keys are truncated here: but don't truncate the count at the end away.
659  const int newkey_len = newkey.length();
660  AssertRel(newkey_len,>,0);
661 
662  // Truncate the key to the minimal key which differs from prevkey,
663  // the preceding key in the block.
664  int i = 0;
665  const int min_len = min(newkey_len, prevkey.length());
666  while (i < min_len && prevkey[i] == newkey[i]) {
667  i++;
668  }
669 
670  // Want one byte of difference.
671  if (i < newkey_len) i++;
672 
673  // Enough space for a branch item with maximum length key.
674  uint8_t b[BYTES_PER_BLOCK_NUMBER + K1 + 255 + X2];
675  BItem_wr item(b);
676  AssertRel(i, <=, 255);
677  item.set_truncated_key_and_block(newkey, new_comp, i, blocknumber);
678 
679  // The split block gets inserted into the parent after the pointer to the
680  // current child.
681  AssertEq(C[1].c, find_in_branch(C[1].get_p(), item, C[1].c));
682  C[1].c += D2;
683  C[1].rewrite = true; /* a subtle point: this *is* required. */
684  add_branch_item(item, 1);
685 }
686 
700 void
702 {
703  LOGCALL_VOID(DB, "GlassTable::enter_key_above_branch", j | Literal("newitem"));
704  Assert(writable);
705  AssertRel(j,>,1);
706 
707  /* Can't truncate between branch levels, since the separated keys
708  * are in at the leaf level, and truncating again will change the
709  * branch point.
710  */
711 
712  uint4 blocknumber = C[j - 1].get_n();
713 
714  // Enough space for a branch item with maximum length key.
715  uint8_t b[BYTES_PER_BLOCK_NUMBER + K1 + 255 + X2];
716  BItem_wr item(b);
717  item.set_key_and_block(newitem.key(), blocknumber);
718 
719  // The split block gets inserted into the parent after the pointer to the
720  // current child.
721  AssertEq(C[j].c, find_in_branch(C[j].get_p(), item, C[j].c));
722  C[j].c += D2;
723  C[j].rewrite = true; /* a subtle point: this *is* required. */
724  add_branch_item(item, j);
725 }
726 
731 int
732 GlassTable::mid_point(uint8_t * p) const
733 {
734  LOGCALL(DB, int, "GlassTable::mid_point", (void*)p);
735  int n = 0;
736  int dir_end = DIR_END(p);
737  int size = block_size - TOTAL_FREE(p) - dir_end;
738  for (int c = DIR_START; c < dir_end; c += D2) {
739  int l;
740  if (GET_LEVEL(p) == 0) {
741  l = LeafItem(p, c).size();
742  } else {
743  l = BItem(p, c).size();
744  }
745  n += 2 * l;
746  if (n >= size) {
747  if (l < n - size) RETURN(c);
748  RETURN(c + D2);
749  }
750  }
751 
752  /* This shouldn't happen, as the sum of the item sizes should be the same
753  * as the value calculated in size, so assert but return a sane value just
754  * in case. */
755  Assert(false);
756  RETURN(dir_end);
757 }
758 
768 void
769 GlassTable::add_item_to_leaf(uint8_t * p, LeafItem kt_, int c)
770 {
771  LOGCALL_VOID(DB, "GlassTable::add_item_to_leaf", (void*)p | Literal("kt_") | c);
772  Assert(writable);
773  int dir_end = DIR_END(p);
774  int kt_len = kt_.size();
775  int needed = kt_len + D2;
776  int new_total = TOTAL_FREE(p) - needed;
777  int new_max = MAX_FREE(p) - needed;
778 
779  Assert(new_total >= 0);
780 
781  AssertRel(MAX_FREE(p),>=,needed);
782 
783  AssertRel(DIR_START,<=,c);
784  AssertRel(c,<=,dir_end);
785 
786  memmove(p + c + D2, p + c, dir_end - c);
787  dir_end += D2;
788  SET_DIR_END(p, dir_end);
789 
790  int o = dir_end + new_max;
791  LeafItem_wr::setD(p, c, o);
792  memmove(p + o, kt_.get_address(), kt_len);
793 
794  SET_MAX_FREE(p, new_max);
795 
796  SET_TOTAL_FREE(p, new_total);
797 }
798 
808 void
809 GlassTable::add_item_to_branch(uint8_t * p, BItem kt_, int c)
810 {
811  LOGCALL_VOID(DB, "GlassTable::add_item_to_branch", (void*)p | Literal("kt_") | c);
812  Assert(writable);
813  int dir_end = DIR_END(p);
814  int kt_len = kt_.size();
815  int needed = kt_len + D2;
816  int new_total = TOTAL_FREE(p) - needed;
817  int new_max = MAX_FREE(p) - needed;
818 
819  Assert(new_total >= 0);
820 
821  AssertRel(MAX_FREE(p),>=,needed);
822 
823  AssertRel(DIR_START,<=,c);
824  AssertRel(c,<=,dir_end);
825 
826  memmove(p + c + D2, p + c, dir_end - c);
827  dir_end += D2;
828  SET_DIR_END(p, dir_end);
829 
830  int o = dir_end + new_max;
831  BItem_wr::setD(p, c, o);
832  memmove(p + o, kt_.get_address(), kt_len);
833 
834  SET_MAX_FREE(p, new_max);
835 
836  SET_TOTAL_FREE(p, new_total);
837 }
838 
844 void
846 {
847  LOGCALL_VOID(DB, "GlassTable::add_leaf_item", Literal("kt_"));
848  Assert(writable);
849  uint8_t * p = C[0].get_modifiable_p(block_size);
850  int c = C[0].c;
851  uint4 n;
852 
853  int needed = kt_.size() + D2;
854  if (TOTAL_FREE(p) < needed) {
855  int m;
856  // Prepare to split p. After splitting, the block is in two halves, the
857  // lower half is split_p, the upper half p again. add_to_upper_half
858  // becomes true when the item gets added to p, false when it gets added
859  // to split_p.
860 
861  if (seq_count < 0) {
862  // If we're not in sequential mode, we split at the mid point
863  // of the node.
864  m = mid_point(p);
865  } else {
866  // During sequential addition, split at the insert point
867  AssertRel(c,>=,DIR_START);
868  m = c;
869  }
870 
871  uint4 split_n = C[0].get_n();
872  C[0].set_n(free_list.get_block(this, block_size));
873 
874  memcpy(split_p, p, block_size); // replicate the whole block in split_p
875  SET_DIR_END(split_p, m);
876  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
877 
878  {
879  int residue = DIR_END(p) - m;
880  int new_dir_end = DIR_START + residue;
881  memmove(p + DIR_START, p + m, residue);
882  SET_DIR_END(p, new_dir_end);
883  }
884 
885  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
886 
887  bool add_to_upper_half;
888  if (seq_count < 0) {
889  add_to_upper_half = (c >= m);
890  } else {
891  // And add item to lower half if split_p has room, otherwise upper
892  // half
893  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
894  }
895 
896  if (add_to_upper_half) {
897  c -= (m - DIR_START);
898  Assert(seq_count < 0 || c <= DIR_START + D2);
899  Assert(c >= DIR_START);
900  Assert(c <= DIR_END(p));
901  add_item_to_leaf(p, kt_, c);
902  n = C[0].get_n();
903  } else {
904  Assert(c >= DIR_START);
905  Assert(c <= DIR_END(split_p));
906  add_item_to_leaf(split_p, kt_, c);
907  n = split_n;
908  }
909  write_block(split_n, split_p);
910 
911  // Check if we're splitting the root block.
912  if (0 == level) split_root(split_n);
913 
914  /* Enter a separating key at level 1 between */
915  /* the last key of block split_p, and the first key of block p */
916  enter_key_above_leaf(LeafItem(split_p, DIR_END(split_p) - D2),
917  LeafItem(p, DIR_START));
918  } else {
919  AssertRel(TOTAL_FREE(p),>=,needed);
920 
921  if (MAX_FREE(p) < needed) {
922  compact(p);
923  AssertRel(MAX_FREE(p),>=,needed);
924  }
925 
926  add_item_to_leaf(p, kt_, c);
927  n = C[0].get_n();
928  }
929 
930  changed_n = n;
931  changed_c = c;
932 }
933 
939 void
941 {
942  LOGCALL_VOID(DB, "GlassTable::add_branch_item", Literal("kt_") | j);
943  Assert(writable);
944  uint8_t * p = C[j].get_modifiable_p(block_size);
945  int c = C[j].c;
946 
947  int needed = kt_.size() + D2;
948  if (TOTAL_FREE(p) < needed) {
949  int m;
950  // Prepare to split p. After splitting, the block is in two halves, the
951  // lower half is split_p, the upper half p again. add_to_upper_half
952  // becomes true when the item gets added to p, false when it gets added
953  // to split_p.
954 
955  if (seq_count < 0) {
956  // If we're not in sequential mode, we split at the mid point
957  // of the node.
958  m = mid_point(p);
959  } else {
960  // During sequential addition, split at the insert point
961  AssertRel(c,>=,DIR_START);
962  m = c;
963  }
964 
965  uint4 split_n = C[j].get_n();
966  C[j].set_n(free_list.get_block(this, block_size));
967 
968  memcpy(split_p, p, block_size); // replicate the whole block in split_p
969  SET_DIR_END(split_p, m);
970  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
971 
972  {
973  int residue = DIR_END(p) - m;
974  int new_dir_end = DIR_START + residue;
975  memmove(p + DIR_START, p + m, residue);
976  SET_DIR_END(p, new_dir_end);
977  }
978 
979  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
980 
981  bool add_to_upper_half;
982  if (seq_count < 0) {
983  add_to_upper_half = (c >= m);
984  } else {
985  // And add item to lower half if split_p has room, otherwise upper
986  // half
987  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
988  }
989 
990  if (add_to_upper_half) {
991  c -= (m - DIR_START);
992  Assert(seq_count < 0 || c <= DIR_START + D2);
993  Assert(c >= DIR_START);
994  Assert(c <= DIR_END(p));
995  add_item_to_branch(p, kt_, c);
996  } else {
997  Assert(c >= DIR_START);
998  Assert(c <= DIR_END(split_p));
999  add_item_to_branch(split_p, kt_, c);
1000  }
1001  write_block(split_n, split_p);
1002 
1003  // Check if we're splitting the root block.
1004  if (j == level) split_root(split_n);
1005 
1006  /* Enter a separating key at level j + 1 between */
1007  /* the last key of block split_p, and the first key of block p */
1008  enter_key_above_branch(j + 1, BItem(p, DIR_START));
1009 
1010  // In branch levels, we can make the first key of block p null and
1011  // save a bit of disk space. Other redundant keys will still creep
1012  // in though.
1013  BItem_wr item(p, DIR_START);
1014  int new_total_free = TOTAL_FREE(p) + item.key().length();
1015  item.form_null_key(item.block_given_by());
1016  SET_TOTAL_FREE(p, new_total_free);
1017  } else {
1018  AssertRel(TOTAL_FREE(p),>=,needed);
1019 
1020  if (MAX_FREE(p) < needed) {
1021  compact(p);
1022  AssertRel(MAX_FREE(p),>=,needed);
1023  }
1024 
1025  add_item_to_branch(p, kt_, c);
1026  }
1027 }
1028 
1036 void
1038 {
1039  LOGCALL_VOID(DB, "GlassTable::delete_leaf_item", repeatedly);
1040  Assert(writable);
1041  uint8_t * p = C[0].get_modifiable_p(block_size);
1042  int c = C[0].c;
1043  AssertRel(DIR_START,<=,c);
1044  AssertRel(c,<,DIR_END(p));
1045  int kt_len = LeafItem(p, c).size(); /* size of the item to be deleted */
1046  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
1047 
1048  memmove(p + c, p + c + D2, dir_end - c);
1049  SET_DIR_END(p, dir_end);
1050  SET_MAX_FREE(p, MAX_FREE(p) + D2);
1051  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
1052 
1053  if (!repeatedly) return;
1054  if (0 < level) {
1055  if (dir_end == DIR_START) {
1056  free_list.mark_block_unused(this, block_size, C[0].get_n());
1057  C[0].rewrite = false;
1058  C[0].set_n(BLK_UNUSED);
1059  C[1].rewrite = true; /* *is* necessary */
1060  delete_branch_item(1);
1061  }
1062  }
1063 }
1064 
1071 void
1073 {
1074  LOGCALL_VOID(DB, "GlassTable::delete_branch_item", j);
1075  Assert(writable);
1076  uint8_t * p = C[j].get_modifiable_p(block_size);
1077  int c = C[j].c;
1078  AssertRel(DIR_START,<=,c);
1079  AssertRel(c,<,DIR_END(p));
1080  int kt_len = BItem(p, c).size(); /* size of the item to be deleted */
1081  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
1082 
1083  memmove(p + c, p + c + D2, dir_end - c);
1084  SET_DIR_END(p, dir_end);
1085  SET_MAX_FREE(p, MAX_FREE(p) + D2);
1086  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
1087 
1088  if (j < level) {
1089  if (dir_end == DIR_START) {
1090  free_list.mark_block_unused(this, block_size, C[j].get_n());
1091  C[j].rewrite = false;
1092  C[j].set_n(BLK_UNUSED);
1093  C[j + 1].rewrite = true; /* *is* necessary */
1094  delete_branch_item(j + 1);
1095  }
1096  } else {
1097  Assert(j == level);
1098  while (dir_end == DIR_START + D2 && level > 0) {
1099  /* single item in the root block, so lose a level */
1100  uint4 new_root = BItem(C[level].get_p(), DIR_START).block_given_by();
1101  free_list.mark_block_unused(this, block_size, C[level].get_n());
1102  C[level].destroy();
1103  level--;
1104 
1105  block_to_cursor(C, level, new_root);
1106 
1107  dir_end = DIR_END(C[level].get_p()); /* prepare for the loop */
1108  }
1109  }
1110 }
1111 
1112 /* debugging aid:
1113 static addcount = 0;
1114 */
1115 
1146 int
1148 {
1149  LOGCALL(DB, int, "GlassTable::add_kt", found);
1150  Assert(writable);
1151 
1152  /*
1153  {
1154  printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
1155  print_bytes(kt[I2], kt + I2 + K1); putchar('\n');
1156  }
1157  */
1158  alter();
1159 
1160  int result = 0;
1161  if (found) { /* replacement */
1162  seq_count = SEQ_START_POINT;
1163  sequential = false;
1164 
1165  uint8_t * p = C[0].get_modifiable_p(block_size);
1166  int c = C[0].c;
1167  AssertRel(DIR_START,<=,c);
1168  AssertRel(c,<,DIR_END(p));
1169  LeafItem item(p, c);
1170  int kt_size = kt.size();
1171  int needed = kt_size - item.size();
1172 
1173  result = item.last_component() ? 2 : 1;
1174 
1175  if (needed <= 0) {
1176  /* simple replacement */
1177  memmove(const_cast<uint8_t *>(item.get_address()),
1178  kt.get_address(), kt_size);
1179  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
1180  } else {
1181  /* new item into the block's freespace */
1182  int new_max = MAX_FREE(p) - kt_size;
1183  if (new_max >= 0) {
1184  int o = DIR_END(p) + new_max;
1185  memmove(p + o, kt.get_address(), kt_size);
1186  LeafItem_wr::setD(p, c, o);
1187  SET_MAX_FREE(p, new_max);
1188  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
1189  } else {
1190  /* do it the long way */
1191  delete_leaf_item(false);
1192  add_leaf_item(kt);
1193  }
1194  }
1195  } else {
1196  /* addition */
1197  if (changed_n == C[0].get_n() && changed_c == C[0].c) {
1198  if (seq_count < 0) seq_count++;
1199  } else {
1200  seq_count = SEQ_START_POINT;
1201  sequential = false;
1202  }
1203  C[0].c += D2;
1204  add_leaf_item(kt);
1205  }
1206  RETURN(result);
1207 }
1208 
1209 /* delete_kt() corresponds to add_kt(found), but there are only
1210  two cases: if the key is not found nothing is done, and if it is
1211  found the corresponding item is deleted with delete_leaf_item.
1212 
1213  Returns:
1214  0 : nothing to delete
1215  1 : deleted kt
1216  2 : deleted kt and it was the final one
1217 */
1218 
1219 int
1221 {
1222  LOGCALL(DB, int, "GlassTable::delete_kt", NO_ARGS);
1223  Assert(writable);
1224 
1225  seq_count = SEQ_START_POINT;
1226  sequential = false;
1227 
1228  if (!find(C))
1229  return 0;
1230 
1231  int result = LeafItem(C[0].get_p(), C[0].c).last_component() ? 2 : 1;
1232  alter();
1233  delete_leaf_item(true);
1234 
1235  RETURN(result);
1236 }
1237 
1238 /* GlassTable::form_key(key) treats address kt as an item holder and fills in
1239 the key part:
1240 
1241  (I) K key c (C tag)
1242 
1243 The bracketed parts are left blank. The key is filled in with key_len bytes and
1244 K set accordingly. c is set to 1.
1245 */
1246 
1247 void GlassTable::form_key(const string & key) const
1248 {
1249  LOGCALL_VOID(DB, "GlassTable::form_key", key);
1250  kt.form_key(key);
1251 }
1252 
1253 /* GlassTable::add(key, tag) adds the key/tag item to the
1254  B-tree, replacing any existing item with the same key.
1255 
1256  For a long tag, we end up having to add m components, of the form
1257 
1258  key 1 m tag1
1259  key 2 m tag2
1260  ...
1261  key m m tagm
1262 
1263  and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
1264  n components of the form
1265 
1266  key 1 n TAG1
1267  key 2 n TAG2
1268  ...
1269  key n n TAGn
1270 
1271  and n may be greater than, equal to, or less than m. These cases are dealt
1272  with in the code below. If m < n for example, we end up with a series of
1273  deletions.
1274 */
1275 
1276 void
1277 GlassTable::add(const string &key, string tag, bool already_compressed)
1278 {
1279  LOGCALL_VOID(DB, "GlassTable::add", key | tag | already_compressed);
1280  Assert(writable);
1281 
1282  if (handle < 0) {
1283  if (handle == -2) {
1285  }
1286  RootInfo root_info;
1287  root_info.init(block_size, compress_min);
1288  do_open_to_write(&root_info);
1289  }
1290 
1291  form_key(key);
1292 
1293  const char* tag_data = tag.data();
1294  size_t tag_size = tag.size();
1295 
1296  bool compressed = false;
1297  if (already_compressed) {
1298  compressed = true;
1299  } else if (compress_min > 0 && tag_size > compress_min) {
1300  const char * res = comp_stream.compress(tag_data, &tag_size);
1301  if (res) {
1302  compressed = true;
1303  tag_data = res;
1304  }
1305  }
1306 
1307  // sort of matching kt.append_chunk(), but setting the chunk
1308  const size_t cd = kt.key().length() + K1 + I2 + X2; // offset to the tag data
1309  const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
1310  size_t first_L = L + X2; // - amount for tag1 (we don't store X there)
1311  bool found = find(C);
1312  if (tag_size <= first_L) {
1313  // The whole tag clearly fits in one item, so no need to make this
1314  // complicated.
1315  first_L = tag_size;
1316  } else if (!found) {
1317  const uint8_t * p = C[0].get_p();
1318  size_t n = TOTAL_FREE(p) % (max_item_size + D2);
1319  if (n > D2 + cd) {
1320  n -= (D2 + cd);
1321  // if n >= last then fully filling this block won't produce
1322  // an extra item, so we might as well do this even if
1323  // full_compaction isn't active.
1324  //
1325  // In the full_compaction case, it turns out we shouldn't always
1326  // try to fill every last byte. Doing so can actually increase the
1327  // total space required (I believe this effect is due to longer
1328  // dividing keys being required in the index blocks). Empirically,
1329  // n >= key.size() + K appears a good criterion for K ~= 34. This
1330  // seems to save about 0.2% in total database size over always
1331  // splitting the tag. It'll also give be slightly faster retrieval
1332  // as we can avoid reading an extra block occasionally.
1333  size_t last = (tag_size - X2) % L;
1334  if (n >= last || (full_compaction && n >= key.size() + 34)) {
1335  // first_L < max_item_size + D2 - D2 - cd
1336  // Total size of first item = cd + first_L < max_item_size
1337  first_L = n + X2;
1338  }
1339  }
1340  }
1341 
1342  // There are m items to add.
1343  int m = (tag_size - first_L + L - 1) / L + 1;
1344  /* FIXME: sort out this error higher up and turn this into
1345  * an assert.
1346  */
1347  if (m >= BYTE_PAIR_RANGE)
1348  throw Xapian::UnimplementedError("Can't handle insanely large tags");
1349 
1350  size_t o = 0; // Offset into the tag
1351  size_t residue = tag_size; // Bytes of the tag remaining to add in
1352  bool replacement = false; // Has there been a replacement?
1353  bool components_to_del = false; // Are there components to delete?
1354  int i;
1355  for (i = 1; i <= m; ++i) {
1356  size_t l = (i == m ? residue : (i == 1 ? first_L : L));
1357  size_t this_cd = (i == 1 ? cd - X2 : cd);
1358  Assert(this_cd + l <= block_size);
1359  Assert(o + l <= tag_size);
1360  kt.set_tag(this_cd, tag_data + o, l, compressed, i, m);
1361 
1362  o += l;
1363  residue -= l;
1364 
1365  if (i > 1) found = find(C);
1366  int result = add_kt(found);
1367  if (result) replacement = true;
1368  components_to_del = (result == 1);
1369  }
1370  AssertEq(o, tag_size);
1371  if (components_to_del) {
1372  i = m;
1373  do {
1374  kt.set_component_of(++i);
1375  } while (delete_kt() == 1);
1376  }
1377  if (!replacement) ++item_count;
1378  Btree_modified = true;
1379  if (cursor_created_since_last_modification) {
1380  cursor_created_since_last_modification = false;
1381  ++cursor_version;
1382  }
1383 }
1384 
1385 /* GlassTable::del(key) returns false if the key is not in the B-tree,
1386  otherwise deletes it and returns true.
1387 
1388  Again, this is parallel to GlassTable::add, but simpler in form.
1389 */
1390 
1391 bool
1392 GlassTable::del(const string &key)
1393 {
1394  LOGCALL(DB, bool, "GlassTable::del", key);
1395  Assert(writable);
1396 
1397  if (handle < 0) {
1398  if (handle == -2) {
1400  }
1401  RETURN(false);
1402  }
1403 
1404  // We can't delete a key which we is too long for us to store.
1405  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1406 
1407  if (key.empty()) RETURN(false);
1408  form_key(key);
1409 
1410  int r = delete_kt();
1411  if (r == 0) RETURN(false);
1412  int i = 1;
1413  while (r == 1) {
1414  kt.set_component_of(++i);
1415  r = delete_kt();
1416  }
1417 
1418  item_count--;
1419  Btree_modified = true;
1420  if (cursor_created_since_last_modification) {
1421  cursor_created_since_last_modification = false;
1422  ++cursor_version;
1423  }
1424  RETURN(true);
1425 }
1426 
1427 bool
1428 GlassTable::readahead_key(const string &key) const
1429 {
1430  LOGCALL(DB, bool, "GlassTable::readahead_key", key);
1431  Assert(!key.empty());
1432 
1433  // Three cases:
1434  //
1435  // handle == -1: Lazy table in a multi-file database which isn't yet open.
1436  //
1437  // handle == -2: Table has been closed. Since the readahead is just a
1438  // hint, we can safely ignore it for a closed table.
1439  //
1440  // handle <= -3: Lazy table in a single-file database which isn't yet
1441  // open.
1442  if (handle < 0)
1443  RETURN(false);
1444 
1445  // If the table only has one level, there are no branch blocks to preread.
1446  if (level == 0)
1447  RETURN(false);
1448 
1449  form_key(key);
1450 
1451  // We'll only readahead the first level, since descending the B-tree would
1452  // require actual reads that would likely hurt performance more than help.
1453  const uint8_t * p = C[level].get_p();
1454  int c = find_in_branch(p, kt, C[level].c);
1455  uint4 n = BItem(p, c).block_given_by();
1456  // Don't preread if it's the block we last preread or already in the
1457  // cursor.
1458  if (n != last_readahead && n != C[level - 1].get_n()) {
1459  last_readahead = n;
1460  if (!io_readahead_block(handle, block_size, n, offset))
1461  RETURN(false);
1462  }
1463  RETURN(true);
1464 }
1465 
1466 bool
1467 GlassTable::get_exact_entry(const string &key, string & tag) const
1468 {
1469  LOGCALL(DB, bool, "GlassTable::get_exact_entry", key | tag);
1470  Assert(!key.empty());
1471 
1472  if (handle < 0) {
1473  if (handle == -2) {
1475  }
1476  RETURN(false);
1477  }
1478 
1479  // An oversized key can't exist, so attempting to search for it should fail.
1480  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1481 
1482  form_key(key);
1483  if (!find(C)) RETURN(false);
1484 
1485  (void)read_tag(C, &tag, false);
1486  RETURN(true);
1487 }
1488 
1489 bool
1490 GlassTable::key_exists(const string &key) const
1491 {
1492  LOGCALL(DB, bool, "GlassTable::key_exists", key);
1493  Assert(!key.empty());
1494 
1495  // An oversized key can't exist, so attempting to search for it should fail.
1496  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1497 
1498  form_key(key);
1499  RETURN(find(C));
1500 }
1501 
1502 bool
1503 GlassTable::read_tag(Glass::Cursor * C_, string *tag, bool keep_compressed) const
1504 {
1505  LOGCALL(DB, bool, "GlassTable::read_tag", Literal("C_") | tag | keep_compressed);
1506 
1507  tag->resize(0);
1508 
1509  bool first = true;
1510  bool compressed = false;
1511  bool decompress = false;
1512  while (true) {
1513  LeafItem item(C_[0].get_p(), C_[0].c);
1514  if (first) {
1515  first = false;
1516  compressed = item.get_compressed();
1517  if (compressed && !keep_compressed) {
1518  comp_stream.decompress_start();
1519  decompress = true;
1520  }
1521  }
1522  bool last = item.last_component();
1523  if (decompress) {
1524  // Decompress each chunk as we read it so we don't need both the
1525  // full compressed and uncompressed tags in memory at once.
1526  bool done = item.decompress_chunk(comp_stream, *tag);
1527  if (done != last) {
1528  throw Xapian::DatabaseCorruptError(done ?
1529  "Too many chunks of compressed data" :
1530  "Too few chunks of compressed data");
1531  }
1532  } else {
1533  item.append_chunk(tag);
1534  }
1535  if (last) break;
1536  if (!next(C_, 0)) {
1537  throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
1538  }
1539  }
1540  // At this point the cursor is on the last item - calling next will move
1541  // it to the next key (GlassCursor::read_tag() relies on this).
1542 
1543  RETURN(compressed && keep_compressed);
1544 }
1545 
1546 void
1548 {
1549  LOGCALL_VOID(DB, "GlassTable::set_full_compaction", parity);
1550  Assert(writable);
1551 
1552  if (parity) seq_count = 0;
1553  full_compaction = parity;
1554 }
1555 
1557  LOGCALL(DB, GlassCursor *, "GlassTable::cursor_get", NO_ARGS);
1558  if (handle < 0) {
1559  if (handle == -2) {
1561  }
1562  RETURN(NULL);
1563  }
1564  // FIXME Ick - casting away const is nasty
1565  RETURN(new GlassCursor(const_cast<GlassTable *>(this)));
1566 }
1567 
1568 /************ B-tree opening and closing ************/
1569 
1570 void
1572 {
1573  LOGCALL_VOID(DB, "GlassTable::basic_open", root_info|rev);
1574  revision_number = rev;
1575  root = root_info->get_root();
1576  level = root_info->get_level();
1577  item_count = root_info->get_num_entries();
1578  faked_root_block = root_info->get_root_is_fake();
1579  sequential = root_info->get_sequential();
1580  const string & fl_serialised = root_info->get_free_list();
1581  if (!fl_serialised.empty()) {
1582  if (!free_list.unpack(fl_serialised))
1583  throw Xapian::DatabaseCorruptError("Bad freelist metadata");
1584  } else {
1585  free_list.reset();
1586  }
1587 
1588  compress_min = root_info->get_compress_min();
1589 
1590  /* kt holds constructed items as well as keys */
1591  kt = LeafItem_wr(zeroed_new(block_size));
1592 
1593  set_max_item_size(BLOCK_CAPACITY);
1594 
1595  for (int j = 0; j <= level; ++j) {
1596  C[j].init(block_size);
1597  }
1598 
1599  read_root();
1600 
1601  if (cursor_created_since_last_modification) {
1602  cursor_created_since_last_modification = false;
1603  ++cursor_version;
1604  }
1605 }
1606 
1607 void
1609 {
1610  LOGCALL_VOID(DB, "GlassTable::read_root", NO_ARGS);
1611  if (faked_root_block) {
1612  /* root block for an unmodified database. */
1613  uint8_t * p = C[0].init(block_size);
1614  Assert(p);
1615 
1616  /* clear block - shouldn't be necessary, but is a bit nicer,
1617  * and means that the same operations should always produce
1618  * the same database. */
1619  memset(p, 0, block_size);
1620 
1621  int o = block_size - I2 - K1;
1622  LeafItem_wr(p + o).fake_root_item();
1623 
1624  LeafItem_wr::setD(p, DIR_START, o); // its directory entry
1625  SET_DIR_END(p, DIR_START + D2);// the directory size
1626 
1627  o -= (DIR_START + D2);
1628  SET_MAX_FREE(p, o);
1629  SET_TOTAL_FREE(p, o);
1630  SET_LEVEL(p, 0);
1631 
1632  if (!writable) {
1633  /* reading - revision number doesn't matter as long as
1634  * it's not greater than the current one. */
1635  SET_REVISION(p, 0);
1636  C[0].set_n(0);
1637  } else {
1638  /* writing - */
1639  SET_REVISION(p, revision_number + 1);
1640  C[0].set_n(free_list.get_block(this, block_size));
1641  C[0].rewrite = true;
1642  }
1643  } else {
1644  /* using a root block stored on disk */
1645  block_to_cursor(C, level, root);
1646 
1647  if (REVISION(C[level].get_p()) > revision_number) set_overwritten();
1648  /* although this is unlikely */
1649  }
1650 }
1651 
1652 void
1655 {
1656  LOGCALL_VOID(DB, "GlassTable::do_open_to_write", root_info|rev);
1657  if (handle == -2) {
1659  }
1660  if (handle <= -2) {
1661  // Single file database.
1662  handle = -3 - handle;
1663  } else {
1664  handle = io_open_block_wr(name + GLASS_TABLE_EXTENSION, (rev == 0));
1665  if (handle < 0) {
1666  // lazy doesn't make a lot of sense when we're creating a DB (which
1667  // is the case when rev==0), but ENOENT with O_CREAT means a parent
1668  // directory doesn't exist.
1669  if (lazy && rev && errno == ENOENT) {
1670  revision_number = rev;
1671  return;
1672  }
1673  string message((rev == 0) ? "Couldn't create " : "Couldn't open ");
1674  message += name;
1675  message += GLASS_TABLE_EXTENSION" read/write";
1676  throw Xapian::DatabaseOpeningError(message, errno);
1677  }
1678  }
1679 
1680  writable = true;
1681  basic_open(root_info, rev);
1682 
1683  split_p = new uint8_t[block_size];
1684 
1685  buffer = zeroed_new(block_size);
1686 
1687  changed_n = 0;
1688  changed_c = DIR_START;
1689  seq_count = SEQ_START_POINT;
1690 }
1691 
1692 GlassTable::GlassTable(const char * tablename_, const string & path_,
1693  bool readonly_, bool lazy_)
1694  : tablename(tablename_),
1695  revision_number(0),
1696  item_count(0),
1697  block_size(0),
1698  faked_root_block(true),
1699  sequential(true),
1700  handle(-1),
1701  level(0),
1702  root(0),
1703  kt(0),
1704  buffer(0),
1705  free_list(),
1706  name(path_),
1707  seq_count(0),
1708  changed_n(0),
1709  changed_c(0),
1710  max_item_size(0),
1711  Btree_modified(false),
1712  full_compaction(false),
1713  writable(!readonly_),
1714  cursor_created_since_last_modification(false),
1715  cursor_version(0),
1716  changes_obj(NULL),
1717  split_p(0),
1718  compress_min(0),
1719  comp_stream(Z_DEFAULT_STRATEGY),
1720  lazy(lazy_),
1721  last_readahead(BLK_UNUSED),
1722  offset(0)
1723 {
1724  LOGCALL_CTOR(DB, "GlassTable", tablename_ | path_ | readonly_ | lazy_);
1725 }
1726 
1727 GlassTable::GlassTable(const char * tablename_, int fd, off_t offset_,
1728  bool readonly_, bool lazy_)
1729  : tablename(tablename_),
1730  revision_number(0),
1731  item_count(0),
1732  block_size(0),
1734  sequential(true),
1735  handle(-3 - fd),
1736  level(0),
1737  root(0),
1738  kt(0),
1739  buffer(0),
1740  free_list(),
1741  name(),
1742  seq_count(0),
1743  changed_n(0),
1744  changed_c(0),
1745  max_item_size(0),
1748  writable(!readonly_),
1750  cursor_version(0),
1751  changes_obj(NULL),
1752  split_p(0),
1753  compress_min(0),
1754  comp_stream(Z_DEFAULT_STRATEGY),
1755  lazy(lazy_),
1757  offset(offset_)
1758 {
1759  LOGCALL_CTOR(DB, "GlassTable", tablename_ | fd | offset_ | readonly_ | lazy_);
1760 }
1761 
1762 bool
1764  LOGCALL(DB, bool, "GlassTable::exists", NO_ARGS);
1765  // We know a single-file database exists, since we have an fd open on it!
1767 }
1768 
1769 void
1770 GlassTable::create_and_open(int flags_, const RootInfo & root_info)
1771 {
1772  LOGCALL_VOID(DB, "GlassTable::create_and_open", flags_|root_info);
1773  if (handle == -2) {
1775  }
1776  Assert(writable);
1777  close();
1778 
1779  unsigned int block_size_ = root_info.get_blocksize();
1780  Assert(block_size_ >= 2048);
1781  Assert(block_size_ <= BYTE_PAIR_RANGE);
1782  // Must be a power of two.
1783  Assert((block_size_ & (block_size_ - 1)) == 0);
1784 
1785  flags = flags_;
1786  block_size = block_size_;
1787 
1788  if (lazy) {
1789  close();
1791  compress_min = root_info.get_compress_min();
1792  } else {
1793  // FIXME: it would be good to arrange that this works such that there's
1794  // always a valid table in place if you run create_and_open() on an
1795  // existing table.
1796 
1797  do_open_to_write(&root_info);
1798  }
1799 }
1800 
1802  LOGCALL_DTOR(DB, "GlassTable");
1804 }
1805 
1806 void GlassTable::close(bool permanent) {
1807  LOGCALL_VOID(DB, "GlassTable::close", permanent);
1808 
1809  if (handle >= 0) {
1810  if (single_file()) {
1811  handle = -3 - handle;
1812  } else {
1813  // If an error occurs here, we just ignore it, since we're just
1814  // trying to free everything.
1815  (void)::close(handle);
1816  handle = -1;
1817  }
1818  }
1819 
1820  if (permanent) {
1821  handle = -2;
1822  // Don't delete the resources in the table, since they may
1823  // still be used to look up cached content.
1824  return;
1825  }
1826  for (int j = level; j >= 0; --j) {
1827  C[j].destroy();
1828  }
1829  delete [] split_p;
1830  split_p = 0;
1831 
1832  delete [] kt.get_address();
1833  kt = LeafItem_wr(0);
1834  delete [] buffer;
1835  buffer = 0;
1836 }
1837 
1838 void
1840 {
1841  LOGCALL_VOID(DB, "GlassTable::flush_db", NO_ARGS);
1842  Assert(writable);
1843  if (handle < 0) {
1844  if (handle == -2) {
1846  }
1847  return;
1848  }
1849 
1850  for (int j = level; j >= 0; --j) {
1851  if (C[j].rewrite) {
1852  write_block(C[j].get_n(), C[j].get_p());
1853  }
1854  }
1855 
1856  faked_root_block = false;
1857 }
1858 
1859 void
1861 {
1862  LOGCALL_VOID(DB, "GlassTable::commit", revision|root_info);
1863  Assert(writable);
1864 
1865  if (revision <= revision_number) {
1866  throw Xapian::DatabaseError("New revision too low");
1867  }
1868 
1869  if (handle < 0) {
1870  if (handle == -2) {
1872  }
1874  root_info->set_blocksize(block_size);
1875  root_info->set_level(0);
1876  root_info->set_num_entries(0);
1877  root_info->set_root_is_fake(true);
1878  root_info->set_sequential(true);
1879  root_info->set_root(0);
1880  return;
1881  }
1882 
1883  try {
1884  root = C[level].get_n();
1885 
1886  root_info->set_blocksize(block_size);
1887  root_info->set_level(level);
1888  root_info->set_num_entries(item_count);
1889  root_info->set_root_is_fake(faked_root_block);
1890  root_info->set_sequential(sequential);
1891  root_info->set_root(root);
1892 
1893  Btree_modified = false;
1894 
1895  for (int i = 0; i < BTREE_CURSOR_LEVELS; ++i) {
1896  C[i].init(block_size);
1897  }
1898 
1899  free_list.set_revision(revision);
1900  free_list.commit(this, block_size);
1901 
1902  // Save the freelist details into the root_info.
1903  string serialised;
1904  free_list.pack(serialised);
1905  root_info->set_free_list(serialised);
1906 
1908 
1909  read_root();
1910 
1911  changed_n = 0;
1912  changed_c = DIR_START;
1914  } catch (...) {
1916  throw;
1917  }
1918 }
1919 
1920 void
1922 {
1923  LOGCALL_VOID(DB, "GlassTable::cancel", root_info|rev);
1924  Assert(writable);
1925 
1926  if (handle < 0) {
1927  if (handle == -2) {
1929  }
1930  return;
1931  }
1932 
1933  // This causes problems: if (!Btree_modified) return;
1934 
1936  throw Xapian::InvalidOperationError("cancel() not supported under Xapian::DB_DANGEROUS");
1937 
1938  revision_number = rev;
1939  block_size = root_info.get_blocksize();
1940  root = root_info.get_root();
1941  level = root_info.get_level();
1942  item_count = root_info.get_num_entries();
1943  faked_root_block = root_info.get_root_is_fake();
1944  sequential = root_info.get_sequential();
1945  const string & fl_serialised = root_info.get_free_list();
1946  if (!fl_serialised.empty()) {
1947  if (!free_list.unpack(fl_serialised))
1948  throw Xapian::DatabaseCorruptError("Bad freelist metadata");
1949  } else {
1950  free_list.reset();
1951  }
1952 
1953  Btree_modified = false;
1954 
1955  for (int j = 0; j <= level; ++j) {
1956  C[j].init(block_size);
1957  C[j].rewrite = false;
1958  }
1959  read_root();
1960 
1961  changed_n = 0;
1962  changed_c = DIR_START;
1964 
1967  ++cursor_version;
1968  }
1969 }
1970 
1971 /************ B-tree reading ************/
1972 
1973 void
1976 {
1977  LOGCALL(DB, bool, "GlassTable::do_open_to_read", root_info|rev);
1978  if (handle == -2) {
1980  }
1981  if (single_file()) {
1982  handle = -3 - handle;
1983  } else {
1985  if (handle < 0) {
1986  if (lazy) {
1987  // This table is optional when reading!
1988  revision_number = rev;
1989  return;
1990  }
1991  string message("Couldn't open ");
1992  message += name;
1993  message += GLASS_TABLE_EXTENSION" to read";
1994  throw Xapian::DatabaseOpeningError(message, errno);
1995  }
1996  }
1997 
1998  basic_open(root_info, rev);
1999 
2000  read_root();
2001 }
2002 
2003 void
2004 GlassTable::open(int flags_, const RootInfo & root_info,
2006 {
2007  LOGCALL_VOID(DB, "GlassTable::open", flags_|root_info|rev);
2008  close();
2009 
2010  flags = flags_;
2011  block_size = root_info.get_blocksize();
2012  root = root_info.get_root();
2013 
2014  if (!writable) {
2015  do_open_to_read(&root_info, rev);
2016  return;
2017  }
2018 
2019  do_open_to_write(&root_info, rev);
2020 }
2021 
2022 bool
2024 {
2025  LOGCALL(DB, bool, "GlassTable::prev_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2026  int c = C_[0].c;
2027  AssertRel(DIR_START,<=,c);
2028  AssertRel(c,<,DIR_END(C_[0].get_p()));
2029  if (c == DIR_START) {
2030  uint4 n = C_[0].get_n();
2031  const uint8_t * p;
2032  while (true) {
2033  if (n == 0) RETURN(false);
2034  n--;
2035  if (n == C[0].get_n()) {
2036  // Block is a leaf block in the built-in cursor (potentially in
2037  // modified form if the table is writable).
2038  p = C_[0].clone(C[0]);
2039  } else {
2040  if (writable) {
2041  // Blocks in the built-in cursor may not have been written
2042  // to disk yet, so we have to check that the block number
2043  // isn't in the built-in cursor or we'll read an
2044  // uninitialised block (for which GET_LEVEL(p) will
2045  // probably return 0).
2046  int j;
2047  for (j = 1; j <= level; ++j) {
2048  if (n == C[j].get_n()) break;
2049  }
2050  if (j <= level) continue;
2051  }
2052 
2053  // Block isn't in the built-in cursor, so the form on disk
2054  // is valid, so read it to check if it's the next level 0
2055  // block.
2056  uint8_t * q = C_[0].init(block_size);
2057  read_block(n, q);
2058  p = q;
2059  C_[0].set_n(n);
2060  }
2061  if (REVISION(p) > revision_number + writable) {
2062  set_overwritten();
2063  RETURN(false);
2064  }
2065  if (GET_LEVEL(p) == 0) break;
2066  }
2067  c = DIR_END(p);
2068  AssertRel(DIR_START,<,c);
2069  }
2070  c -= D2;
2071  C_[0].c = c;
2072  RETURN(true);
2073 }
2074 
2075 bool
2077 {
2078  LOGCALL(DB, bool, "GlassTable::next_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2079  const uint8_t * p = C_[0].get_p();
2080  Assert(p);
2081  int c = C_[0].c;
2082  AssertRel(c,<,DIR_END(p));
2083  c += D2;
2084  Assert(unsigned(c) < block_size);
2085  if (c == DIR_END(p)) {
2086  uint4 n = C_[0].get_n();
2087  while (true) {
2088  n++;
2089  if (n >= free_list.get_first_unused_block()) RETURN(false);
2090  if (writable) {
2091  if (n == C[0].get_n()) {
2092  // Block is a leaf block in the built-in cursor
2093  // (potentially in modified form).
2094  p = C_[0].clone(C[0]);
2095  } else {
2096  // Blocks in the built-in cursor may not have been written
2097  // to disk yet, so we have to check that the block number
2098  // isn't in the built-in cursor or we'll read an
2099  // uninitialised block (for which GET_LEVEL(p) will
2100  // probably return 0).
2101  int j;
2102  for (j = 1; j <= level; ++j) {
2103  if (n == C[j].get_n()) break;
2104  }
2105  if (j <= level) continue;
2106 
2107  // Block isn't in the built-in cursor, so the form on disk
2108  // is valid, so read it to check if it's the next level 0
2109  // block.
2110  uint8_t * q = C_[0].init(block_size);
2111  read_block(n, q);
2112  p = q;
2113  }
2114  } else {
2115  uint8_t * q = C_[0].init(block_size);
2116  read_block(n, q);
2117  p = q;
2118  }
2119  if (REVISION(p) > revision_number + writable) {
2120  set_overwritten();
2121  RETURN(false);
2122  }
2123  if (GET_LEVEL(p) == 0) break;
2124  }
2125  c = DIR_START;
2126  C_[0].set_n(n);
2127  }
2128  C_[0].c = c;
2129  RETURN(true);
2130 }
2131 
2132 bool
2134 {
2135  LOGCALL(DB, bool, "GlassTable::prev_default", Literal("C_") | j);
2136  const uint8_t * p = C_[j].get_p();
2137  int c = C_[j].c;
2138  AssertRel(DIR_START,<=,c);
2139  AssertRel(c,<,DIR_END(p));
2140  AssertRel(unsigned(DIR_END(p)),<=,block_size);
2141  if (c == DIR_START) {
2142  if (j == level) RETURN(false);
2143  if (!prev_default(C_, j + 1)) RETURN(false);
2144  p = C_[j].get_p();
2145  c = DIR_END(p);
2146  AssertRel(DIR_START,<,c);
2147  }
2148  c -= D2;
2149  C_[j].c = c;
2150  if (j > 0) {
2151  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
2152  }
2153  RETURN(true);
2154 }
2155 
2156 bool
2158 {
2159  LOGCALL(DB, bool, "GlassTable::next_default", Literal("C_") | j);
2160  const uint8_t * p = C_[j].get_p();
2161  int c = C_[j].c;
2162  AssertRel(c,<,DIR_END(p));
2163  AssertRel(unsigned(DIR_END(p)),<=,block_size);
2164  c += D2;
2165  if (j > 0) {
2166  AssertRel(DIR_START,<,c);
2167  } else {
2168  AssertRel(DIR_START,<=,c);
2169  }
2170  // Sometimes c can be DIR_END(p) + 2 here it appears...
2171  if (c >= DIR_END(p)) {
2172  if (j == level) RETURN(false);
2173  if (!next_default(C_, j + 1)) RETURN(false);
2174  p = C_[j].get_p();
2175  c = DIR_START;
2176  }
2177  C_[j].c = c;
2178  if (j > 0) {
2179  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
2180 #ifdef BTREE_DEBUG_FULL
2181  printf("Block in GlassTable:next_default");
2182  report_block_full(j - 1, C_[j - 1].get_n(), C_[j - 1].get_p());
2183 #endif /* BTREE_DEBUG_FULL */
2184  }
2185  RETURN(true);
2186 }
2187 
2188 void
2190 {
2191  throw Xapian::DatabaseClosedError("Database has been closed");
2192 }
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:480
#define RETURN(A)
Definition: debuglog.h:482
uint4 get_compress_min() const
Definition: glass_version.h:68
#define Assert(COND)
Definition: omassert.h:122
bool key_exists(const std::string &key) const
Check if a key exists in the Btree.
int io_open_block_rd(const char *fname)
Open a block-based file for reading.
Definition: io_utils.h:38
GlassVersion class.
int DIR_END(const uint8_t *b)
Definition: chert_table.h:154
static void setD(uint8_t *q, int c, int x)
Definition: glass_table.h:391
int component_of() const
Definition: glass_table.h:184
off_t offset
offset to start of table in file.
Definition: glass_table.h:910
bool get_sequential() const
Definition: glass_version.h:63
Indicates an attempt to access a closed database.
Definition: error.h:1097
Glass::LeafItem_wr kt
buffer of size block_size for making up key-tag items
Definition: glass_table.h:804
#define AssertEq(A, B)
Definition: omassert.h:124
void create_and_open(int flags_, const RootInfo &root_info)
Create a new empty btree structure on disk and open it at the initial revision.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
const size_t BLOCK_CAPACITY
Even for items of at maximum size, it must be possible to get this number of items in a block...
Definition: glass_table.h:50
bool io_unlink(const std::string &filename)
Delete a file.
Definition: io_utils.cc:52
bool find(Glass::Cursor *) const
find(C_) searches for the key of B->kt in the B-tree.
Definition: glass_table.cc:530
#define true
Definition: header.h:8
void commit(const GlassTable *B, uint4 block_size)
#define AssertRel(A, REL, B)
Definition: omassert.h:123
#define GLASS_BTREE_MAX_KEY_LEN
The largest possible value of a key_len.
Definition: glass_table.h:57
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
int TOTAL_FREE(const uint8_t *b)
Definition: glass_table.h:113
static int find_in_branch(const uint8_t *p, Glass::LeafItem item, int c)
Definition: glass_table.cc:509
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
void io_read_block(int fd, char *p, size_t n, off_t b, off_t o)
Read block b size n bytes into buffer p from file descriptor fd, offset o.
Definition: io_utils.cc:180
int changed_c
directory offset corresponding to last block to be changed by an addition
Definition: glass_table.h:828
bool readahead_key(const string &key) const
int find_in_branch_(const uint8_t *p, ITEM item, int c)
Definition: glass_table.cc:469
uint4 REVISION(const uint8_t *b)
Definition: glass_table.h:110
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:61
Provides wrappers with POSIXy semantics.
uint8_t * split_p
Buffer used when splitting a block.
Definition: glass_table.h:896
unsigned long cursor_version
Version count for tracking when cursors need to rebuild.
Definition: glass_table.h:846
void set_block_given_by(uint4 n)
Set this item&#39;s tag to point to block n (this block should not be at level 0).
Definition: glass_table.h:380
void set_blocksize(unsigned b)
Definition: glass_version.h:76
Constants in the Xapian namespace.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:479
int io_open_block_wr(const char *fname, bool anew)
Open a block-based file for writing.
Definition: io_utils.cc:67
bool writable
Set to true when the database is opened to write.
Definition: glass_table.h:840
#define STRINGIZE(X)
The STRINGIZE macro converts its parameter into a string constant.
Definition: stringutils.h:36
bool io_readahead_block(int, size_t, off_t, off_t=0)
Readahead block b size n bytes from file descriptor fd.
Definition: io_utils.h:133
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
void set_truncated_key_and_block(Key newkey, int new_comp, int truncate_size, uint4 n)
Definition: glass_table.h:363
#define BLK_UNUSED
Definition: chert_cursor.h:31
STL namespace.
Definitions, types, etc for use inside glass.
Convert types to std::string.
int handle
File descriptor of the table.
Definition: glass_table.h:795
void set_key_and_block(Key newkey, uint4 n)
Definition: glass_table.h:355
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
Utility functions for testing files.
void compact(uint8_t *p)
compact(p) compact the block at p by shuffling all the items up to the end.
Definition: glass_table.cc:563
bool lazy
If true, don&#39;t create the table until it&#39;s needed.
Definition: glass_table.h:904
#define false
Definition: header.h:9
void SET_MAX_FREE(uint8_t *b, int x)
Definition: glass_table.h:119
void write_block(uint4 n, const uint8_t *p, bool appending=false) const
write_block(n, p, appending) writes block n in the DB file from address p.
Definition: glass_table.cc:191
void set_level(int level_)
Definition: glass_version.h:71
bool prev_default(Glass::Cursor *C_, int j) const
#define rare(COND)
Definition: config.h:543
Glass::Cursor C[Glass::BTREE_CURSOR_LEVELS]
Definition: glass_table.h:889
int flags
Flags like DB_NO_SYNC and DB_DANGEROUS.
Definition: glass_table.h:775
~GlassTable()
Close the Btree.
bool full_compaction
set to true when full compaction is to be achieved
Definition: glass_table.h:837
int size() const
SIZE in diagram above.
Definition: glass_table.h:178
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27
const int DIR_START
Definition: chert_table.h:155
uint4 changed_n
the last block to be changed by an addition
Definition: glass_table.h:824
void set_n(uint4 n)
Definition: glass_cursor.h:106
int MAX_FREE(const uint8_t *b)
Definition: glass_table.h:112
int compare(ITEM1 a, ITEM2 b)
Compare two items by their keys.
Definition: glass_table.h:935
Hierarchy of classes which Xapian can throw as exceptions.
void add_item_to_leaf(uint8_t *p, Glass::LeafItem kt, int c)
add_item_to_leaf(p, kt_, c) adds item kt_ to the leaf block at p.
Definition: glass_table.cc:769
void set_num_entries(glass_tablesize_t n)
Definition: glass_version.h:72
void form_key(const std::string &key) const
const int K1
Definition: glass_table.h:72
Definition: pretty.h:45
void flush_db()
Flush any outstanding changes to the DB file of the table.
void add_item_to_branch(uint8_t *p, Glass::BItem kt, int c)
add_item_to_branch(p, kt_, c) adds item kt_ to the branch block at p.
Definition: glass_table.cc:809
void read_root()
void enter_key_above_branch(int j, Glass::BItem newitem)
enter_key_above_branch(j, newkey) is called after a branch block split.
Definition: glass_table.cc:701
const char * tablename
The name of the table (used when writing changesets).
Definition: glass_table.h:763
void do_open_to_read(const RootInfo *root_info, glass_revision_number_t rev)
Perform the opening operation to read.
uint32_t uint4
Definition: internaltypes.h:32
glass_tablesize_t item_count
keeps a count of the number of items in the B-tree.
Definition: glass_table.h:769
DatabaseModifiedError indicates a database was modified.
Definition: error.h:539
functions for reading and writing different width words
int GET_LEVEL(const uint8_t *b)
Definition: glass_table.h:111
void close(bool permanent=false)
Close the Btree.
bool prev_for_sequential(Glass::Cursor *C_, int dummy) const
int add_kt(bool found)
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C...
void split_root(uint4 split_n)
Btree needs to gain a new level to insert more items: so split root block and construct a new one...
Definition: glass_table.cc:600
static uint8_t * zeroed_new(size_t size)
Definition: glass_table.cc:85
Glass freelist.
static int find_in_leaf(const uint8_t *p, Glass::LeafItem item, int c, bool &exact)
find_in_leaf(p, key, c, exact) searches for the key in the leaf block at p.
Definition: glass_table.cc:418
void commit(glass_revision_number_t revision, RootInfo *root_info)
Commit any outstanding changes to the table.
bool decompress_chunk(CompressionStream &comp_stream, string &tag) const
Definition: glass_table.h:198
Glass changesets.
bool next_for_sequential(Glass::Cursor *C_, int dummy) const
GlassChanges * changes_obj
The GlassChanges object to write block changes to.
Definition: glass_table.h:852
uint4 last_readahead
Last block readahead_key() preread.
Definition: glass_table.h:907
uint4 get_first_unused_block() const
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
void io_write_block(int fd, const char *p, size_t n, off_t b, off_t o)
Write block b size n bytes from buffer p to file descriptor fd, offset o.
Definition: io_utils.cc:228
int GET_LEVEL(const uint8_t *b)
Definition: chert_table.h:151
void set_revision(uint4 revision_)
int length() const
Definition: glass_table.h:149
bool single_file() const
Definition: glass_table.h:854
GlassTable(const GlassTable &)
Copying not allowed.
const int I2
Definition: glass_table.h:73
const uint8_t * clone(const Cursor &o)
Definition: glass_cursor.h:68
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void alter()
Btree::alter(); is called when the B-tree is to be altered.
Definition: glass_table.cc:374
void add_branch_item(Glass::BItem kt, int j)
GlassTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
Definition: glass_table.cc:940
void SET_LEVEL(uint8_t *b, int x)
Definition: glass_table.h:118
bool get_compressed() const
Definition: glass_table.h:181
const int D2
Definition: glass_table.h:74
bool unpack(const char **pstart, const char *end)
glass_tablesize_t get_num_entries() const
Definition: glass_version.h:61
bool last_component() const
Definition: glass_table.h:183
CompressionStream comp_stream
Definition: glass_table.h:901
bool get_root_is_fake() const
Definition: glass_version.h:62
int DIR_END(const uint8_t *b)
Definition: glass_table.h:114
static void throw_database_closed()
Throw an exception indicating that the database is closed.
Btree implementation.
const int DB_DANGEROUS
Update the database in-place.
Definition: constants.h:103
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:478
int seq_count
count of the number of successive instances of purely sequential addition, starting at SEQ_START_POIN...
Definition: glass_table.h:821
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:147
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
const int DIR_START
Definition: glass_table.h:115
int size() const
SIZE in diagram above.
Definition: glass_table.h:319
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
std::string name
The path name of the B tree.
Definition: glass_table.h:816
static void setD(uint8_t *q, int c, int x)
Definition: glass_table.h:278
bool Btree_modified
Set to true the first time the B-tree is modified.
Definition: glass_table.h:834
int mid_point(uint8_t *p) const
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in ...
Definition: glass_table.cc:732
unsigned int block_size
block size of the B tree in bytes
Definition: glass_table.h:772
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
void init(unsigned blocksize_, uint4 compress_min_)
Key key() const
Definition: glass_table.h:322
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
void read_block(uint4 n, uint8_t *p) const
read_block(n, p) reads block n of the DB file to address p.
Definition: glass_table.cc:160
const int LEVEL_FREELIST
Freelist blocks have their level set to LEVEL_FREELIST.
Definition: glass_table.h:136
bool read_tag(Glass::Cursor *C_, std::string *tag, bool keep_compressed) const
Read the tag value for the key pointed to by cursor C_.
void set_overwritten() const
Definition: glass_table.cc:289
uint8_t * buffer
buffer of size block_size for reforming blocks
Definition: glass_table.h:807
void append_chunk(std::string *tag) const
Definition: glass_table.h:189
unsigned get_blocksize() const
Definition: glass_version.h:64
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
char name[9]
Definition: dbcheck.cc:55
Interface to Btree cursors.
glass_block_t get_root() const
Definition: glass_version.h:59
void enter_key_above_leaf(Glass::LeafItem previtem, Glass::LeafItem newitem)
enter_key_above_leaf(previtem, newitem) is called after a leaf block split.
Definition: glass_table.cc:645
bool faked_root_block
true if the root block is faked (not written to disk).
Definition: glass_table.h:781
int delete_kt()
int c
offset in the block&#39;s directory
Definition: glass_cursor.h:134
size_t max_item_size
maximum size of an item (key-tag pair)
Definition: glass_table.h:831
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
Pack types into strings and unpack them again.
const int X2
Definition: glass_table.h:75
void basic_open(const RootInfo *root_info, glass_revision_number_t rev)
const int D2
Definition: chert_table.h:122
Wrappers for low-level POSIX I/O routines.
void set_root_is_fake(bool f)
Definition: glass_version.h:73
const int BTREE_CURSOR_LEVELS
Definition: chert_table.h:325
void SET_DIR_END(uint8_t *b, int x)
Definition: glass_table.h:121
Various handy helpers which std::string really should provide.
void SET_REVISION(uint8_t *b, uint4 rev)
Definition: glass_table.h:117
void delete_branch_item(int j)
GlassTable::delete_branch_item(j, repeatedly) is (almost) the converse of add_branch_item.
GlassFreeList free_list
List of free blocks.
Definition: glass_table.h:810
void block_to_cursor(Glass::Cursor *C_, int j, uint4 n) const
Definition: glass_table.cc:310
void add_leaf_item(Glass::LeafItem kt)
GlassTable::add_leaf_item(kt_) adds item kt_ to the leaf block.
Definition: glass_table.cc:845
uint4 compress_min
Minimum size tag to try compressing (0 for no compression).
Definition: glass_table.h:899
const int BTREE_CURSOR_LEVELS
Definition: glass_table.h:402
Definition: header.h:151
unsigned REVISION(const uint8_t *b)
Definition: chert_table.h:150
void pack(std::string &buf)
const int BYTES_PER_BLOCK_NUMBER
Definition: glass_table.h:60
#define BYTE_PAIR_RANGE
Definition: glass_table.cc:156
void cancel(const RootInfo &root_info, glass_revision_number_t rev)
Cancel any outstanding changes.
T get_address() const
Definition: glass_table.h:317
const uint8_t * get_p() const
Get pointer to block.
Definition: glass_cursor.h:116
Various assertion macros.
glass_revision_number_t revision_number
revision number of the opened B-tree.
Definition: glass_table.h:766
bool next_default(Glass::Cursor *C_, int j) const
DatabaseError indicates some sort of database related error.
Definition: error.h:367
static uint4 block_given_by(const uint8_t *p, int c)
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value...
void set_root(glass_block_t root_)
Definition: glass_version.h:75
bool exists() const
Determine whether the btree exists on disk.
void set_sequential(bool f)
Definition: glass_version.h:74
uint4 root
the root block of the B-tree
Definition: glass_table.h:801
void do_open_to_write(const RootInfo *root_info, glass_revision_number_t rev=0)
Perform the opening operation to write.
void form_null_key(uint4 n)
Form an item with a null key and with block number n in the tag.
Definition: glass_table.h:385
int level
number of levels, counting from 0
Definition: glass_table.h:798
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
void delete_leaf_item(bool repeatedly)
GlassTable::delete_leaf_item(repeatedly) is (almost) the converse of add_leaf_item.
const std::string & get_free_list() const
Definition: glass_version.h:69
bool sequential
true iff the data has been written in a single write in sequential order.
Definition: glass_table.h:786
void SET_TOTAL_FREE(uint8_t *b, int x)
Definition: glass_table.h:120
void set_free_list(const std::string &s)
Definition: glass_version.h:80
int get_level() const
Definition: glass_version.h:60
bool cursor_created_since_last_modification
Flag for tracking when cursors need to rebuild.
Definition: glass_table.h:843
uint4 block_given_by() const
Get this item&#39;s tag as a block number (this block should not be at level 0).
Definition: glass_table.h:326
bool del(const std::string &key)
Delete an entry from the table.
uint4 get_n() const
Get the block number.
Definition: glass_cursor.h:101
uint8_t * init(unsigned block_size)
Definition: glass_cursor.h:54
bool rewrite
true if the block is not the same as on disk, and so needs rewriting
Definition: glass_cursor.h:137
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
#define SEQ_START_POINT
Flip to sequential addition block-splitting after this number of observed sequential additions (in ne...
Definition: glass_table.cc:146
void destroy()
Definition: glass_cursor.h:83
void set_full_compaction(bool parity)
#define C(X)