xapian-core  1.4.25
glass_table.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
7  * Copyright 2008 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "glass_table.h"
28 
29 #include <xapian/error.h>
30 
31 #include "omassert.h"
32 #include "posixy_wrapper.h"
33 #include "str.h"
34 #include "stringutils.h" // For STRINGIZE().
35 
36 #include <sys/types.h>
37 
38 #include <cerrno>
39 #include <cstring> /* for memmove */
40 #include <climits> /* for CHAR_BIT */
41 
42 #include "glass_freelist.h"
43 #include "glass_changes.h"
44 #include "glass_cursor.h"
45 #include "glass_defs.h"
46 #include "glass_version.h"
47 
48 #include "debuglog.h"
49 #include "filetests.h"
50 #include "io_utils.h"
51 #include "pack.h"
52 #include "wordaccess.h"
53 
54 #include <algorithm> // for std::min()
55 #include <string>
56 
57 #include "xapian/constants.h"
58 
59 using namespace Glass;
60 using namespace std;
61 
62 //#define BTREE_DEBUG_FULL 1
63 #undef BTREE_DEBUG_FULL
64 
65 #ifdef BTREE_DEBUG_FULL
66 /*------debugging aids from here--------*/
67 
68 static void print_key(const uint8_t * p, int c, int j);
69 static void print_tag(const uint8_t * p, int c, int j);
70 
71 /*
72 static void report_cursor(int N, Btree * B, Glass::Cursor * C)
73 {
74  int i;
75  printf("%d)\n", N);
76  for (i = 0; i <= B->level; i++)
77  printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
78  C[i].p, C[i].c, C[i].n, C[i].rewrite);
79 }
80 */
81 
82 /*------to here--------*/
83 #endif /* BTREE_DEBUG_FULL */
84 
85 static inline uint8_t *zeroed_new(size_t size)
86 {
87  uint8_t *temp = new uint8_t[size];
88  memset(temp, 0, size);
89  return temp;
90 }
91 
92 /* A B-tree consists of a file with extension GLASS_TABLE_EXTENSION divided
93  into a sequence of equal sized blocks, numbered 0, 1, 2 ... some of which
94  are free, some in use. Those in use are arranged in a tree. Lists of
95  currently unused blocks are stored in a freelist which is itself stored in
96  unused blocks.
97 
98  Some "root info" is needed to locate a particular revision of the B-tree
99  - this is stored in the "version file" (we store this data for all tables
100  at a particular revision together).
101 
102  Each block, b, has a structure like this:
103 
104  R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
105  <---------- D ----------> <-M->
106 
107  And then,
108 
109  R = REVISION(b) is the revision number the B-tree had when the block was
110  written into the DB file.
111  L = GET_LEVEL(b) is the level of the block, which is the number of levels
112  towards the root of the B-tree structure. So leaf blocks
113  have level 0 and the one root block has the highest level
114  equal to the number of levels in the B-tree. For blocks
115  storing the freelist, the level is set to 254.
116  M = MAX_FREE(b) is the size of the gap between the end of the directory and
117  the first item of data. (It is not necessarily the maximum
118  size among the bits of space that are free, but I can't
119  think of a better name.)
120  T = TOTAL_FREE(b)is the total amount of free space left in b.
121  D = DIR_END(b) gives the offset to the end of the directory.
122 
123  o1, o2 ... oN are a directory of offsets to the N items held in the block.
124  The items are key-tag pairs, and as they occur in the directory are ordered
125  by the keys.
126 
127  An item has this form:
128 
129  I K key X tag
130  ←K→
131  <------I---->
132 
133  A long tag presented through the API is split up into C pieces small enough
134  to be accommodated in the blocks of the B-tree. The key is extended to
135  include a counter, x, which runs from 1 to C. The key is preceded by a
136  length, K, and the whole item with a length, I, as depicted above. The
137  upper bits of I encode a flag indicating if this item is compressed, and a
138  flag saying if this is the last piece of a tag (i.e. if x == C).
139 
140  Here are the corresponding definitions:
141 
142 */
143 
146 #define SEQ_START_POINT (-10)
147 
148 /* Note use of the limits.h values:
149  UCHAR_MAX = 255, an unsigned with all bits set, and
150  CHAR_BIT = 8, the number of bits per byte
151 
152  BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
153  bytes -- 64K effectively.
154 */
155 
156 #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
157 
159 void
160 GlassTable::read_block(uint4 n, uint8_t * p) const
161 {
162  // Log the value of p, not the contents of the block it points to...
163  LOGCALL_VOID(DB, "GlassTable::read_block", n | (void*)p);
164  if (rare(handle == -2))
166  AssertRel(n,<,free_list.get_first_unused_block());
167 
168  io_read_block(handle, reinterpret_cast<char *>(p), block_size, n, offset);
169 
170  if (GET_LEVEL(p) != LEVEL_FREELIST) {
171  int dir_end = DIR_END(p);
172  if (rare(dir_end < DIR_START || unsigned(dir_end) > block_size)) {
173  string msg("dir_end invalid in block ");
174  msg += str(n);
175  throw Xapian::DatabaseCorruptError(msg);
176  }
177  }
178 }
179 
190 void
191 GlassTable::write_block(uint4 n, const uint8_t * p, bool appending) const
192 {
193  LOGCALL_VOID(DB, "GlassTable::write_block", n | p | appending);
194  Assert(writable);
195  /* Check that n is in range. */
196  AssertRel(n,<,free_list.get_first_unused_block());
197 
198  /* don't write to non-free */
199  // FIXME: We can no longer check this easily.
200  // AssertParanoid(free_list.block_free_at_start(block_size, n));
201 
202  /* write revision is okay */
203  AssertEqParanoid(REVISION(p), revision_number + 1);
204 
205  if (appending) {
206  // In the case of the freelist, new entries can safely be written into
207  // the space at the end of the latest freelist block, as that's unused
208  // by previous revisions. It is also safe to write into a block which
209  // wasn't used in the previous revision.
210  //
211  // It's only when we start a new freelist block that we need to worry
212  // about invalidating old revisions.
213  } else if (flags & Xapian::DB_DANGEROUS) {
214  // FIXME: We should somehow flag this to prevent readers opening the
215  // database. Removing "iamglass" or setting a flag in it doesn't deal
216  // with any existing readers though. Perhaps we want to have readers
217  // read lock and try to take an exclusive lock here?
218  }
219 
220  const char * p_char = reinterpret_cast<const char *>(p);
221  io_write_block(handle, p_char, block_size, n, offset);
222 
223  if (!changes_obj) return;
224 
225  unsigned char v;
226  // FIXME: track table_type in this class?
227  if (strcmp(tablename, "position") == 0) {
228  v = int(Glass::POSITION);
229  } else if (strcmp(tablename, "postlist") == 0) {
230  v = int(Glass::POSTLIST);
231  } else if (strcmp(tablename, "docdata") == 0) {
232  v = int(Glass::DOCDATA);
233  } else if (strcmp(tablename, "spelling") == 0) {
234  v = int(Glass::SPELLING);
235  } else if (strcmp(tablename, "synonym") == 0) {
236  v = int(Glass::SYNONYM);
237  } else if (strcmp(tablename, "termlist") == 0) {
238  v = int(Glass::TERMLIST);
239  } else {
240  return; // FIXME
241  }
242 
243  if (block_size == 2048) {
244  v |= 0 << 3;
245  } else if (block_size == 4096) {
246  v |= 1 << 3;
247  } else if (block_size == 8192) {
248  v |= 2 << 3;
249  } else if (block_size == 16384) {
250  v |= 3 << 3;
251  } else if (block_size == 32768) {
252  v |= 4 << 3;
253  } else if (block_size == 65536) {
254  v |= 5 << 3;
255  } else {
256  return; // FIXME
257  }
258 
259  string buf;
260  buf += char(v);
261  // Write the block number to the file
262  pack_uint(buf, n);
263 
264  changes_obj->write_block(buf);
265  changes_obj->write_block(reinterpret_cast<const char *>(p), block_size);
266 }
267 
268 /* A note on cursors:
269 
270  Each B-tree level has a corresponding array element C[j] in a
271  cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
272  root block level. Within a level j,
273 
274  C[j].p addresses the block
275  C[j].c is the offset into the directory entry in the block
276  C[j].n is the number of the block at C[j].p
277 
278  A look up in the B-tree causes navigation of the blocks starting
279  from the root. In each block, p, we find an offset, c, to an item
280  which gives the number, n, of the block for the next level. This
281  leads to an array of values p,c,n which are held inside the cursor.
282 
283  Any Btree object B has a built-in cursor, at B->C. But other cursors may
284  be created. If BC is a created cursor, BC->C is the cursor in the
285  sense given above, and BC->B is the handle for the B-tree again.
286 */
287 
288 void
290 {
291  LOGCALL_VOID(DB, "GlassTable::set_overwritten", NO_ARGS);
292  // If we're writable, there shouldn't be another writer who could cause
293  // overwritten to be flagged, so that's a DatabaseCorruptError.
294  if (writable)
295  throw Xapian::DatabaseCorruptError("Block overwritten - run xapian-check on this database");
296  throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
297 }
298 
299 /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
300  C, writing the block currently at C[j] back to disk if necessary.
301  Note that
302 
303  C[j].rewrite
304 
305  is true iff C[j].n is different from block n in file DB. If it is
306  false no rewriting is necessary.
307 */
308 
309 void
311 {
312  LOGCALL_VOID(DB, "GlassTable::block_to_cursor", (void*)C_ | j | n);
313  if (n == C_[j].get_n()) return;
314 
315  if (writable && C_[j].rewrite) {
316  Assert(C == C_);
317  write_block(C_[j].get_n(), C_[j].get_p());
318  C_[j].rewrite = false;
319  }
320 
321  // Check if the block is in the built-in cursor (potentially in
322  // modified form).
323  const uint8_t * p;
324  if (n == C[j].get_n()) {
325  p = C_[j].clone(C[j]);
326  } else {
327  uint8_t * q = C_[j].init(block_size);
328  read_block(n, q);
329  p = q;
330  C_[j].set_n(n);
331  }
332 
333  if (j < level) {
334  /* unsigned comparison */
335  if (rare(REVISION(p) > REVISION(C_[j + 1].get_p()))) {
336  set_overwritten();
337  return;
338  }
339  }
340 
341  if (rare(j != GET_LEVEL(p))) {
342  string msg = "Expected block ";
343  msg += str(n);
344  msg += " to be level ";
345  msg += str(j);
346  msg += ", not ";
347  msg += str(GET_LEVEL(p));
348  throw Xapian::DatabaseCorruptError(msg);
349  }
350 }
351 
373 void
375 {
376  LOGCALL_VOID(DB, "GlassTable::alter", NO_ARGS);
377  Assert(writable);
378  if (flags & Xapian::DB_DANGEROUS) {
379  C[0].rewrite = true;
380  return;
381  }
382  int j = 0;
383  while (true) {
384  if (C[j].rewrite) return; /* all new, so return */
385  C[j].rewrite = true;
386 
387  glass_revision_number_t rev = REVISION(C[j].get_p());
388  if (rev == revision_number + 1) {
389  return;
390  }
391  Assert(rev < revision_number + 1);
392  uint4 n = C[j].get_n();
393  free_list.mark_block_unused(this, block_size, n);
394  SET_REVISION(C[j].get_modifiable_p(block_size), revision_number + 1);
395  n = free_list.get_block(this, block_size);
396  C[j].set_n(n);
397 
398  if (j == level) return;
399  j++;
400  BItem_wr(C[j].get_modifiable_p(block_size), C[j].c).set_block_given_by(n);
401  }
402 }
403 
417 int
418 GlassTable::find_in_leaf(const uint8_t * p, LeafItem item, int c, bool& exact)
419 {
420  LOGCALL_STATIC(DB, int, "GlassTable::find_in_leaf", (const void*)p | (const void *)item.get_address() | c | Literal("bool&"));
421  // c should be odd (either -1, or an even offset from DIR_START).
422  Assert((c & 1) == 1);
423  int i = DIR_START;
424  i -= D2;
425  if (c != -1) {
426  AssertRel(i,<=,c);
427  }
428  int j = DIR_END(p);
429 
430  if (c != -1) {
431  if (c < j && i < c) {
432  int r = compare(LeafItem(p, c), item);
433  if (r == 0) {
434  exact = true;
435  return c;
436  }
437  if (r < 0) i = c;
438  }
439  c += D2;
440  if (c < j && i < c) {
441  int r = compare(item, LeafItem(p, c));
442  if (r == 0) {
443  exact = true;
444  return c;
445  }
446  if (r < 0) j = c;
447  }
448  }
449 
450  while (j - i > D2) {
451  int k = i + ((j - i) / (D2 * 2)) * D2; /* mid way */
452  int r = compare(item, LeafItem(p, k));
453  if (r < 0) {
454  j = k;
455  } else {
456  i = k;
457  if (r == 0) {
458  exact = true;
459  break;
460  }
461  }
462  }
463  AssertRel(DIR_START - D2,<=,i);
464  AssertRel(i,<,DIR_END(p));
465  RETURN(i);
466 }
467 
468 template<typename ITEM> int
469 find_in_branch_(const uint8_t * p, ITEM item, int c)
470 {
471  // c should be odd (either -1, or an even offset from DIR_START).
472  Assert((c & 1) == 1);
473  int i = DIR_START;
474  if (c != -1) {
475  AssertRel(i,<=,c);
476  }
477  int j = DIR_END(p);
478 
479  if (c != -1) {
480  if (c < j && i < c) {
481  int r = compare(BItem(p, c), item);
482  if (r == 0) return c;
483  if (r < 0) i = c;
484  }
485  c += D2;
486  if (c < j && i < c) {
487  int r = compare(item, BItem(p, c));
488  if (r == 0) return c;
489  if (r < 0) j = c;
490  }
491  }
492 
493  while (j - i > D2) {
494  int k = i + ((j - i) / (D2 * 2)) * D2; /* mid way */
495  int r = compare(item, BItem(p, k));
496  if (r < 0) {
497  j = k;
498  } else {
499  i = k;
500  if (r == 0) break;
501  }
502  }
503  AssertRel(DIR_START,<=,i);
504  AssertRel(i,<,DIR_END(p));
505  return i;
506 }
507 
508 int
509 GlassTable::find_in_branch(const uint8_t * p, LeafItem item, int c)
510 {
511  LOGCALL_STATIC(DB, int, "GlassTable::find_in_branch", (const void*)p | (const void *)item.get_address() | c);
512  RETURN(find_in_branch_(p, item, c));
513 }
514 
515 int
516 GlassTable::find_in_branch(const uint8_t * p, BItem item, int c)
517 {
518  LOGCALL_STATIC(DB, int, "GlassTable::find_in_branch", (const void*)p | (const void *)item.get_address() | c);
519  RETURN(find_in_branch_(p, item, c));
520 }
521 
529 bool
531 {
532  LOGCALL(DB, bool, "GlassTable::find", (void*)C_);
533  // Note: the parameter is needed when we're called by GlassCursor
534  const uint8_t * p;
535  int c;
536  for (int j = level; j > 0; --j) {
537  p = C_[j].get_p();
538  c = find_in_branch(p, kt, C_[j].c);
539 #ifdef BTREE_DEBUG_FULL
540  printf("Block in GlassTable:find - code position 1");
541  report_block_full(j, C_[j].get_n(), p);
542 #endif /* BTREE_DEBUG_FULL */
543  C_[j].c = c;
544  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
545  }
546  p = C_[0].get_p();
547  bool exact = false;
548  c = find_in_leaf(p, kt, C_[0].c, exact);
549 #ifdef BTREE_DEBUG_FULL
550  printf("Block in GlassTable:find - code position 2");
551  report_block_full(0, C_[0].get_n(), p);
552 #endif /* BTREE_DEBUG_FULL */
553  C_[0].c = c;
554  RETURN(exact);
555 }
556 
562 void
564 {
565  LOGCALL_VOID(DB, "GlassTable::compact", (void*)p);
566  Assert(p != buffer);
567  Assert(writable);
568  int e = block_size;
569  uint8_t * b = buffer;
570  int dir_end = DIR_END(p);
571  if (GET_LEVEL(p) == 0) {
572  // Leaf.
573  for (int c = DIR_START; c < dir_end; c += D2) {
574  LeafItem item(p, c);
575  int l = item.size();
576  e -= l;
577  memcpy(b + e, item.get_address(), l);
578  LeafItem_wr::setD(p, c, e); /* reform in b */
579  }
580  } else {
581  // Branch.
582  for (int c = DIR_START; c < dir_end; c += D2) {
583  BItem item(p, c);
584  int l = item.size();
585  e -= l;
586  memcpy(b + e, item.get_address(), l);
587  BItem_wr::setD(p, c, e); /* reform in b */
588  }
589  }
590  memcpy(p + e, b + e, block_size - e); /* copy back */
591  e -= dir_end;
592  SET_TOTAL_FREE(p, e);
593  SET_MAX_FREE(p, e);
594 }
595 
599 void
601 {
602  LOGCALL_VOID(DB, "GlassTable::split_root", split_n);
603  /* gain a level */
604  ++level;
605 
606  /* check level overflow - this isn't something that should ever happen
607  * but deserves more than an Assert()... */
608  if (level == GLASS_BTREE_CURSOR_LEVELS) {
609  throw Xapian::DatabaseCorruptError("Btree has grown impossibly large ("
611  " levels)");
612  }
613 
614  uint8_t * q = C[level].init(block_size);
615  memset(q, 0, block_size);
616  C[level].c = DIR_START;
617  C[level].set_n(free_list.get_block(this, block_size));
618  C[level].rewrite = true;
619  SET_REVISION(q, revision_number + 1);
620  SET_LEVEL(q, level);
622  compact(q); /* to reset TOTAL_FREE, MAX_FREE */
623 
624  /* form a null key in b with a pointer to the old root */
625  uint8_t b[10]; /* 7 is exact */
626  BItem_wr item(b);
627  item.form_null_key(split_n);
628  add_branch_item(item, level);
629 }
630 
646 void
648 {
649  LOGCALL_VOID(DB, "GlassTable::enter_key_above_leaf", Literal("previtem") | Literal("newitem"));
650  Assert(writable);
651  Assert(compare(previtem, newitem) < 0);
652 
653  Key prevkey = previtem.key();
654  Key newkey = newitem.key();
655  int new_comp = newitem.component_of();
656 
657  uint4 blocknumber = C[0].get_n();
658 
659  // FIXME update to use Key
660  // Keys are truncated here: but don't truncate the count at the end away.
661  const int newkey_len = newkey.length();
662  AssertRel(newkey_len,>,0);
663 
664  // Truncate the key to the minimal key which differs from prevkey,
665  // the preceding key in the block.
666  int i = 0;
667  const int min_len = min(newkey_len, prevkey.length());
668  while (i < min_len && prevkey[i] == newkey[i]) {
669  i++;
670  }
671 
672  // Want one byte of difference.
673  if (i < newkey_len) i++;
674 
675  // Enough space for a branch item with maximum length key.
676  uint8_t b[BYTES_PER_BLOCK_NUMBER + K1 + 255 + X2];
677  BItem_wr item(b);
678  AssertRel(i, <=, 255);
679  item.set_truncated_key_and_block(newkey, new_comp, i, blocknumber);
680 
681  // The split block gets inserted into the parent after the pointer to the
682  // current child.
683  AssertEq(C[1].c, find_in_branch(C[1].get_p(), item, C[1].c));
684  C[1].c += D2;
685  C[1].rewrite = true; /* a subtle point: this *is* required. */
686  add_branch_item(item, 1);
687 }
688 
702 void
704 {
705  LOGCALL_VOID(DB, "GlassTable::enter_key_above_branch", j | Literal("newitem"));
706  Assert(writable);
707  AssertRel(j,>,1);
708 
709  /* Can't truncate between branch levels, since the separated keys
710  * are in at the leaf level, and truncating again will change the
711  * branch point.
712  */
713 
714  uint4 blocknumber = C[j - 1].get_n();
715 
716  // Enough space for a branch item with maximum length key.
717  uint8_t b[BYTES_PER_BLOCK_NUMBER + K1 + 255 + X2];
718  BItem_wr item(b);
719  item.set_key_and_block(newitem.key(), blocknumber);
720 
721  // The split block gets inserted into the parent after the pointer to the
722  // current child.
723  AssertEq(C[j].c, find_in_branch(C[j].get_p(), item, C[j].c));
724  C[j].c += D2;
725  C[j].rewrite = true; /* a subtle point: this *is* required. */
726  add_branch_item(item, j);
727 }
728 
733 int
734 GlassTable::mid_point(uint8_t * p) const
735 {
736  LOGCALL(DB, int, "GlassTable::mid_point", (void*)p);
737  int n = 0;
738  int dir_end = DIR_END(p);
739  int size = block_size - TOTAL_FREE(p) - dir_end;
740  for (int c = DIR_START; c < dir_end; c += D2) {
741  int l;
742  if (GET_LEVEL(p) == 0) {
743  l = LeafItem(p, c).size();
744  } else {
745  l = BItem(p, c).size();
746  }
747  n += 2 * l;
748  if (n >= size) {
749  if (l < n - size) RETURN(c);
750  RETURN(c + D2);
751  }
752  }
753 
754  /* This shouldn't happen, as the sum of the item sizes should be the same
755  * as the value calculated in size, so assert but return a sane value just
756  * in case. */
757  Assert(false);
758  RETURN(dir_end);
759 }
760 
770 void
771 GlassTable::add_item_to_leaf(uint8_t * p, LeafItem kt_, int c)
772 {
773  LOGCALL_VOID(DB, "GlassTable::add_item_to_leaf", (void*)p | Literal("kt_") | c);
774  Assert(writable);
775  int dir_end = DIR_END(p);
776  int kt_len = kt_.size();
777  int needed = kt_len + D2;
778  int new_total = TOTAL_FREE(p) - needed;
779  int new_max = MAX_FREE(p) - needed;
780 
781  Assert(new_total >= 0);
782 
783  AssertRel(MAX_FREE(p),>=,needed);
784 
785  AssertRel(DIR_START,<=,c);
786  AssertRel(c,<=,dir_end);
787 
788  memmove(p + c + D2, p + c, dir_end - c);
789  dir_end += D2;
790  SET_DIR_END(p, dir_end);
791 
792  int o = dir_end + new_max;
793  LeafItem_wr::setD(p, c, o);
794  memmove(p + o, kt_.get_address(), kt_len);
795 
796  SET_MAX_FREE(p, new_max);
797 
798  SET_TOTAL_FREE(p, new_total);
799 }
800 
810 void
811 GlassTable::add_item_to_branch(uint8_t * p, BItem kt_, int c)
812 {
813  LOGCALL_VOID(DB, "GlassTable::add_item_to_branch", (void*)p | Literal("kt_") | c);
814  Assert(writable);
815  int dir_end = DIR_END(p);
816  int kt_len = kt_.size();
817  int needed = kt_len + D2;
818  int new_total = TOTAL_FREE(p) - needed;
819  int new_max = MAX_FREE(p) - needed;
820 
821  Assert(new_total >= 0);
822 
823  AssertRel(MAX_FREE(p),>=,needed);
824 
825  AssertRel(DIR_START,<=,c);
826  AssertRel(c,<=,dir_end);
827 
828  memmove(p + c + D2, p + c, dir_end - c);
829  dir_end += D2;
830  SET_DIR_END(p, dir_end);
831 
832  int o = dir_end + new_max;
833  BItem_wr::setD(p, c, o);
834  memmove(p + o, kt_.get_address(), kt_len);
835 
836  SET_MAX_FREE(p, new_max);
837 
838  SET_TOTAL_FREE(p, new_total);
839 }
840 
846 void
848 {
849  LOGCALL_VOID(DB, "GlassTable::add_leaf_item", Literal("kt_"));
850  Assert(writable);
851  uint8_t * p = C[0].get_modifiable_p(block_size);
852  int c = C[0].c;
853  uint4 n;
854 
855  int needed = kt_.size() + D2;
856  if (TOTAL_FREE(p) < needed) {
857  int m;
858  // Prepare to split p. After splitting, the block is in two halves, the
859  // lower half is split_p, the upper half p again. add_to_upper_half
860  // becomes true when the item gets added to p, false when it gets added
861  // to split_p.
862 
863  if (seq_count < 0) {
864  // If we're not in sequential mode, we split at the mid point
865  // of the node.
866  m = mid_point(p);
867  } else {
868  // During sequential addition, split at the insert point
869  AssertRel(c,>=,DIR_START);
870  m = c;
871  }
872 
873  uint4 split_n = C[0].get_n();
874  C[0].set_n(free_list.get_block(this, block_size));
875 
876  memcpy(split_p, p, block_size); // replicate the whole block in split_p
877  SET_DIR_END(split_p, m);
878  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
879 
880  {
881  int residue = DIR_END(p) - m;
882  int new_dir_end = DIR_START + residue;
883  memmove(p + DIR_START, p + m, residue);
884  SET_DIR_END(p, new_dir_end);
885  }
886 
887  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
888 
889  bool add_to_upper_half;
890  if (seq_count < 0) {
891  add_to_upper_half = (c >= m);
892  } else {
893  // And add item to lower half if split_p has room, otherwise upper
894  // half
895  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
896  }
897 
898  if (add_to_upper_half) {
899  c -= (m - DIR_START);
900  Assert(seq_count < 0 || c <= DIR_START + D2);
901  Assert(c >= DIR_START);
902  Assert(c <= DIR_END(p));
903  add_item_to_leaf(p, kt_, c);
904  n = C[0].get_n();
905  } else {
906  Assert(c >= DIR_START);
907  Assert(c <= DIR_END(split_p));
908  add_item_to_leaf(split_p, kt_, c);
909  n = split_n;
910  }
911  write_block(split_n, split_p);
912 
913  // Check if we're splitting the root block.
914  if (0 == level) split_root(split_n);
915 
916  /* Enter a separating key at level 1 between */
917  /* the last key of block split_p, and the first key of block p */
918  enter_key_above_leaf(LeafItem(split_p, DIR_END(split_p) - D2),
919  LeafItem(p, DIR_START));
920  } else {
921  AssertRel(TOTAL_FREE(p),>=,needed);
922 
923  if (MAX_FREE(p) < needed) {
924  compact(p);
925  AssertRel(MAX_FREE(p),>=,needed);
926  }
927 
928  add_item_to_leaf(p, kt_, c);
929  n = C[0].get_n();
930  }
931 
932  changed_n = n;
933  changed_c = c;
934 }
935 
941 void
943 {
944  LOGCALL_VOID(DB, "GlassTable::add_branch_item", Literal("kt_") | j);
945  Assert(writable);
946  uint8_t * p = C[j].get_modifiable_p(block_size);
947  int c = C[j].c;
948 
949  int needed = kt_.size() + D2;
950  if (TOTAL_FREE(p) < needed) {
951  int m;
952  // Prepare to split p. After splitting, the block is in two halves, the
953  // lower half is split_p, the upper half p again. add_to_upper_half
954  // becomes true when the item gets added to p, false when it gets added
955  // to split_p.
956 
957  if (seq_count < 0) {
958  // If we're not in sequential mode, we split at the mid point
959  // of the node.
960  m = mid_point(p);
961  } else {
962  // During sequential addition, split at the insert point
963  AssertRel(c,>=,DIR_START);
964  m = c;
965  }
966 
967  uint4 split_n = C[j].get_n();
968  C[j].set_n(free_list.get_block(this, block_size));
969 
970  memcpy(split_p, p, block_size); // replicate the whole block in split_p
971  SET_DIR_END(split_p, m);
972  compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
973 
974  {
975  int residue = DIR_END(p) - m;
976  int new_dir_end = DIR_START + residue;
977  memmove(p + DIR_START, p + m, residue);
978  SET_DIR_END(p, new_dir_end);
979  }
980 
981  compact(p); /* to reset TOTAL_FREE, MAX_FREE */
982 
983  bool add_to_upper_half;
984  if (seq_count < 0) {
985  add_to_upper_half = (c >= m);
986  } else {
987  // And add item to lower half if split_p has room, otherwise upper
988  // half
989  add_to_upper_half = (TOTAL_FREE(split_p) < needed);
990  }
991 
992  if (add_to_upper_half) {
993  c -= (m - DIR_START);
994  Assert(seq_count < 0 || c <= DIR_START + D2);
995  Assert(c >= DIR_START);
996  Assert(c <= DIR_END(p));
997  add_item_to_branch(p, kt_, c);
998  } else {
999  Assert(c >= DIR_START);
1000  Assert(c <= DIR_END(split_p));
1001  add_item_to_branch(split_p, kt_, c);
1002  }
1003  write_block(split_n, split_p);
1004 
1005  // Check if we're splitting the root block.
1006  if (j == level) split_root(split_n);
1007 
1008  /* Enter a separating key at level j + 1 between */
1009  /* the last key of block split_p, and the first key of block p */
1010  enter_key_above_branch(j + 1, BItem(p, DIR_START));
1011 
1012  // In branch levels, we can make the first key of block p null and
1013  // save a bit of disk space. Other redundant keys will still creep
1014  // in though.
1015  BItem_wr item(p, DIR_START);
1016  int new_total_free = TOTAL_FREE(p) + item.key().length();
1017  item.form_null_key(item.block_given_by());
1018  SET_TOTAL_FREE(p, new_total_free);
1019  } else {
1020  AssertRel(TOTAL_FREE(p),>=,needed);
1021 
1022  if (MAX_FREE(p) < needed) {
1023  compact(p);
1024  AssertRel(MAX_FREE(p),>=,needed);
1025  }
1026 
1027  add_item_to_branch(p, kt_, c);
1028  }
1029 }
1030 
1038 void
1040 {
1041  LOGCALL_VOID(DB, "GlassTable::delete_leaf_item", repeatedly);
1042  Assert(writable);
1043  uint8_t * p = C[0].get_modifiable_p(block_size);
1044  int c = C[0].c;
1045  AssertRel(DIR_START,<=,c);
1046  AssertRel(c,<,DIR_END(p));
1047  int kt_len = LeafItem(p, c).size(); /* size of the item to be deleted */
1048  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
1049 
1050  memmove(p + c, p + c + D2, dir_end - c);
1051  SET_DIR_END(p, dir_end);
1052  SET_MAX_FREE(p, MAX_FREE(p) + D2);
1053  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
1054 
1055  if (!repeatedly) return;
1056  if (0 < level) {
1057  if (dir_end == DIR_START) {
1058  free_list.mark_block_unused(this, block_size, C[0].get_n());
1059  C[0].rewrite = false;
1060  C[0].set_n(BLK_UNUSED);
1061  C[1].rewrite = true; /* *is* necessary */
1062  delete_branch_item(1);
1063  }
1064  }
1065 }
1066 
1073 void
1075 {
1076  LOGCALL_VOID(DB, "GlassTable::delete_branch_item", j);
1077  Assert(writable);
1078  uint8_t * p = C[j].get_modifiable_p(block_size);
1079  int c = C[j].c;
1080  AssertRel(DIR_START,<=,c);
1081  AssertRel(c,<,DIR_END(p));
1082  int kt_len = BItem(p, c).size(); /* size of the item to be deleted */
1083  int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
1084 
1085  memmove(p + c, p + c + D2, dir_end - c);
1086  SET_DIR_END(p, dir_end);
1087  SET_MAX_FREE(p, MAX_FREE(p) + D2);
1088  SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
1089 
1090  if (j < level) {
1091  if (dir_end == DIR_START) {
1092  free_list.mark_block_unused(this, block_size, C[j].get_n());
1093  C[j].rewrite = false;
1094  C[j].set_n(BLK_UNUSED);
1095  C[j + 1].rewrite = true; /* *is* necessary */
1096  delete_branch_item(j + 1);
1097  }
1098  } else {
1099  Assert(j == level);
1100  while (dir_end == DIR_START + D2 && level > 0) {
1101  /* single item in the root block, so lose a level */
1102  uint4 new_root = BItem(C[level].get_p(), DIR_START).block_given_by();
1103  free_list.mark_block_unused(this, block_size, C[level].get_n());
1104  C[level].destroy();
1105  level--;
1106 
1107  block_to_cursor(C, level, new_root);
1108 
1109  dir_end = DIR_END(C[level].get_p()); /* prepare for the loop */
1110  }
1111  }
1112 }
1113 
1114 /* debugging aid:
1115 static addcount = 0;
1116 */
1117 
1148 int
1150 {
1151  LOGCALL(DB, int, "GlassTable::add_kt", found);
1152  Assert(writable);
1153 
1154  /*
1155  {
1156  printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
1157  print_bytes(kt[I2], kt + I2 + K1); putchar('\n');
1158  }
1159  */
1160  alter();
1161 
1162  int result = 0;
1163  if (found) { /* replacement */
1164  seq_count = SEQ_START_POINT;
1165  sequential = false;
1166 
1167  uint8_t * p = C[0].get_modifiable_p(block_size);
1168  int c = C[0].c;
1169  AssertRel(DIR_START,<=,c);
1170  AssertRel(c,<,DIR_END(p));
1171  LeafItem item(p, c);
1172  int kt_size = kt.size();
1173  int needed = kt_size - item.size();
1174 
1175  result = item.last_component() ? 2 : 1;
1176 
1177  if (needed <= 0) {
1178  /* simple replacement */
1179  memmove(const_cast<uint8_t *>(item.get_address()),
1180  kt.get_address(), kt_size);
1181  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
1182  } else {
1183  /* new item into the block's freespace */
1184  int new_max = MAX_FREE(p) - kt_size;
1185  if (new_max >= 0) {
1186  int o = DIR_END(p) + new_max;
1187  memmove(p + o, kt.get_address(), kt_size);
1188  LeafItem_wr::setD(p, c, o);
1189  SET_MAX_FREE(p, new_max);
1190  SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
1191  } else {
1192  /* do it the long way */
1193  delete_leaf_item(false);
1194  add_leaf_item(kt);
1195  }
1196  }
1197  } else {
1198  /* addition */
1199  if (changed_n == C[0].get_n() && changed_c == C[0].c) {
1200  if (seq_count < 0) seq_count++;
1201  } else {
1202  seq_count = SEQ_START_POINT;
1203  sequential = false;
1204  }
1205  C[0].c += D2;
1206  add_leaf_item(kt);
1207  }
1208  RETURN(result);
1209 }
1210 
1211 /* delete_kt() corresponds to add_kt(found), but there are only
1212  two cases: if the key is not found nothing is done, and if it is
1213  found the corresponding item is deleted with delete_leaf_item.
1214 
1215  Returns:
1216  0 : nothing to delete
1217  1 : deleted kt
1218  2 : deleted kt and it was the final one
1219 */
1220 
1221 int
1223 {
1224  LOGCALL(DB, int, "GlassTable::delete_kt", NO_ARGS);
1225  Assert(writable);
1226 
1227  seq_count = SEQ_START_POINT;
1228  sequential = false;
1229 
1230  if (!find(C))
1231  return 0;
1232 
1233  int result = LeafItem(C[0].get_p(), C[0].c).last_component() ? 2 : 1;
1234  alter();
1235  delete_leaf_item(true);
1236 
1237  RETURN(result);
1238 }
1239 
1240 /* GlassTable::form_key(key) treats address kt as an item holder and fills in
1241 the key part:
1242 
1243  (I) K key c (C tag)
1244 
1245 The bracketed parts are left blank. The key is filled in with key_len bytes and
1246 K set accordingly. c is set to 1.
1247 */
1248 
1249 void GlassTable::form_key(const string & key) const
1250 {
1251  LOGCALL_VOID(DB, "GlassTable::form_key", key);
1252  kt.form_key(key);
1253 }
1254 
1255 /* GlassTable::add(key, tag) adds the key/tag item to the
1256  B-tree, replacing any existing item with the same key.
1257 
1258  For a long tag, we end up having to add m components, of the form
1259 
1260  key 1 m tag1
1261  key 2 m tag2
1262  ...
1263  key m m tagm
1264 
1265  and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
1266  n components of the form
1267 
1268  key 1 n TAG1
1269  key 2 n TAG2
1270  ...
1271  key n n TAGn
1272 
1273  and n may be greater than, equal to, or less than m. These cases are dealt
1274  with in the code below. If m < n for example, we end up with a series of
1275  deletions.
1276 */
1277 
1278 void
1279 GlassTable::add(const string& key, const string& tag, bool already_compressed)
1280 {
1281  LOGCALL_VOID(DB, "GlassTable::add", key | tag | already_compressed);
1282  Assert(writable);
1283 
1284  if (handle < 0) {
1285  if (handle == -2) {
1287  }
1288  RootInfo root_info;
1289  root_info.init(block_size, compress_min);
1290  do_open_to_write(&root_info);
1291  }
1292 
1293  form_key(key);
1294 
1295  const char* tag_data = tag.data();
1296  size_t tag_size = tag.size();
1297 
1298  bool compressed = false;
1299  if (already_compressed) {
1300  compressed = true;
1301  } else if (compress_min > 0 && tag_size > compress_min) {
1302  const char * res = comp_stream.compress(tag_data, &tag_size);
1303  if (res) {
1304  compressed = true;
1305  tag_data = res;
1306  }
1307  }
1308 
1309  // sort of matching kt.append_chunk(), but setting the chunk
1310  const size_t cd = kt.key().length() + K1 + I2 + X2; // offset to the tag data
1311  const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
1312  size_t first_L = L + X2; // - amount for tag1 (we don't store X there)
1313  bool found = find(C);
1314  if (tag_size <= first_L) {
1315  // The whole tag clearly fits in one item, so no need to make this
1316  // complicated.
1317  first_L = tag_size;
1318  } else if (!found) {
1319  const uint8_t * p = C[0].get_p();
1320  size_t n = TOTAL_FREE(p) % (max_item_size + D2);
1321  if (n > D2 + cd) {
1322  n -= (D2 + cd);
1323  // if n >= last then fully filling this block won't produce
1324  // an extra item, so we might as well do this even if
1325  // full_compaction isn't active.
1326  //
1327  // In the full_compaction case, it turns out we shouldn't always
1328  // try to fill every last byte. Doing so can actually increase the
1329  // total space required (I believe this effect is due to longer
1330  // dividing keys being required in the index blocks). Empirically,
1331  // n >= key.size() + K appears a good criterion for K ~= 34. This
1332  // seems to save about 0.2% in total database size over always
1333  // splitting the tag. It'll also give be slightly faster retrieval
1334  // as we can avoid reading an extra block occasionally.
1335  size_t last = (tag_size - X2) % L;
1336  if (n >= last || (full_compaction && n >= key.size() + 34)) {
1337  // first_L < max_item_size + D2 - D2 - cd
1338  // Total size of first item = cd + first_L < max_item_size
1339  first_L = n + X2;
1340  }
1341  }
1342  }
1343 
1344  // There are m items to add.
1345  int m = (tag_size - first_L + L - 1) / L + 1;
1346  /* FIXME: sort out this error higher up and turn this into
1347  * an assert.
1348  */
1349  if (m >= BYTE_PAIR_RANGE)
1350  throw Xapian::UnimplementedError("Can't handle insanely large tags");
1351 
1352  size_t o = 0; // Offset into the tag
1353  size_t residue = tag_size; // Bytes of the tag remaining to add in
1354  bool replacement = false; // Has there been a replacement?
1355  bool components_to_del = false; // Are there components to delete?
1356  int i;
1357  for (i = 1; i <= m; ++i) {
1358  size_t l = (i == m ? residue : (i == 1 ? first_L : L));
1359  size_t this_cd = (i == 1 ? cd - X2 : cd);
1360  Assert(this_cd + l <= block_size);
1361  Assert(o + l <= tag_size);
1362  kt.set_tag(this_cd, tag_data + o, l, compressed, i, m);
1363 
1364  o += l;
1365  residue -= l;
1366 
1367  if (i > 1) found = find(C);
1368  int result = add_kt(found);
1369  if (result) replacement = true;
1370  components_to_del = (result == 1);
1371  }
1372  AssertEq(o, tag_size);
1373  if (components_to_del) {
1374  i = m;
1375  do {
1376  kt.set_component_of(++i);
1377  } while (delete_kt() == 1);
1378  }
1379  if (!replacement) ++item_count;
1380  Btree_modified = true;
1381  if (cursor_created_since_last_modification) {
1382  cursor_created_since_last_modification = false;
1383  ++cursor_version;
1384  }
1385 }
1386 
1387 /* GlassTable::del(key) returns false if the key is not in the B-tree,
1388  otherwise deletes it and returns true.
1389 
1390  Again, this is parallel to GlassTable::add, but simpler in form.
1391 */
1392 
1393 bool
1394 GlassTable::del(const string &key)
1395 {
1396  LOGCALL(DB, bool, "GlassTable::del", key);
1397  Assert(writable);
1398 
1399  if (handle < 0) {
1400  if (handle == -2) {
1402  }
1403  RETURN(false);
1404  }
1405 
1406  // We can't delete a key which we is too long for us to store.
1407  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1408 
1409  if (key.empty()) RETURN(false);
1410  form_key(key);
1411 
1412  int r = delete_kt();
1413  if (r == 0) RETURN(false);
1414  int i = 1;
1415  while (r == 1) {
1416  kt.set_component_of(++i);
1417  r = delete_kt();
1418  }
1419 
1420  item_count--;
1421  Btree_modified = true;
1422  if (cursor_created_since_last_modification) {
1423  cursor_created_since_last_modification = false;
1424  ++cursor_version;
1425  }
1426  RETURN(true);
1427 }
1428 
1429 bool
1430 GlassTable::readahead_key(const string &key) const
1431 {
1432  LOGCALL(DB, bool, "GlassTable::readahead_key", key);
1433  Assert(!key.empty());
1434 
1435  // Three cases:
1436  //
1437  // handle == -1: Lazy table in a multi-file database which isn't yet open.
1438  //
1439  // handle == -2: Table has been closed. Since the readahead is just a
1440  // hint, we can safely ignore it for a closed table.
1441  //
1442  // handle <= -3: Lazy table in a single-file database which isn't yet
1443  // open.
1444  if (handle < 0)
1445  RETURN(false);
1446 
1447  // If the table only has one level, there are no branch blocks to preread.
1448  if (level == 0)
1449  RETURN(false);
1450 
1451  // An overlong key cannot be found.
1452  if (key.size() > GLASS_BTREE_MAX_KEY_LEN)
1453  RETURN(true);
1454 
1455  form_key(key);
1456 
1457  // We'll only readahead the first level, since descending the B-tree would
1458  // require actual reads that would likely hurt performance more than help.
1459  const uint8_t * p = C[level].get_p();
1460  int c = find_in_branch(p, kt, C[level].c);
1461  uint4 n = BItem(p, c).block_given_by();
1462  // Don't preread if it's the block we last preread or already in the
1463  // cursor.
1464  if (n != last_readahead && n != C[level - 1].get_n()) {
1465  last_readahead = n;
1466  if (!io_readahead_block(handle, block_size, n, offset))
1467  RETURN(false);
1468  }
1469  RETURN(true);
1470 }
1471 
1472 bool
1473 GlassTable::get_exact_entry(const string &key, string & tag) const
1474 {
1475  LOGCALL(DB, bool, "GlassTable::get_exact_entry", key | tag);
1476  Assert(!key.empty());
1477 
1478  if (handle < 0) {
1479  if (handle == -2) {
1481  }
1482  RETURN(false);
1483  }
1484 
1485  // An oversized key can't exist, so attempting to search for it should fail.
1486  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1487 
1488  form_key(key);
1489  if (!find(C)) RETURN(false);
1490 
1491  (void)read_tag(C, &tag, false);
1492  RETURN(true);
1493 }
1494 
1495 bool
1496 GlassTable::key_exists(const string &key) const
1497 {
1498  LOGCALL(DB, bool, "GlassTable::key_exists", key);
1499  Assert(!key.empty());
1500 
1501  // An oversized key can't exist, so attempting to search for it should fail.
1502  if (key.size() > GLASS_BTREE_MAX_KEY_LEN) RETURN(false);
1503 
1504  form_key(key);
1505  RETURN(find(C));
1506 }
1507 
1508 bool
1509 GlassTable::read_tag(Glass::Cursor * C_, string *tag, bool keep_compressed) const
1510 {
1511  LOGCALL(DB, bool, "GlassTable::read_tag", Literal("C_") | tag | keep_compressed);
1512 
1513  tag->resize(0);
1514 
1515  bool first = true;
1516  bool compressed = false;
1517  bool decompress = false;
1518  while (true) {
1519  LeafItem item(C_[0].get_p(), C_[0].c);
1520  if (first) {
1521  first = false;
1522  compressed = item.get_compressed();
1523  if (compressed && !keep_compressed) {
1524  comp_stream.decompress_start();
1525  decompress = true;
1526  }
1527  }
1528  bool last = item.last_component();
1529  if (decompress) {
1530  // Decompress each chunk as we read it so we don't need both the
1531  // full compressed and uncompressed tags in memory at once.
1532  bool done = item.decompress_chunk(comp_stream, *tag);
1533  if (done != last) {
1534  throw Xapian::DatabaseCorruptError(done ?
1535  "Too many chunks of compressed data" :
1536  "Too few chunks of compressed data");
1537  }
1538  } else {
1539  item.append_chunk(tag);
1540  }
1541  if (last) break;
1542  if (!next(C_, 0)) {
1543  throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
1544  }
1545  }
1546  // At this point the cursor is on the last item - calling next will move
1547  // it to the next key (GlassCursor::read_tag() relies on this).
1548 
1549  RETURN(compressed && keep_compressed);
1550 }
1551 
1552 void
1554 {
1555  LOGCALL_VOID(DB, "GlassTable::set_full_compaction", parity);
1556  Assert(writable);
1557 
1558  if (parity) seq_count = 0;
1559  full_compaction = parity;
1560 }
1561 
1563  LOGCALL(DB, GlassCursor *, "GlassTable::cursor_get", NO_ARGS);
1564  if (handle < 0) {
1565  if (handle == -2) {
1567  }
1568  RETURN(NULL);
1569  }
1570  // FIXME Ick - casting away const is nasty
1571  RETURN(new GlassCursor(const_cast<GlassTable *>(this)));
1572 }
1573 
1574 /************ B-tree opening and closing ************/
1575 
1576 void
1578 {
1579  LOGCALL_VOID(DB, "GlassTable::basic_open", root_info|rev);
1580  revision_number = rev;
1581  root = root_info->get_root();
1582  level = root_info->get_level();
1583  item_count = root_info->get_num_entries();
1584  faked_root_block = root_info->get_root_is_fake();
1585  sequential = root_info->get_sequential();
1586  const string & fl_serialised = root_info->get_free_list();
1587  if (!fl_serialised.empty()) {
1588  if (!free_list.unpack(fl_serialised))
1589  throw Xapian::DatabaseCorruptError("Bad freelist metadata");
1590  } else {
1591  free_list.reset();
1592  }
1593 
1594  compress_min = root_info->get_compress_min();
1595 
1596  /* kt holds constructed items as well as keys */
1597  kt = LeafItem_wr(zeroed_new(block_size));
1598 
1599  set_max_item_size(BLOCK_CAPACITY);
1600 
1601  for (int j = 0; j <= level; ++j) {
1602  C[j].init(block_size);
1603  }
1604 
1605  read_root();
1606 
1607  if (cursor_created_since_last_modification) {
1608  cursor_created_since_last_modification = false;
1609  ++cursor_version;
1610  }
1611 }
1612 
1613 void
1615 {
1616  LOGCALL_VOID(DB, "GlassTable::read_root", NO_ARGS);
1617  if (faked_root_block) {
1618  /* root block for an unmodified database. */
1619  uint8_t * p = C[0].init(block_size);
1620  Assert(p);
1621 
1622  /* clear block - shouldn't be necessary, but is a bit nicer,
1623  * and means that the same operations should always produce
1624  * the same database. */
1625  memset(p, 0, block_size);
1626 
1627  int o = block_size - I2 - K1;
1628  LeafItem_wr(p + o).fake_root_item();
1629 
1630  LeafItem_wr::setD(p, DIR_START, o); // its directory entry
1631  SET_DIR_END(p, DIR_START + D2);// the directory size
1632 
1633  o -= (DIR_START + D2);
1634  SET_MAX_FREE(p, o);
1635  SET_TOTAL_FREE(p, o);
1636  SET_LEVEL(p, 0);
1637 
1638  if (!writable) {
1639  /* reading - revision number doesn't matter as long as
1640  * it's not greater than the current one. */
1641  SET_REVISION(p, 0);
1642  C[0].set_n(0);
1643  } else {
1644  /* writing - */
1645  SET_REVISION(p, revision_number + 1);
1646  C[0].set_n(free_list.get_block(this, block_size));
1647  C[0].rewrite = true;
1648  }
1649  } else {
1650  /* using a root block stored on disk */
1651  block_to_cursor(C, level, root);
1652 
1653  if (REVISION(C[level].get_p()) > revision_number) set_overwritten();
1654  /* although this is unlikely */
1655  }
1656 }
1657 
1658 void
1661 {
1662  LOGCALL_VOID(DB, "GlassTable::do_open_to_write", root_info|rev);
1663  if (handle == -2) {
1665  }
1666  if (handle <= -2) {
1667  // Single file database.
1668  handle = -3 - handle;
1669  } else {
1670  handle = io_open_block_wr(name + GLASS_TABLE_EXTENSION, (rev == 0));
1671  if (handle < 0) {
1672  // lazy doesn't make a lot of sense when we're creating a DB (which
1673  // is the case when rev==0), but ENOENT with O_CREAT means a parent
1674  // directory doesn't exist.
1675  if (lazy && rev && errno == ENOENT) {
1676  revision_number = rev;
1677  return;
1678  }
1679  string message((rev == 0) ? "Couldn't create " : "Couldn't open ");
1680  message += name;
1681  message += GLASS_TABLE_EXTENSION" read/write";
1682  throw Xapian::DatabaseOpeningError(message, errno);
1683  }
1684  }
1685 
1686  writable = true;
1687  basic_open(root_info, rev);
1688 
1689  split_p = new uint8_t[block_size];
1690 
1691  buffer = zeroed_new(block_size);
1692 
1693  changed_n = 0;
1694  changed_c = DIR_START;
1695  seq_count = SEQ_START_POINT;
1696 }
1697 
1698 GlassTable::GlassTable(const char * tablename_, const string & path_,
1699  bool readonly_, bool lazy_)
1700  : tablename(tablename_),
1701  revision_number(0),
1702  item_count(0),
1703  block_size(0),
1704  faked_root_block(true),
1705  sequential(true),
1706  handle(-1),
1707  level(0),
1708  root(0),
1709  kt(0),
1710  buffer(0),
1711  free_list(),
1712  name(path_),
1713  seq_count(0),
1714  changed_n(0),
1715  changed_c(0),
1716  max_item_size(0),
1717  Btree_modified(false),
1718  full_compaction(false),
1719  writable(!readonly_),
1720  cursor_created_since_last_modification(false),
1721  cursor_version(0),
1722  changes_obj(NULL),
1723  split_p(0),
1724  compress_min(0),
1725  comp_stream(Z_DEFAULT_STRATEGY),
1726  lazy(lazy_),
1727  last_readahead(BLK_UNUSED),
1728  offset(0)
1729 {
1730  LOGCALL_CTOR(DB, "GlassTable", tablename_ | path_ | readonly_ | lazy_);
1731 }
1732 
1733 GlassTable::GlassTable(const char * tablename_, int fd, off_t offset_,
1734  bool readonly_, bool lazy_)
1735  : tablename(tablename_),
1736  revision_number(0),
1737  item_count(0),
1738  block_size(0),
1740  sequential(true),
1741  handle(-3 - fd),
1742  level(0),
1743  root(0),
1744  kt(0),
1745  buffer(0),
1746  free_list(),
1747  name(),
1748  seq_count(0),
1749  changed_n(0),
1750  changed_c(0),
1751  max_item_size(0),
1754  writable(!readonly_),
1756  cursor_version(0),
1757  changes_obj(NULL),
1758  split_p(0),
1759  compress_min(0),
1760  comp_stream(Z_DEFAULT_STRATEGY),
1761  lazy(lazy_),
1763  offset(offset_)
1764 {
1765  LOGCALL_CTOR(DB, "GlassTable", tablename_ | fd | offset_ | readonly_ | lazy_);
1766 }
1767 
1768 bool
1770  LOGCALL(DB, bool, "GlassTable::exists", NO_ARGS);
1771  // We know a single-file database exists, since we have an fd open on it!
1773 }
1774 
1775 void
1776 GlassTable::create_and_open(int flags_, const RootInfo & root_info)
1777 {
1778  LOGCALL_VOID(DB, "GlassTable::create_and_open", flags_|root_info);
1779  if (handle == -2) {
1781  }
1782  Assert(writable);
1783  close();
1784 
1785  unsigned int block_size_ = root_info.get_blocksize();
1786  Assert(block_size_ >= 2048);
1787  Assert(block_size_ <= BYTE_PAIR_RANGE);
1788  // Must be a power of two.
1789  Assert((block_size_ & (block_size_ - 1)) == 0);
1790 
1791  flags = flags_;
1792  block_size = block_size_;
1793 
1794  if (lazy) {
1795  close();
1797  compress_min = root_info.get_compress_min();
1798  } else {
1799  // FIXME: it would be good to arrange that this works such that there's
1800  // always a valid table in place if you run create_and_open() on an
1801  // existing table.
1802 
1803  do_open_to_write(&root_info);
1804  }
1805 }
1806 
1808  LOGCALL_DTOR(DB, "GlassTable");
1810 }
1811 
1812 void GlassTable::close(bool permanent) {
1813  LOGCALL_VOID(DB, "GlassTable::close", permanent);
1814 
1815  if (handle >= 0) {
1816  if (single_file()) {
1817  handle = -3 - handle;
1818  } else {
1819  // If an error occurs here, we just ignore it, since we're just
1820  // trying to free everything.
1821  (void)::close(handle);
1822  handle = -1;
1823  }
1824  }
1825 
1826  if (permanent) {
1827  handle = -2;
1828  // Don't delete the resources in the table, since they may
1829  // still be used to look up cached content.
1830  return;
1831  }
1832  for (int j = level; j >= 0; --j) {
1833  C[j].destroy();
1834  }
1835  delete [] split_p;
1836  split_p = 0;
1837 
1838  delete [] kt.get_address();
1839  kt = LeafItem_wr(0);
1840  delete [] buffer;
1841  buffer = 0;
1842 }
1843 
1844 void
1846 {
1847  LOGCALL_VOID(DB, "GlassTable::flush_db", NO_ARGS);
1848  Assert(writable);
1849  if (handle < 0) {
1850  if (handle == -2) {
1852  }
1853  return;
1854  }
1855 
1856  for (int j = level; j >= 0; --j) {
1857  if (C[j].rewrite) {
1858  write_block(C[j].get_n(), C[j].get_p());
1859  }
1860  }
1861 
1862  faked_root_block = false;
1863 }
1864 
1865 void
1867 {
1868  LOGCALL_VOID(DB, "GlassTable::commit", revision|root_info);
1869  Assert(writable);
1870 
1871  if (revision <= revision_number) {
1872  throw Xapian::DatabaseError("New revision too low");
1873  }
1874 
1875  if (handle < 0) {
1876  if (handle == -2) {
1878  }
1880  root_info->set_blocksize(block_size);
1881  root_info->set_level(0);
1882  root_info->set_num_entries(0);
1883  root_info->set_root_is_fake(true);
1884  root_info->set_sequential(true);
1885  root_info->set_root(0);
1886  return;
1887  }
1888 
1889  try {
1890  root = C[level].get_n();
1891 
1892  root_info->set_blocksize(block_size);
1893  root_info->set_level(level);
1894  root_info->set_num_entries(item_count);
1895  root_info->set_root_is_fake(faked_root_block);
1896  root_info->set_sequential(sequential);
1897  root_info->set_root(root);
1898 
1899  Btree_modified = false;
1900 
1901  for (int i = 0; i <= level; ++i) {
1902  C[i].init(block_size);
1903  }
1904 
1905  free_list.set_revision(revision);
1906  free_list.commit(this, block_size);
1907 
1908  // Save the freelist details into the root_info.
1909  string serialised;
1910  free_list.pack(serialised);
1911  root_info->set_free_list(serialised);
1912 
1914 
1915  read_root();
1916 
1917  changed_n = 0;
1918  changed_c = DIR_START;
1920  } catch (...) {
1922  throw;
1923  }
1924 }
1925 
1926 void
1928 {
1929  LOGCALL_VOID(DB, "GlassTable::cancel", root_info|rev);
1930  Assert(writable);
1931 
1932  if (handle < 0) {
1933  if (handle == -2) {
1935  }
1936  return;
1937  }
1938 
1939  // This causes problems: if (!Btree_modified) return;
1940 
1942  throw Xapian::InvalidOperationError("cancel() not supported under Xapian::DB_DANGEROUS");
1943 
1944  revision_number = rev;
1945  block_size = root_info.get_blocksize();
1946  root = root_info.get_root();
1947  level = root_info.get_level();
1948  item_count = root_info.get_num_entries();
1949  faked_root_block = root_info.get_root_is_fake();
1950  sequential = root_info.get_sequential();
1951  const string & fl_serialised = root_info.get_free_list();
1952  if (!fl_serialised.empty()) {
1953  if (!free_list.unpack(fl_serialised))
1954  throw Xapian::DatabaseCorruptError("Bad freelist metadata");
1955  } else {
1956  free_list.reset();
1957  }
1958 
1959  Btree_modified = false;
1960 
1961  for (int j = 0; j <= level; ++j) {
1962  C[j].init(block_size);
1963  C[j].rewrite = false;
1964  }
1965  read_root();
1966 
1967  changed_n = 0;
1968  changed_c = DIR_START;
1970 
1973  ++cursor_version;
1974  }
1975 }
1976 
1977 /************ B-tree reading ************/
1978 
1979 void
1982 {
1983  LOGCALL(DB, bool, "GlassTable::do_open_to_read", root_info|rev);
1984  if (handle == -2) {
1986  }
1987  if (single_file()) {
1988  handle = -3 - handle;
1989  } else {
1991  if (handle < 0) {
1992  if (lazy) {
1993  // This table is optional when reading!
1994  revision_number = rev;
1995  return;
1996  }
1997  string message("Couldn't open ");
1998  message += name;
1999  message += GLASS_TABLE_EXTENSION" to read";
2000  throw Xapian::DatabaseOpeningError(message, errno);
2001  }
2002  }
2003 
2004  basic_open(root_info, rev);
2005 
2006  read_root();
2007 }
2008 
2009 void
2010 GlassTable::open(int flags_, const RootInfo & root_info,
2012 {
2013  LOGCALL_VOID(DB, "GlassTable::open", flags_|root_info|rev);
2014  close();
2015 
2016  flags = flags_;
2017  block_size = root_info.get_blocksize();
2018  root = root_info.get_root();
2019 
2020  if (!writable) {
2021  do_open_to_read(&root_info, rev);
2022  return;
2023  }
2024 
2025  do_open_to_write(&root_info, rev);
2026 }
2027 
2028 bool
2030 {
2031  LOGCALL(DB, bool, "GlassTable::prev_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2032  int c = C_[0].c;
2033  AssertRel(DIR_START,<=,c);
2034  AssertRel(c,<,DIR_END(C_[0].get_p()));
2035  if (c == DIR_START) {
2036  uint4 n = C_[0].get_n();
2037  const uint8_t * p;
2038  while (true) {
2039  if (n == 0) RETURN(false);
2040  n--;
2041  if (n == C[0].get_n()) {
2042  // Block is a leaf block in the built-in cursor (potentially in
2043  // modified form if the table is writable).
2044  p = C_[0].clone(C[0]);
2045  } else {
2046  if (writable) {
2047  // Blocks in the built-in cursor may not have been written
2048  // to disk yet, so we have to check that the block number
2049  // isn't in the built-in cursor or we'll read an
2050  // uninitialised block (for which GET_LEVEL(p) will
2051  // probably return 0).
2052  int j;
2053  for (j = 1; j <= level; ++j) {
2054  if (n == C[j].get_n()) break;
2055  }
2056  if (j <= level) continue;
2057  }
2058 
2059  // Block isn't in the built-in cursor, so the form on disk
2060  // is valid, so read it to check if it's the next level 0
2061  // block.
2062  uint8_t * q = C_[0].init(block_size);
2063  read_block(n, q);
2064  p = q;
2065  C_[0].set_n(n);
2066  }
2067  if (REVISION(p) > revision_number + writable) {
2068  set_overwritten();
2069  RETURN(false);
2070  }
2071  if (GET_LEVEL(p) == 0) break;
2072  }
2073  c = DIR_END(p);
2074  AssertRel(DIR_START,<,c);
2075  }
2076  c -= D2;
2077  C_[0].c = c;
2078  RETURN(true);
2079 }
2080 
2081 bool
2083 {
2084  LOGCALL(DB, bool, "GlassTable::next_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2085  const uint8_t * p = C_[0].get_p();
2086  Assert(p);
2087  int c = C_[0].c;
2088  AssertRel(c,<,DIR_END(p));
2089  c += D2;
2090  Assert(unsigned(c) < block_size);
2091  if (c == DIR_END(p)) {
2092  uint4 n = C_[0].get_n();
2093  while (true) {
2094  n++;
2095  if (n >= free_list.get_first_unused_block()) RETURN(false);
2096  if (writable) {
2097  if (n == C[0].get_n()) {
2098  // Block is a leaf block in the built-in cursor
2099  // (potentially in modified form).
2100  p = C_[0].clone(C[0]);
2101  } else {
2102  // Blocks in the built-in cursor may not have been written
2103  // to disk yet, so we have to check that the block number
2104  // isn't in the built-in cursor or we'll read an
2105  // uninitialised block (for which GET_LEVEL(p) will
2106  // probably return 0).
2107  int j;
2108  for (j = 1; j <= level; ++j) {
2109  if (n == C[j].get_n()) break;
2110  }
2111  if (j <= level) continue;
2112 
2113  // Block isn't in the built-in cursor, so the form on disk
2114  // is valid, so read it to check if it's the next level 0
2115  // block.
2116  uint8_t * q = C_[0].init(block_size);
2117  read_block(n, q);
2118  p = q;
2119  }
2120  } else {
2121  uint8_t * q = C_[0].init(block_size);
2122  read_block(n, q);
2123  p = q;
2124  }
2125  if (REVISION(p) > revision_number + writable) {
2126  set_overwritten();
2127  RETURN(false);
2128  }
2129  if (GET_LEVEL(p) == 0) break;
2130  }
2131  c = DIR_START;
2132  C_[0].set_n(n);
2133  }
2134  C_[0].c = c;
2135  RETURN(true);
2136 }
2137 
2138 bool
2140 {
2141  LOGCALL(DB, bool, "GlassTable::prev_default", Literal("C_") | j);
2142  const uint8_t * p = C_[j].get_p();
2143  int c = C_[j].c;
2144  AssertRel(DIR_START,<=,c);
2145  AssertRel(c,<,DIR_END(p));
2146  AssertRel(unsigned(DIR_END(p)),<=,block_size);
2147  if (c == DIR_START) {
2148  if (j == level) RETURN(false);
2149  if (!prev_default(C_, j + 1)) RETURN(false);
2150  p = C_[j].get_p();
2151  c = DIR_END(p);
2152  AssertRel(DIR_START,<,c);
2153  }
2154  c -= D2;
2155  C_[j].c = c;
2156  if (j > 0) {
2157  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
2158  }
2159  RETURN(true);
2160 }
2161 
2162 bool
2164 {
2165  LOGCALL(DB, bool, "GlassTable::next_default", Literal("C_") | j);
2166  const uint8_t * p = C_[j].get_p();
2167  int c = C_[j].c;
2168  AssertRel(c,<,DIR_END(p));
2169  AssertRel(unsigned(DIR_END(p)),<=,block_size);
2170  c += D2;
2171  if (j > 0) {
2172  AssertRel(DIR_START,<,c);
2173  } else {
2174  AssertRel(DIR_START,<=,c);
2175  }
2176  // Sometimes c can be DIR_END(p) + 2 here it appears...
2177  if (c >= DIR_END(p)) {
2178  if (j == level) RETURN(false);
2179  if (!next_default(C_, j + 1)) RETURN(false);
2180  p = C_[j].get_p();
2181  c = DIR_START;
2182  }
2183  C_[j].c = c;
2184  if (j > 0) {
2185  block_to_cursor(C_, j - 1, BItem(p, c).block_given_by());
2186 #ifdef BTREE_DEBUG_FULL
2187  printf("Block in GlassTable:next_default");
2188  report_block_full(j - 1, C_[j - 1].get_n(), C_[j - 1].get_p());
2189 #endif /* BTREE_DEBUG_FULL */
2190  }
2191  RETURN(true);
2192 }
2193 
2194 void
2196 {
2197  throw Xapian::DatabaseClosedError("Database has been closed");
2198 }
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:491
#define RETURN(A)
Definition: debuglog.h:493
uint4 get_compress_min() const
Definition: glass_version.h:68
#define Assert(COND)
Definition: omassert.h:122
bool key_exists(const std::string &key) const
Check if a key exists in the Btree.
int io_open_block_rd(const char *fname)
Open a block-based file for reading.
Definition: io_utils.h:38
GlassVersion class.
int DIR_END(const uint8_t *b)
Definition: chert_table.h:154
static void setD(uint8_t *q, int c, int x)
Definition: glass_table.h:391
int component_of() const
Definition: glass_table.h:184
off_t offset
offset to start of table in file.
Definition: glass_table.h:905
bool get_sequential() const
Definition: glass_version.h:63
Indicates an attempt to access a closed database.
Definition: error.h:1097
Glass::LeafItem_wr kt
buffer of size block_size for making up key-tag items
Definition: glass_table.h:799
#define AssertEq(A, B)
Definition: omassert.h:124
void create_and_open(int flags_, const RootInfo &root_info)
Create a new empty btree structure on disk and open it at the initial revision.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
const size_t BLOCK_CAPACITY
Even for items of at maximum size, it must be possible to get this number of items in a block...
Definition: glass_table.h:50
bool io_unlink(const std::string &filename)
Delete a file.
Definition: io_utils.cc:52
bool find(Glass::Cursor *) const
find(C_) searches for the key of B->kt in the B-tree.
Definition: glass_table.cc:530
#define true
Definition: header.h:8
void commit(const GlassTable *B, uint4 block_size)
#define AssertRel(A, REL, B)
Definition: omassert.h:123
#define GLASS_BTREE_MAX_KEY_LEN
The largest possible value of a key_len.
Definition: glass_table.h:57
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
int TOTAL_FREE(const uint8_t *b)
Definition: glass_table.h:113
static int find_in_branch(const uint8_t *p, Glass::LeafItem item, int c)
Definition: glass_table.cc:509
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
void io_read_block(int fd, char *p, size_t n, off_t b, off_t o)
Read block b size n bytes into buffer p from file descriptor fd, offset o.
Definition: io_utils.cc:180
int changed_c
directory offset corresponding to last block to be changed by an addition
Definition: glass_table.h:823
bool readahead_key(const string &key) const
int find_in_branch_(const uint8_t *p, ITEM item, int c)
Definition: glass_table.cc:469
uint4 REVISION(const uint8_t *b)
Definition: glass_table.h:110
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
Provides wrappers with POSIXy semantics.
uint8_t * split_p
Buffer used when splitting a block.
Definition: glass_table.h:891
unsigned long cursor_version
Version count for tracking when cursors need to rebuild.
Definition: glass_table.h:841
void set_block_given_by(uint4 n)
Set this item&#39;s tag to point to block n (this block should not be at level 0).
Definition: glass_table.h:380
void set_blocksize(unsigned b)
Definition: glass_version.h:76
Constants in the Xapian namespace.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
int io_open_block_wr(const char *fname, bool anew)
Open a block-based file for writing.
Definition: io_utils.cc:67
bool writable
Set to true when the database is opened to write.
Definition: glass_table.h:835
#define STRINGIZE(X)
The STRINGIZE macro converts its parameter into a string constant.
Definition: stringutils.h:36
bool io_readahead_block(int, size_t, off_t, off_t=0)
Readahead block b size n bytes from file descriptor fd.
Definition: io_utils.h:133
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
void set_truncated_key_and_block(Key newkey, int new_comp, int truncate_size, uint4 n)
Definition: glass_table.h:363
#define BLK_UNUSED
Definition: chert_cursor.h:31
STL namespace.
Definitions, types, etc for use inside glass.
Convert types to std::string.
int handle
File descriptor of the table.
Definition: glass_table.h:790
void set_key_and_block(Key newkey, uint4 n)
Definition: glass_table.h:355
void add(const std::string &key, const std::string &tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
Utility functions for testing files.
void compact(uint8_t *p)
compact(p) compact the block at p by shuffling all the items up to the end.
Definition: glass_table.cc:563
bool lazy
If true, don&#39;t create the table until it&#39;s needed.
Definition: glass_table.h:899
#define false
Definition: header.h:9
void SET_MAX_FREE(uint8_t *b, int x)
Definition: glass_table.h:119
void write_block(uint4 n, const uint8_t *p, bool appending=false) const
write_block(n, p, appending) writes block n in the DB file from address p.
Definition: glass_table.cc:191
void set_level(int level_)
Definition: glass_version.h:71
bool prev_default(Glass::Cursor *C_, int j) const
#define rare(COND)
Definition: config.h:565
int flags
Flags like DB_NO_SYNC and DB_DANGEROUS.
Definition: glass_table.h:770
~GlassTable()
Close the Btree.
bool full_compaction
set to true when full compaction is to be achieved
Definition: glass_table.h:832
int size() const
SIZE in diagram above.
Definition: glass_table.h:178
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27
const int DIR_START
Definition: chert_table.h:155
uint4 changed_n
the last block to be changed by an addition
Definition: glass_table.h:819
void set_n(uint4 n)
Definition: glass_cursor.h:106
int MAX_FREE(const uint8_t *b)
Definition: glass_table.h:112
int compare(ITEM1 a, ITEM2 b)
Compare two items by their keys.
Definition: glass_table.h:930
Hierarchy of classes which Xapian can throw as exceptions.
void add_item_to_leaf(uint8_t *p, Glass::LeafItem kt, int c)
add_item_to_leaf(p, kt_, c) adds item kt_ to the leaf block at p.
Definition: glass_table.cc:771
void set_num_entries(glass_tablesize_t n)
Definition: glass_version.h:72
void form_key(const std::string &key) const
const int K1
Definition: glass_table.h:72
Definition: pretty.h:45
void flush_db()
Flush any outstanding changes to the DB file of the table.
void add_item_to_branch(uint8_t *p, Glass::BItem kt, int c)
add_item_to_branch(p, kt_, c) adds item kt_ to the branch block at p.
Definition: glass_table.cc:811
void read_root()
void enter_key_above_branch(int j, Glass::BItem newitem)
enter_key_above_branch(j, newkey) is called after a branch block split.
Definition: glass_table.cc:703
const char * tablename
The name of the table (used when writing changesets).
Definition: glass_table.h:758
void do_open_to_read(const RootInfo *root_info, glass_revision_number_t rev)
Perform the opening operation to read.
uint32_t uint4
Definition: internaltypes.h:32
glass_tablesize_t item_count
keeps a count of the number of items in the B-tree.
Definition: glass_table.h:764
DatabaseModifiedError indicates a database was modified.
Definition: error.h:539
functions for reading and writing different width words
Glass::Cursor C[GLASS_BTREE_CURSOR_LEVELS]
Definition: glass_table.h:884
int GET_LEVEL(const uint8_t *b)
Definition: glass_table.h:111
void close(bool permanent=false)
Close the Btree.
bool prev_for_sequential(Glass::Cursor *C_, int dummy) const
int add_kt(bool found)
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C...
void split_root(uint4 split_n)
Btree needs to gain a new level to insert more items: so split root block and construct a new one...
Definition: glass_table.cc:600
static uint8_t * zeroed_new(size_t size)
Definition: glass_table.cc:85
Glass freelist.
static int find_in_leaf(const uint8_t *p, Glass::LeafItem item, int c, bool &exact)
find_in_leaf(p, key, c, exact) searches for the key in the leaf block at p.
Definition: glass_table.cc:418
void commit(glass_revision_number_t revision, RootInfo *root_info)
Commit any outstanding changes to the table.
bool decompress_chunk(CompressionStream &comp_stream, string &tag) const
Definition: glass_table.h:198
Glass changesets.
bool next_for_sequential(Glass::Cursor *C_, int dummy) const
GlassChanges * changes_obj
The GlassChanges object to write block changes to.
Definition: glass_table.h:847
uint4 last_readahead
Last block readahead_key() preread.
Definition: glass_table.h:902
uint4 get_first_unused_block() const
void io_write_block(int fd, const char *p, size_t n, off_t b, off_t o)
Write block b size n bytes from buffer p to file descriptor fd, offset o.
Definition: io_utils.cc:228
int GET_LEVEL(const uint8_t *b)
Definition: chert_table.h:151
void set_revision(uint4 revision_)
int length() const
Definition: glass_table.h:149
bool single_file() const
Definition: glass_table.h:849
GlassTable(const GlassTable &)
Copying not allowed.
const int I2
Definition: glass_table.h:73
const uint8_t * clone(const Cursor &o)
Definition: glass_cursor.h:68
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void alter()
Btree::alter(); is called when the B-tree is to be altered.
Definition: glass_table.cc:374
void add_branch_item(Glass::BItem kt, int j)
GlassTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
Definition: glass_table.cc:942
void SET_LEVEL(uint8_t *b, int x)
Definition: glass_table.h:118
bool get_compressed() const
Definition: glass_table.h:181
const int D2
Definition: glass_table.h:74
bool unpack(const char **pstart, const char *end)
glass_tablesize_t get_num_entries() const
Definition: glass_version.h:61
bool last_component() const
Definition: glass_table.h:183
CompressionStream comp_stream
Definition: glass_table.h:896
bool get_root_is_fake() const
Definition: glass_version.h:62
int DIR_END(const uint8_t *b)
Definition: glass_table.h:114
static void throw_database_closed()
Throw an exception indicating that the database is closed.
Btree implementation.
const int DB_DANGEROUS
Update the database in-place.
Definition: constants.h:103
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:489
int seq_count
count of the number of successive instances of purely sequential addition, starting at SEQ_START_POIN...
Definition: glass_table.h:816
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:147
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
const int DIR_START
Definition: glass_table.h:115
#define GLASS_BTREE_CURSOR_LEVELS
Allow for this many levels in the B-tree.
Definition: glass_defs.h:43
int size() const
SIZE in diagram above.
Definition: glass_table.h:319
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
std::string name
The path name of the B tree.
Definition: glass_table.h:811
static void setD(uint8_t *q, int c, int x)
Definition: glass_table.h:278
bool Btree_modified
Set to true the first time the B-tree is modified.
Definition: glass_table.h:829
int mid_point(uint8_t *p) const
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in ...
Definition: glass_table.cc:734
unsigned int block_size
block size of the B tree in bytes
Definition: glass_table.h:767
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
void init(unsigned blocksize_, uint4 compress_min_)
Key key() const
Definition: glass_table.h:322
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
void read_block(uint4 n, uint8_t *p) const
read_block(n, p) reads block n of the DB file to address p.
Definition: glass_table.cc:160
const int LEVEL_FREELIST
Freelist blocks have their level set to LEVEL_FREELIST.
Definition: glass_table.h:136
bool read_tag(Glass::Cursor *C_, std::string *tag, bool keep_compressed) const
Read the tag value for the key pointed to by cursor C_.
void set_overwritten() const
Definition: glass_table.cc:289
uint8_t * buffer
buffer of size block_size for reforming blocks
Definition: glass_table.h:802
void append_chunk(std::string *tag) const
Definition: glass_table.h:189
unsigned get_blocksize() const
Definition: glass_version.h:64
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
char name[9]
Definition: dbcheck.cc:55
Interface to Btree cursors.
glass_block_t get_root() const
Definition: glass_version.h:59
void enter_key_above_leaf(Glass::LeafItem previtem, Glass::LeafItem newitem)
enter_key_above_leaf(previtem, newitem) is called after a leaf block split.
Definition: glass_table.cc:647
bool faked_root_block
true if the root block is faked (not written to disk).
Definition: glass_table.h:776
int delete_kt()
int c
offset in the block&#39;s directory
Definition: glass_cursor.h:134
size_t max_item_size
maximum size of an item (key-tag pair)
Definition: glass_table.h:826
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
Pack types into strings and unpack them again.
const int X2
Definition: glass_table.h:75
void basic_open(const RootInfo *root_info, glass_revision_number_t rev)
const int D2
Definition: chert_table.h:122
Wrappers for low-level POSIX I/O routines.
void set_root_is_fake(bool f)
Definition: glass_version.h:73
void SET_DIR_END(uint8_t *b, int x)
Definition: glass_table.h:121
Various handy helpers which std::string really should provide.
void SET_REVISION(uint8_t *b, uint4 rev)
Definition: glass_table.h:117
void delete_branch_item(int j)
GlassTable::delete_branch_item(j, repeatedly) is (almost) the converse of add_branch_item.
GlassFreeList free_list
List of free blocks.
Definition: glass_table.h:805
void block_to_cursor(Glass::Cursor *C_, int j, uint4 n) const
Definition: glass_table.cc:310
void add_leaf_item(Glass::LeafItem kt)
GlassTable::add_leaf_item(kt_) adds item kt_ to the leaf block.
Definition: glass_table.cc:847
uint4 compress_min
Minimum size tag to try compressing (0 for no compression).
Definition: glass_table.h:894
Definition: header.h:151
unsigned REVISION(const uint8_t *b)
Definition: chert_table.h:150
void pack(std::string &buf)
const int BYTES_PER_BLOCK_NUMBER
Definition: glass_table.h:60
#define BYTE_PAIR_RANGE
Definition: glass_table.cc:156
void cancel(const RootInfo &root_info, glass_revision_number_t rev)
Cancel any outstanding changes.
T get_address() const
Definition: glass_table.h:317
const uint8_t * get_p() const
Get pointer to block.
Definition: glass_cursor.h:116
Various assertion macros.
glass_revision_number_t revision_number
revision number of the opened B-tree.
Definition: glass_table.h:761
bool next_default(Glass::Cursor *C_, int j) const
DatabaseError indicates some sort of database related error.
Definition: error.h:367
static uint4 block_given_by(const uint8_t *p, int c)
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value...
void set_root(glass_block_t root_)
Definition: glass_version.h:75
bool exists() const
Determine whether the btree exists on disk.
void set_sequential(bool f)
Definition: glass_version.h:74
uint4 root
the root block of the B-tree
Definition: glass_table.h:796
void do_open_to_write(const RootInfo *root_info, glass_revision_number_t rev=0)
Perform the opening operation to write.
void form_null_key(uint4 n)
Form an item with a null key and with block number n in the tag.
Definition: glass_table.h:385
int level
number of levels, counting from 0
Definition: glass_table.h:793
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
void delete_leaf_item(bool repeatedly)
GlassTable::delete_leaf_item(repeatedly) is (almost) the converse of add_leaf_item.
const std::string & get_free_list() const
Definition: glass_version.h:69
bool sequential
true iff the data has been written in a single write in sequential order.
Definition: glass_table.h:781
void SET_TOTAL_FREE(uint8_t *b, int x)
Definition: glass_table.h:120
void set_free_list(const std::string &s)
Definition: glass_version.h:80
int get_level() const
Definition: glass_version.h:60
bool cursor_created_since_last_modification
Flag for tracking when cursors need to rebuild.
Definition: glass_table.h:838
uint4 block_given_by() const
Get this item&#39;s tag as a block number (this block should not be at level 0).
Definition: glass_table.h:326
bool del(const std::string &key)
Delete an entry from the table.
uint4 get_n() const
Get the block number.
Definition: glass_cursor.h:101
uint8_t * init(unsigned block_size)
Definition: glass_cursor.h:54
bool rewrite
true if the block is not the same as on disk, and so needs rewriting
Definition: glass_cursor.h:137
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
#define SEQ_START_POINT
Flip to sequential addition block-splitting after this number of observed sequential additions (in ne...
Definition: glass_table.cc:146
void destroy()
Definition: glass_cursor.h:83
void set_full_compaction(bool parity)
#define C(X)