backends/flint/flint_table.cc

Go to the documentation of this file.
00001 /* flint_table.cc: Btree implementation
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2005,2006,2007,2009,2010 Olly Betts
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #include <config.h>
00024 
00025 #include <xapian/error.h>
00026 
00027 #include "safeerrno.h"
00028 #ifdef __WIN32__
00029 # include "msvc_posix_wrapper.h"
00030 #endif
00031 
00032 #include "omassert.h"
00033 #include "stringutils.h" // For STRINGIZE().
00034 
00035 // Define to use "dangerous" mode - in this mode we write modified btree
00036 // blocks back in place.  This is somewhat faster (especially once we're
00037 // I/O bound) but the database can't be safely searched during indexing
00038 // and if indexing is terminated uncleanly, the database may be corrupt.
00039 //
00040 // Despite the risks, it's useful for speeding up a full rebuild.
00041 //
00042 // FIXME: make this mode run-time selectable, and record that it is currently
00043 // in use somewhere on disk, so readers can check and refuse to open the
00044 // database.
00045 //
00046 // #define DANGEROUS
00047 
00048 #include <sys/types.h>
00049 
00050 // Trying to include the correct headers with the correct defines set to
00051 // get pread() and pwrite() prototyped on every platform without breaking any
00052 // other platform is a real can of worms.  So instead we probe for what
00053 // prototypes (if any) are required in configure and put them into
00054 // PREAD_PROTOTYPE and PWRITE_PROTOTYPE.
00055 #if defined HAVE_PREAD && defined PREAD_PROTOTYPE
00056 PREAD_PROTOTYPE
00057 #endif
00058 #if defined HAVE_PWRITE && defined PWRITE_PROTOTYPE
00059 PWRITE_PROTOTYPE
00060 #endif
00061 
00062 #include <stdio.h>
00063 #include <string.h>   /* for memmove */
00064 #include <limits.h>   /* for CHAR_BIT */
00065 
00066 #include "flint_io.h"
00067 #include "flint_table.h"
00068 #include "flint_btreeutil.h"
00069 #include "flint_btreebase.h"
00070 #include "flint_cursor.h"
00071 #include "flint_utils.h"
00072 
00073 #include "omassert.h"
00074 #include "omdebug.h"
00075 #include <xapian/error.h>
00076 #include "utils.h"
00077 
00078 #include <algorithm>  // for std::min()
00079 #include <string>
00080 
00081 using namespace std;
00082 
00083 // Only try to compress tags longer than this many bytes.
00084 const size_t COMPRESS_MIN = 4;
00085 
00086 //#define BTREE_DEBUG_FULL 1
00087 #undef BTREE_DEBUG_FULL
00088 
00089 #ifdef BTREE_DEBUG_FULL
00090 /*------debugging aids from here--------*/
00091 
00092 static void print_key(const byte * p, int c, int j);
00093 static void print_tag(const byte * p, int c, int j);
00094 
00095 /*
00096 static void report_cursor(int N, Btree * B, Cursor_ * C)
00097 {
00098     int i;
00099     printf("%d)\n", N);
00100     for (i = 0; i <= B->level; i++)
00101         printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
00102                 C[i].p, C[i].c, C[i].n, C[i].rewrite);
00103 }
00104 */
00105 
00106 /*------to here--------*/
00107 #endif /* BTREE_DEBUG_FULL */
00108 
00109 /* Input/output is defined with calls to the basic Unix system interface: */
00110 
00111 static void sys_unlink(const string &filename)
00112 {
00113 #ifdef __WIN32__
00114     if (msvc_posix_unlink(filename.c_str()) == -1) {
00115 #else
00116     if (unlink(filename) == -1) {
00117 #endif
00118         string message = "Failed to unlink ";
00119         message += filename;
00120         message += ": ";
00121         message += strerror(errno);
00122         throw Xapian::DatabaseCorruptError(message);
00123     }
00124 }
00125 
00126 static inline byte *zeroed_new(size_t size)
00127 {
00128     byte *temp = new byte[size];
00129     // No need to check if temp is NULL, new throws std::bad_alloc if
00130     // allocation fails.
00131     Assert(temp);
00132     memset(temp, 0, size);
00133     return temp;
00134 }
00135 
00136 /* A B-tree comprises (a) a base file, containing essential information (Block
00137    size, number of the B-tree root block etc), (b) a bitmap, the Nth bit of the
00138    bitmap being set if the Nth block of the B-tree file is in use, and (c) a
00139    file DB containing the B-tree proper. The DB file is divided into a sequence
00140    of equal sized blocks, numbered 0, 1, 2 ... some of which are free, some in
00141    use. Those in use are arranged in a tree.
00142 
00143    Each block, b, has a structure like this:
00144 
00145      R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
00146      <---------- D ----------> <-M->
00147 
00148    And then,
00149 
00150    R = REVISION(b)  is the revision number the B-tree had when the block was
00151                     written into the DB file.
00152    L = GET_LEVEL(b) is the level of the block, which is the number of levels
00153                     towards the root of the B-tree structure. So leaf blocks
00154                     have level 0 and the one root block has the highest level
00155                     equal to the number of levels in the B-tree.
00156    M = MAX_FREE(b)  is the size of the gap between the end of the directory and
00157                     the first item of data. (It is not necessarily the maximum
00158                     size among the bits of space that are free, but I can't
00159                     think of a better name.)
00160    T = TOTAL_FREE(b)is the total amount of free space left in b.
00161    D = DIR_END(b)   gives the offset to the end of the directory.
00162 
00163    o1, o2 ... oN are a directory of offsets to the N items held in the block.
00164    The items are key-tag pairs, and as they occur in the directory are ordered
00165    by the keys.
00166 
00167    An item has this form:
00168 
00169            I K key x C tag
00170              <--K-->
00171            <------I------>
00172 
00173    A long tag presented through the API is split up into C tags small enough to
00174    be accommodated in the blocks of the B-tree. The key is extended to include
00175    a counter, x, which runs from 1 to C. The key is preceded by a length, K,
00176    and the whole item with a length, I, as depicted above.
00177 
00178    Here are the corresponding definitions:
00179 
00180 */
00181 
00182 #define REVISION(b)      static_cast<unsigned int>(getint4(b, 0))
00183 #define GET_LEVEL(b)     getint1(b, 4)
00184 #define MAX_FREE(b)      getint2(b, 5)
00185 #define TOTAL_FREE(b)    getint2(b, 7)
00186 #define DIR_END(b)       getint2(b, 9)
00187 #define DIR_START        11
00188 
00189 #define SET_REVISION(b, x)      setint4(b, 0, x)
00190 #define SET_LEVEL(b, x)         setint1(b, 4, x)
00191 #define SET_MAX_FREE(b, x)      setint2(b, 5, x)
00192 #define SET_TOTAL_FREE(b, x)    setint2(b, 7, x)
00193 #define SET_DIR_END(b, x)       setint2(b, 9, x)
00194 
00197 #define SEQ_START_POINT (-10)
00198 
00201 #define BLOCK_CAPACITY 4
00202 
00203 
00204 
00205 /* There are two bit maps in bit_map0 and bit_map. The nth bit of bitmap is 0
00206    if the nth block is free, otherwise 1. bit_map0 is the initial state of
00207    bitmap at the start of the current transaction.
00208 
00209    Note use of the limits.h values:
00210    UCHAR_MAX = 255, an unsigned with all bits set, and
00211    CHAR_BIT = 8, the number of bits per byte
00212 
00213    BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
00214    bytes -- 64K effectively.
00215 */
00216 
00217 #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
00218 
00220 void
00221 FlintTable::read_block(uint4 n, byte * p) const
00222 {
00223     // Log the value of p, not the contents of the block it points to...
00224     DEBUGCALL(DB, void, "FlintTable::read_block", n << ", " << (void*)p);
00225     /* Use the base bit_map_size not the bitmap's size, because
00226      * the latter is uninitialised in readonly mode.
00227      */
00228     Assert(n / CHAR_BIT < base.get_bit_map_size());
00229 
00230 #ifdef HAVE_PREAD
00231     off_t offset = off_t(block_size) * n;
00232     int m = block_size;
00233     while (true) {
00234         ssize_t bytes_read = pread(handle, reinterpret_cast<char *>(p), m,
00235                                    offset);
00236         // normal case - read succeeded, so return.
00237         if (bytes_read == m) return;
00238         if (bytes_read == -1) {
00239             if (errno == EINTR) continue;
00240             string message = "Error reading block " + om_tostring(n) + ": ";
00241             message += strerror(errno);
00242             throw Xapian::DatabaseError(message);
00243         } else if (bytes_read == 0) {
00244             string message = "Error reading block " + om_tostring(n) + ": got end of file";
00245             throw Xapian::DatabaseError(message);
00246         } else if (bytes_read < m) {
00247             /* Read part of the block, which is not an error.  We should
00248              * continue reading the rest of the block.
00249              */
00250             m -= int(bytes_read);
00251             p += bytes_read;
00252             offset += bytes_read;
00253         }
00254     }
00255 #else
00256     if (lseek(handle, off_t(block_size) * n, SEEK_SET) == -1) {
00257         string message = "Error seeking to block: ";
00258         message += strerror(errno);
00259         throw Xapian::DatabaseError(message);
00260     }
00261 
00262     flint_io_read(handle, reinterpret_cast<char *>(p), block_size, block_size);
00263 #endif
00264 }
00265 
00272 void
00273 FlintTable::write_block(uint4 n, const byte * p) const
00274 {
00275     DEBUGCALL(DB, void, "FlintTable::write_block", n << ", " << p);
00276     Assert(writable);
00277     /* Check that n is in range. */
00278     Assert(n / CHAR_BIT < base.get_bit_map_size());
00279 
00280     /* don't write to non-free */;
00281     AssertParanoid(base.block_free_at_start(n));
00282 
00283     /* write revision is okay */
00284     AssertEqParanoid(REVISION(p), latest_revision_number + 1);
00285 
00286     if (both_bases) {
00287         // Delete the old base before modifying the database.
00288         sys_unlink(name + "base" + other_base_letter());
00289         both_bases = false;
00290         latest_revision_number = revision_number;
00291     }
00292 
00293 #ifdef HAVE_PWRITE
00294     off_t offset = off_t(block_size) * n;
00295     int m = block_size;
00296     while (true) {
00297         ssize_t bytes_written = pwrite(handle, p, m, offset);
00298         if (bytes_written == m) {
00299             // normal case - write succeeded, so return.
00300             return;
00301         } else if (bytes_written == -1) {
00302             if (errno == EINTR) continue;
00303             string message = "Error writing block: ";
00304             message += strerror(errno);
00305             throw Xapian::DatabaseError(message);
00306         } else if (bytes_written == 0) {
00307             string message = "Error writing block: wrote no data";
00308             throw Xapian::DatabaseError(message);
00309         } else if (bytes_written < m) {
00310             /* Wrote part of the block, which is not an error.  We should
00311              * continue writing the rest of the block.
00312              */
00313             m -= int(bytes_written);
00314             p += bytes_written;
00315             offset += bytes_written;
00316         }
00317     }
00318 #else
00319     if (lseek(handle, (off_t)block_size * n, SEEK_SET) == -1) {
00320         string message = "Error seeking to block: ";
00321         message += strerror(errno);
00322         throw Xapian::DatabaseError(message);
00323     }
00324 
00325     flint_io_write(handle, reinterpret_cast<const char *>(p), block_size);
00326 #endif
00327 }
00328 
00329 
00330 /* A note on cursors:
00331 
00332    Each B-tree level has a corresponding array element C[j] in a
00333    cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
00334    root block level. Within a level j,
00335 
00336        C[j].p  addresses the block
00337        C[j].c  is the offset into the directory entry in the block
00338        C[j].n  is the number of the block at C[j].p
00339 
00340    A look up in the B-tree causes navigation of the blocks starting
00341    from the root. In each block, p, we find an offset, c, to an item
00342    which gives the number, n, of the block for the next level. This
00343    leads to an array of values p,c,n which are held inside the cursor.
00344 
00345    Any Btree object B has a built-in cursor, at B->C. But other cursors may
00346    be created.  If BC is a created cursor, BC->C is the cursor in the
00347    sense given above, and BC->B is the handle for the B-tree again.
00348 */
00349 
00350 
00351 void
00352 FlintTable::set_overwritten() const
00353 {
00354     DEBUGCALL(DB, void, "FlintTable::set_overwritten", "");
00355     // If we're writable, there shouldn't be another writer who could cause
00356     // overwritten to be flagged, so that's a DatabaseCorruptError.
00357     if (writable)
00358         throw Xapian::DatabaseCorruptError("Db block overwritten - are there multiple writers?");
00359     throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
00360 }
00361 
00362 /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
00363    C, writing the block currently at C[j] back to disk if necessary.
00364    Note that
00365 
00366        C[j].rewrite
00367 
00368    is true iff C[j].n is different from block n in file DB. If it is
00369    false no rewriting is necessary.
00370 */
00371 
00372 void
00373 FlintTable::block_to_cursor(Cursor_ * C_, int j, uint4 n) const
00374 {
00375     DEBUGCALL(DB, void, "FlintTable::block_to_cursor", (void*)C_ << ", " << j << ", " << n);
00376     if (n == C_[j].n) return;
00377     byte * p = C_[j].p;
00378     Assert(p);
00379 
00380     // FIXME: only needs to be done in write mode
00381     if (C_[j].rewrite) {
00382         Assert(writable);
00383         Assert(C == C_);
00384         write_block(C_[j].n, p);
00385         C_[j].rewrite = false;
00386     }
00387     // Check if the block is in the built-in cursor (potentially in
00388     // modified form).
00389     if (writable && n == C[j].n) {
00390         if (p != C[j].p)
00391             memcpy(p, C[j].p, block_size);
00392     } else {
00393         read_block(n, p);
00394     }
00395 
00396     C_[j].n = n;
00397     if (j < level) {
00398         /* unsigned comparison */
00399         if (REVISION(p) > REVISION(C_[j + 1].p)) {
00400             set_overwritten();
00401             return;
00402         }
00403     }
00404     AssertEq(j, GET_LEVEL(p));
00405 }
00406 
00428 void
00429 FlintTable::alter()
00430 {
00431     DEBUGCALL(DB, void, "FlintTable::alter", "");
00432     Assert(writable);
00433 #ifdef DANGEROUS
00434     C[0].rewrite = true;
00435 #else
00436     int j = 0;
00437     byte * p = C[j].p;
00438     while (true) {
00439         if (C[j].rewrite) return; /* all new, so return */
00440         C[j].rewrite = true;
00441 
00442         uint4 n = C[j].n;
00443         if (base.block_free_at_start(n)) {
00444             Assert(REVISION(p) == latest_revision_number + 1);
00445             return;
00446         }
00447         Assert(REVISION(p) < latest_revision_number + 1);
00448         base.free_block(n);
00449         n = base.next_free_block();
00450         C[j].n = n;
00451         SET_REVISION(p, latest_revision_number + 1);
00452 
00453         if (j == level) return;
00454         j++;
00455         p = C[j].p;
00456         Item_wr_(p, C[j].c).set_block_given_by(n);
00457     }
00458 #endif
00459 }
00460 
00473 int FlintTable::find_in_block(const byte * p, Key_ key, bool leaf, int c)
00474 {
00475     DEBUGCALL_STATIC(DB, int, "FlintTable::find_in_block", reinterpret_cast<const void*>(p) << ", " << reinterpret_cast<const void*>(key.get_address()) << ", " << leaf << ", " << c);
00476     int i = DIR_START;
00477     if (leaf) i -= D2;
00478     int j = DIR_END(p);
00479 
00480     if (c != -1) {
00481         if (c < j && i < c && Item_(p, c).key() <= key)
00482             i = c;
00483         c += D2;
00484         if (c < j && i < c && key < Item_(p, c).key())
00485             j = c;
00486     }
00487 
00488     while (j - i > D2) {
00489         int k = i + ((j - i)/(D2 * 2))*D2; /* mid way */
00490         if (key < Item_(p, k).key()) j = k; else i = k;
00491     }
00492     RETURN(i);
00493 }
00494 
00502 bool
00503 FlintTable::find(Cursor_ * C_) const
00504 {
00505     DEBUGCALL(DB, bool, "FlintTable::find", (void*)C_);
00506     // Note: the parameter is needed when we're called by FlintCursor
00507     const byte * p;
00508     int c;
00509     Key_ key = kt.key();
00510     for (int j = level; j > 0; --j) {
00511         p = C_[j].p;
00512         c = find_in_block(p, key, false, C_[j].c);
00513 #ifdef BTREE_DEBUG_FULL
00514         printf("Block in FlintTable:find - code position 1");
00515         report_block_full(j, C_[j].n, p);
00516 #endif /* BTREE_DEBUG_FULL */
00517         C_[j].c = c;
00518         block_to_cursor(C_, j - 1, Item_(p, c).block_given_by());
00519     }
00520     p = C_[0].p;
00521     c = find_in_block(p, key, true, C_[0].c);
00522 #ifdef BTREE_DEBUG_FULL
00523     printf("Block in FlintTable:find - code position 2");
00524     report_block_full(0, C_[0].n, p);
00525 #endif /* BTREE_DEBUG_FULL */
00526     C_[0].c = c;
00527     if (c < DIR_START) RETURN(false);
00528     RETURN(Item_(p, c).key() == key);
00529 }
00530 
00536 void
00537 FlintTable::compact(byte * p)
00538 {
00539     DEBUGCALL(DB, void, "FlintTable::compact", (void*)p);
00540     Assert(writable);
00541     int e = block_size;
00542     byte * b = buffer;
00543     int dir_end = DIR_END(p);
00544     for (int c = DIR_START; c < dir_end; c += D2) {
00545         Item_ item(p, c);
00546         int l = item.size();
00547         e -= l;
00548         memmove(b + e, item.get_address(), l);
00549         setD(p, c, e);  /* reform in b */
00550     }
00551     memmove(p + e, b + e, block_size - e);  /* copy back */
00552     e -= dir_end;
00553     SET_TOTAL_FREE(p, e);
00554     SET_MAX_FREE(p, e);
00555 }
00556 
00560 void
00561 FlintTable::split_root(uint4 split_n)
00562 {
00563     DEBUGCALL(DB, void, "FlintTable::split_root", split_n);
00564     /* gain a level */
00565     ++level;
00566 
00567     /* check level overflow - this isn't something that should ever happen
00568      * but deserves more than an Assert()... */
00569     if (level == BTREE_CURSOR_LEVELS) {
00570         throw Xapian::DatabaseCorruptError("Btree has grown impossibly large ("STRINGIZE(BTREE_CURSOR_LEVELS)" levels)");
00571     }
00572 
00573     byte * q = zeroed_new(block_size);
00574     C[level].p = q;
00575     C[level].c = DIR_START;
00576     C[level].n = base.next_free_block();
00577     C[level].rewrite = true;
00578     SET_REVISION(q, latest_revision_number + 1);
00579     SET_LEVEL(q, level);
00580     SET_DIR_END(q, DIR_START);
00581     compact(q);   /* to reset TOTAL_FREE, MAX_FREE */
00582 
00583     /* form a null key in b with a pointer to the old root */
00584     byte b[10]; /* 7 is exact */
00585     Item_wr_ item(b);
00586     item.form_null_key(split_n);
00587     add_item(item, level);
00588 }
00589 
00605 void
00606 FlintTable::enter_key(int j, Key_ prevkey, Key_ newkey)
00607 {
00608     Assert(writable);
00609     Assert(prevkey < newkey);
00610     Assert(j >= 1);
00611 
00612     uint4 blocknumber = C[j - 1].n;
00613 
00614     // FIXME update to use Key_
00615     // Keys are truncated here: but don't truncate the count at the end away.
00616     const int newkey_len = newkey.length();
00617     int i;
00618 
00619     if (j == 1) {
00620         // Truncate the key to the minimal key which differs from prevkey,
00621         // the preceding key in the block.
00622         i = 0;
00623         const int min_len = min(newkey_len, prevkey.length());
00624         while (i < min_len && prevkey[i] == newkey[i]) {
00625             i++;
00626         }
00627 
00628         // Want one byte of difference.
00629         if (i < newkey_len) i++;
00630     } else {
00631         /* Can't truncate between branch levels, since the separated keys
00632          * are in at the leaf level, and truncating again will change the
00633          * branch point.
00634          */
00635         i = newkey_len;
00636     }
00637 
00638     byte b[UCHAR_MAX + 6];
00639     Item_wr_ item(b);
00640     Assert(i <= 256 - I2 - C2);
00641     Assert(i <= (int)sizeof(b) - I2 - C2 - 4);
00642     item.set_key_and_block(newkey, i, blocknumber);
00643 
00644     // When j > 1 we can make the first key of block p null.  This is probably
00645     // worthwhile as it trades a small amount of CPU and RAM use for a small
00646     // saving in disk use.  Other redundant keys will still creep in though.
00647     if (j > 1) {
00648         byte * p = C[j - 1].p;
00649         uint4 n = getint4(newkey.get_address(), newkey_len + K1 + C2);
00650         int new_total_free = TOTAL_FREE(p) + newkey_len + C2;
00651         // FIXME: incredibly icky going from key to item like this...
00652         Item_wr_(const_cast<byte*>(newkey.get_address()) - I2).form_null_key(n);
00653         SET_TOTAL_FREE(p, new_total_free);
00654     }
00655 
00656     C[j].c = find_in_block(C[j].p, item.key(), false, 0) + D2;
00657     C[j].rewrite = true; /* a subtle point: this *is* required. */
00658     add_item(item, j);
00659 }
00660 
00665 int
00666 FlintTable::mid_point(byte * p)
00667 {
00668     int n = 0;
00669     int dir_end = DIR_END(p);
00670     int size = block_size - TOTAL_FREE(p) - dir_end;
00671     for (int c = DIR_START; c < dir_end; c += D2) {
00672         int l = Item_(p, c).size();
00673         n += 2 * l;
00674         if (n >= size) {
00675             if (l < n - size) return c;
00676             return c + D2;
00677         }
00678     }
00679 
00680     /* falling out of mid_point */
00681     Assert(false);
00682     return 0; /* Stop compiler complaining about end of method. */
00683 }
00684 
00693 void
00694 FlintTable::add_item_to_block(byte * p, Item_wr_ kt_, int c)
00695 {
00696     Assert(writable);
00697     int dir_end = DIR_END(p);
00698     int kt_len = kt_.size();
00699     int needed = kt_len + D2;
00700     int new_total = TOTAL_FREE(p) - needed;
00701     int new_max = MAX_FREE(p) - needed;
00702 
00703     Assert(new_total >= 0);
00704 
00705     if (new_max < 0) {
00706         compact(p);
00707         new_max = MAX_FREE(p) - needed;
00708         Assert(new_max >= 0);
00709     }
00710     Assert(dir_end >= c);
00711 
00712     memmove(p + c + D2, p + c, dir_end - c);
00713     dir_end += D2;
00714     SET_DIR_END(p, dir_end);
00715 
00716     int o = dir_end + new_max;
00717     setD(p, c, o);
00718     memmove(p + o, kt_.get_address(), kt_len);
00719 
00720     SET_MAX_FREE(p, new_max);
00721 
00722     SET_TOTAL_FREE(p, new_total);
00723 }
00724 
00730 void
00731 FlintTable::add_item(Item_wr_ kt_, int j)
00732 {
00733     Assert(writable);
00734     byte * p = C[j].p;
00735     int c = C[j].c;
00736     uint4 n;
00737 
00738     int needed = kt_.size() + D2;
00739     if (TOTAL_FREE(p) < needed) {
00740         int m;
00741         // Prepare to split p. After splitting, the block is in two halves, the
00742         // lower half is split_p, the upper half p again. add_to_upper_half
00743         // becomes true when the item gets added to p, false when it gets added
00744         // to split_p.
00745 
00746         if (seq_count < 0) {
00747             // If we're not in sequential mode, we split at the mid point
00748             // of the node.
00749             m = mid_point(p);
00750         } else {
00751             // During sequential addition, split at the insert point
00752             m = c;
00753         }
00754 
00755         uint4 split_n = C[j].n;
00756         C[j].n = base.next_free_block();
00757 
00758         memcpy(split_p, p, block_size);  // replicate the whole block in split_p
00759         SET_DIR_END(split_p, m);
00760         compact(split_p);      /* to reset TOTAL_FREE, MAX_FREE */
00761 
00762         {
00763             int residue = DIR_END(p) - m;
00764             int new_dir_end = DIR_START + residue;
00765             memmove(p + DIR_START, p + m, residue);
00766             SET_DIR_END(p, new_dir_end);
00767         }
00768 
00769         compact(p);      /* to reset TOTAL_FREE, MAX_FREE */
00770 
00771         bool add_to_upper_half;
00772         if (seq_count < 0) {
00773             add_to_upper_half = (c >= m);
00774         } else {
00775             // And add item to lower half if split_p has room, otherwise upper
00776             // half
00777             add_to_upper_half = (TOTAL_FREE(split_p) < needed);
00778         }
00779 
00780         if (add_to_upper_half) {
00781             c -= (m - DIR_START);
00782             Assert(seq_count < 0 || c <= DIR_START + D2);
00783             Assert(c >= DIR_START);
00784             Assert(c <= DIR_END(p));
00785             add_item_to_block(p, kt_, c);
00786             n = C[j].n;
00787         } else {
00788             Assert(c >= DIR_START);
00789             Assert(c <= DIR_END(split_p));
00790             add_item_to_block(split_p, kt_, c);
00791             n = split_n;
00792         }
00793         write_block(split_n, split_p);
00794 
00795         // Check if we're splitting the root block.
00796         if (j == level) split_root(split_n);
00797 
00798         /* Enter a separating key at level j + 1 between */
00799         /* the last key of block split_p, and the first key of block p */
00800         enter_key(j + 1,
00801                   Item_(split_p, DIR_END(split_p) - D2).key(),
00802                   Item_(p, DIR_START).key());
00803     } else {
00804         add_item_to_block(p, kt_, c);
00805         n = C[j].n;
00806     }
00807     if (j == 0) {
00808         changed_n = n;
00809         changed_c = c;
00810     }
00811 }
00812 
00820 void
00821 FlintTable::delete_item(int j, bool repeatedly)
00822 {
00823     Assert(writable);
00824     byte * p = C[j].p;
00825     int c = C[j].c;
00826     int kt_len = Item_(p, c).size(); /* size of the item to be deleted */
00827     int dir_end = DIR_END(p) - D2;   /* directory length will go down by 2 bytes */
00828 
00829     memmove(p + c, p + c + D2, dir_end - c);
00830     SET_DIR_END(p, dir_end);
00831     SET_MAX_FREE(p, MAX_FREE(p) + D2);
00832     SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
00833 
00834     if (!repeatedly) return;
00835     if (j < level) {
00836         if (dir_end == DIR_START) {
00837             base.free_block(C[j].n);
00838             C[j].rewrite = false;
00839             C[j].n = BLK_UNUSED;
00840             C[j + 1].rewrite = true;  /* *is* necessary */
00841             delete_item(j + 1, true);
00842         }
00843     } else {
00844         Assert(j == level);
00845         while (dir_end == DIR_START + D2 && level > 0) {
00846             /* single item in the root block, so lose a level */
00847             uint4 new_root = Item_(p, DIR_START).block_given_by();
00848             delete [] p;
00849             C[level].p = 0;
00850             base.free_block(C[level].n);
00851             C[level].rewrite = false;
00852             C[level].n = BLK_UNUSED;
00853             level--;
00854 
00855             block_to_cursor(C, level, new_root);
00856 
00857             p = C[level].p;
00858             dir_end = DIR_END(p); /* prepare for the loop */
00859         }
00860     }
00861 }
00862 
00863 /* debugging aid:
00864 static addcount = 0;
00865 */
00866 
00892 int
00893 FlintTable::add_kt(bool found)
00894 {
00895     Assert(writable);
00896     int components = 0;
00897 
00898     /*
00899     {
00900         printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
00901         print_bytes(kt[I2] - K1 - C2, kt + I2 + K1); putchar('\n');
00902     }
00903     */
00904     alter();
00905 
00906     if (found) { /* replacement */
00907         seq_count = SEQ_START_POINT;
00908         sequential = false;
00909 
00910         byte * p = C[0].p;
00911         int c = C[0].c;
00912         Item_ item(p, c);
00913         int kt_size = kt.size();
00914         int needed = kt_size - item.size();
00915 
00916         components = Item_(p, c).components_of();
00917 
00918         if (needed <= 0) {
00919             /* simple replacement */
00920             memmove(const_cast<byte *>(item.get_address()),
00921                     kt.get_address(), kt_size);
00922             SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
00923         } else {
00924             /* new item into the block's freespace */
00925             int new_max = MAX_FREE(p) - kt_size;
00926             if (new_max >= 0) {
00927                 int o = DIR_END(p) + new_max;
00928                 memmove(p + o, kt.get_address(), kt_size);
00929                 setD(p, c, o);
00930                 SET_MAX_FREE(p, new_max);
00931                 SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
00932             } else {
00933                 /* do it the long way */
00934                 delete_item(0, false);
00935                 add_item(kt, 0);
00936             }
00937         }
00938     } else {
00939         /* addition */
00940         if (changed_n == C[0].n && changed_c == C[0].c) {
00941             if (seq_count < 0) seq_count++;
00942         } else {
00943             seq_count = SEQ_START_POINT;
00944             sequential = false;
00945         }
00946         C[0].c += D2;
00947         add_item(kt, 0);
00948     }
00949     return components;
00950 }
00951 
00952 /* delete_kt() corresponds to add_kt(found), but there are only
00953    two cases: if the key is not found nothing is done, and if it is
00954    found the corresponding item is deleted with delete_item.
00955 */
00956 
00957 int
00958 FlintTable::delete_kt()
00959 {
00960     Assert(writable);
00961 
00962     bool found = find(C);
00963 
00964     int components = 0;
00965     seq_count = SEQ_START_POINT;
00966     sequential = false;
00967 
00968     /*
00969     {
00970         printf("%d) %s ", addcount++, (found ? "deleting " : "ignoring "));
00971         print_bytes(B->kt[I2] - K1 - C2, B->kt + I2 + K1); putchar('\n');
00972     }
00973     */
00974     if (found) {
00975         components = Item_(C[0].p, C[0].c).components_of();
00976         alter();
00977         delete_item(0, true);
00978     }
00979     return components;
00980 }
00981 
00982 /* FlintTable::form_key(key) treats address kt as an item holder and fills in
00983 the key part:
00984 
00985            (I) K key c (C tag)
00986 
00987 The bracketed parts are left blank. The key is filled in with key_len bytes and
00988 K set accordingly. c is set to 1.
00989 */
00990 
00991 void FlintTable::form_key(const string & key) const
00992 {
00993     kt.form_key(key);
00994 }
00995 
00996 /* FlintTable::add(key, tag) adds the key/tag item to the
00997    B-tree, replacing any existing item with the same key.
00998 
00999    For a long tag, we end up having to add m components, of the form
01000 
01001        key 1 m tag1
01002        key 2 m tag2
01003        ...
01004        key m m tagm
01005 
01006    and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
01007    n components of the form
01008 
01009        key 1 n TAG1
01010        key 2 n TAG2
01011        ...
01012        key n n TAGn
01013 
01014    and n may be greater than, equal to, or less than m. These cases are dealt
01015    with in the code below. If m < n for example, we end up with a series of
01016    deletions.
01017 */
01018 
01019 bool
01020 FlintTable::add(const string &key, string tag, bool already_compressed)
01021 {
01022     DEBUGCALL(DB, bool, "FlintTable::add", key << ", " << tag);
01023     Assert(writable);
01024 
01025     if (handle < 0) create_and_open(block_size);
01026 
01027     form_key(key);
01028 
01029     bool compressed = false;
01030     if (already_compressed) {
01031         compressed = true;
01032     } else if (compress_strategy != DONT_COMPRESS && tag.size() > COMPRESS_MIN) {
01033         CompileTimeAssert(DONT_COMPRESS != Z_DEFAULT_STRATEGY);
01034         CompileTimeAssert(DONT_COMPRESS != Z_FILTERED);
01035         CompileTimeAssert(DONT_COMPRESS != Z_HUFFMAN_ONLY);
01036 #ifdef Z_RLE
01037         CompileTimeAssert(DONT_COMPRESS != Z_RLE);
01038 #endif
01039 
01040         lazy_alloc_deflate_zstream();
01041 
01042         // zlib takes a non-const pointer to the input, but doesn't modify it.
01043         char * non_const_tag = const_cast<char *>(tag.data());
01044         deflate_zstream->next_in = reinterpret_cast<Byte *>(non_const_tag);
01045         deflate_zstream->avail_in = uInt(tag.size());
01046 
01047         // If compressed size is >= tag.size(), we don't want to compress.
01048         unsigned long blk_len = tag.size() - 1;
01049         unsigned char * blk = new unsigned char[blk_len];
01050         deflate_zstream->next_out = blk;
01051         deflate_zstream->avail_out = uInt(blk_len);
01052 
01053         int err = deflate(deflate_zstream, Z_FINISH);
01054         if (err == Z_STREAM_END) {
01055             // If deflate succeeded, then the output was at least one byte
01056             // smaller than the input.
01057             tag.assign(reinterpret_cast<const char *>(blk), deflate_zstream->total_out);
01058             compressed = true;
01059         } else {
01060             // Deflate failed - presumably the data wasn't compressible.
01061         }
01062 
01063         delete [] blk;
01064     }
01065 
01066     // sort of matching kt.append_chunk(), but setting the chunk
01067     const size_t cd = kt.key().length() + K1 + I2 + C2 + C2;  // offset to the tag data
01068     const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
01069     size_t first_L = L;                  // - amount for tag1
01070     bool found = find(C);
01071     if (!found) {
01072         byte * p = C[0].p;
01073         size_t n = TOTAL_FREE(p) % (max_item_size + D2);
01074         if (n > D2 + cd) {
01075             n -= (D2 + cd);
01076             // if n >= last then fully filling this block won't produce
01077             // an extra item, so we might as well do this even if
01078             // full_compaction isn't active.
01079             //
01080             // In the full_compaction case, it turns out we shouldn't always
01081             // try to fill every last byte.  Doing so can actually increase the
01082             // total space required (I believe this effect is due to longer
01083             // dividing keys being required in the index blocks).  Empirically,
01084             // n >= key.size() + K appears a good criterion for K ~= 34.  This
01085             // seems to save about 0.2% in total database size over always
01086             // splitting the tag.  It'll also give be slightly faster retrieval
01087             // as we can avoid reading an extra block occasionally.
01088             size_t last = tag.length() % L;
01089             if (n >= last || (full_compaction && n >= key.size() + 34))
01090                 first_L = n;
01091         }
01092     }
01093 
01094     // a null tag must be added in of course
01095     int m = tag.empty() ? 1 : (tag.length() - first_L + L - 1) / L + 1;
01096                                       // there are m items to add
01097     /* FIXME: sort out this error higher up and turn this into
01098      * an assert.
01099      */
01100     if (m >= BYTE_PAIR_RANGE) RETURN(false);
01101 
01102     int n = 0; // initialise to shut off warning
01103                                       // - and there will be n to delete
01104     int o = 0;                        // Offset into the tag
01105     size_t residue = tag.length();    // Bytes of the tag remaining to add in
01106     int replacement = false;          // Has there been a replacement ?
01107     int i;
01108     kt.set_components_of(m);
01109     for (i = 1; i <= m; i++) {
01110         size_t l = (i == m ? residue : (i == 1 ? first_L : L));
01111         Assert(cd + l <= block_size);
01112         Assert(string::size_type(o + l) <= tag.length());
01113         kt.set_tag(cd, tag.data() + o, l, compressed);
01114         kt.set_component_of(i);
01115 
01116         o += l;
01117         residue -= l;
01118 
01119         if (i > 1) found = find(C);
01120         n = add_kt(found);
01121         if (n > 0) replacement = true;
01122     }
01123     /* o == tag.length() here, and n may be zero */
01124     for (i = m + 1; i <= n; i++) {
01125         kt.set_component_of(i);
01126         delete_kt();
01127     }
01128     if (!replacement) ++item_count;
01129     Btree_modified = true;
01130     if (cursor_created_since_last_modification) {
01131         cursor_created_since_last_modification = false;
01132         ++cursor_version;
01133     }
01134     RETURN(true);
01135 }
01136 
01137 /* FlintTable::del(key) returns false if the key is not in the B-tree,
01138    otherwise deletes it and returns true.
01139 
01140    Again, this is parallel to FlintTable::add, but simpler in form.
01141 */
01142 
01143 bool
01144 FlintTable::del(const string &key)
01145 {
01146     DEBUGCALL(DB, bool, "FlintTable::del", key);
01147     Assert(writable);
01148 
01149     if (handle < 0) RETURN(false);
01150 
01151     // We can't delete a key which we is too long for us to store.
01152     if (key.size() > FLINT_BTREE_MAX_KEY_LEN) RETURN(false);
01153 
01154     if (key.empty()) RETURN(false);
01155     form_key(key);
01156 
01157     int n = delete_kt();  /* there are n items to delete */
01158     if (n <= 0) RETURN(false);
01159 
01160     for (int i = 2; i <= n; i++) {
01161         kt.set_component_of(i);
01162         delete_kt();
01163     }
01164 
01165     item_count--;
01166     Btree_modified = true;
01167     if (cursor_created_since_last_modification) {
01168         cursor_created_since_last_modification = false;
01169         ++cursor_version;
01170     }
01171     RETURN(true);
01172 }
01173 
01174 bool
01175 FlintTable::get_exact_entry(const string &key, string & tag) const
01176 {
01177     DEBUGCALL(DB, bool, "FlintTable::get_exact_entry", key << ", " << tag);
01178     Assert(!key.empty());
01179 
01180     if (handle < 0) RETURN(false);
01181 
01182     // An oversized key can't exist, so attempting to search for it should fail.
01183     if (key.size() > FLINT_BTREE_MAX_KEY_LEN) RETURN(false);
01184 
01185     RETURN(find_tag(key, &tag));
01186 }
01187 
01188 bool
01189 FlintTable::key_exists(const string &key) const
01190 {
01191     DEBUGCALL(DB, bool, "FlintTable::key_exists", key);
01192     Assert(!key.empty());
01193 
01194     // An oversized key can't exist, so attempting to search for it should fail.
01195     if (key.size() > FLINT_BTREE_MAX_KEY_LEN) RETURN(false);
01196 
01197     form_key(key);
01198     RETURN(find(C));
01199 }
01200 
01201 bool
01202 FlintTable::find_tag(const string &key, string * tag) const
01203 {
01204     DEBUGCALL(DB, bool, "FlintTable::find_tag", key << ", &tag");
01205     if (handle == -1) RETURN(false);
01206 
01207     // An oversized key can't exist, so attempting to search for it should fail.
01208     if (key.size() > FLINT_BTREE_MAX_KEY_LEN) RETURN(false);
01209 
01210     form_key(key);
01211     if (!find(C)) RETURN(false);
01212 
01213     (void)read_tag(C, tag, false);
01214     RETURN(true);
01215 }
01216 
01217 bool
01218 FlintTable::read_tag(Cursor_ * C_, string *tag, bool keep_compressed) const
01219 {
01220     Item_ item(C_[0].p, C_[0].c);
01221 
01222     /* n components to join */
01223     int n = item.components_of();
01224 
01225     tag->resize(0);
01226     // max_item_size also includes K1 + I2 + C2 + C2 bytes overhead and the key
01227     // (which is at least 1 byte long).
01228     if (n > 1) tag->reserve((max_item_size - (1 + K1 + I2 + C2 + C2)) * n);
01229 
01230     item.append_chunk(tag);
01231     bool compressed = item.get_compressed();
01232 
01233     for (int i = 2; i <= n; i++) {
01234         if (!next(C_, 0)) {
01235             throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
01236         }
01237         (void)Item_(C_[0].p, C_[0].c).append_chunk(tag);
01238     }
01239     // At this point the cursor is on the last item - calling next will move
01240     // it to the next key (FlintCursor::get_tag() relies on this).
01241     if (!compressed || keep_compressed) return compressed;
01242 
01243     // FIXME: Perhaps we should we decompress each chunk as we read it so we
01244     // don't need both the full compressed and uncompressed tags in memory
01245     // at once.
01246 
01247     string utag;
01248     // May not be enough for a compressed tag, but it's a reasonable guess.
01249     utag.reserve(tag->size() + tag->size() / 2);
01250 
01251     Bytef buf[8192];
01252 
01253     lazy_alloc_inflate_zstream();
01254 
01255     // zlib takes a non-const pointer to the input, but doesn't modify it.
01256     char * non_const_tag = const_cast<char *>(tag->data());
01257     inflate_zstream->next_in = reinterpret_cast<Byte *>(non_const_tag);
01258     inflate_zstream->avail_in = uInt(tag->size());
01259 
01260     int err = Z_OK;
01261     while (err != Z_STREAM_END) {
01262         inflate_zstream->next_out = buf;
01263         inflate_zstream->avail_out = uInt(sizeof(buf));
01264         err = inflate(inflate_zstream, Z_SYNC_FLUSH);
01265         if (err == Z_BUF_ERROR && inflate_zstream->avail_in == 0) {
01266             DEBUGLINE(DB, "Z_BUF_ERROR - faking checksum of " << inflate_zstream->adler);
01267             Bytef header2[4];
01268             setint4(header2, 0, inflate_zstream->adler);
01269             inflate_zstream->next_in = header2;
01270             inflate_zstream->avail_in = 4;
01271             err = inflate(inflate_zstream, Z_SYNC_FLUSH);
01272             if (err == Z_STREAM_END) break;
01273         }
01274 
01275         if (err != Z_OK && err != Z_STREAM_END) {
01276             if (err == Z_MEM_ERROR) throw std::bad_alloc();
01277             string msg = "inflate failed";
01278             if (inflate_zstream->msg) {
01279                 msg += " (";
01280                 msg += inflate_zstream->msg;
01281                 msg += ')';
01282             }
01283             throw Xapian::DatabaseError(msg);
01284         }
01285 
01286         utag.append(reinterpret_cast<const char *>(buf),
01287                     inflate_zstream->next_out - buf);
01288     }
01289     if (utag.size() != inflate_zstream->total_out) {
01290         string msg = "compressed tag didn't expand to the expected size: ";
01291         msg += om_tostring(utag.size());
01292         msg += " != ";
01293         // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
01294         msg += om_tostring(size_t(inflate_zstream->total_out));
01295         throw Xapian::DatabaseCorruptError(msg);
01296     }
01297 
01298     swap(*tag, utag);
01299 
01300     return false;
01301 }
01302 
01303 void
01304 FlintTable::set_full_compaction(bool parity)
01305 {
01306     Assert(writable);
01307 
01308     if (parity) seq_count = 0;
01309     full_compaction = parity;
01310 }
01311 
01312 FlintCursor * FlintTable::cursor_get() const {
01313     if (handle < 0) return NULL;
01314     // FIXME Ick - casting away const is nasty
01315     return new FlintCursor(const_cast<FlintTable *>(this));
01316 }
01317 
01318 /************ B-tree opening and closing ************/
01319 
01320 bool
01321 FlintTable::basic_open(bool revision_supplied, flint_revision_number_t revision_)
01322 {
01323     char ch = 'X'; /* will be 'A' or 'B' */
01324 
01325     {
01326         const size_t BTREE_BASES = 2;
01327         string err_msg;
01328         static const char basenames[BTREE_BASES] = { 'A', 'B' };
01329 
01330         FlintTable_base bases[BTREE_BASES];
01331         bool base_ok[BTREE_BASES];
01332 
01333         both_bases = true;
01334         bool valid_base = false;
01335         {
01336             for (size_t i = 0; i < BTREE_BASES; ++i) {
01337                 bool ok = bases[i].read(name, basenames[i], err_msg);
01338                 base_ok[i] = ok;
01339                 if (ok) {
01340                     valid_base = true;
01341                 } else {
01342                     both_bases = false;
01343                 }
01344             }
01345         }
01346 
01347         if (!valid_base) {
01348             if (handle >= 0) {
01349                 ::close(handle);
01350                 handle = -1;
01351             }
01352             string message = "Error opening table `";
01353             message += name;
01354             message += "':\n";
01355             message += err_msg;
01356             throw Xapian::DatabaseOpeningError(message);
01357         }
01358 
01359         if (revision_supplied) {
01360             bool found_revision = false;
01361             for (size_t i = 0; i < BTREE_BASES; ++i) {
01362                 if (base_ok[i] && bases[i].get_revision() == revision_) {
01363                     ch = basenames[i];
01364                     found_revision = true;
01365                     break;
01366                 }
01367             }
01368             if (!found_revision) {
01369                 /* Couldn't open the revision that was asked for.
01370                  * This shouldn't throw an exception, but should just return
01371                  * false to upper levels.
01372                  */
01373                 return false;
01374             }
01375         } else {
01376             flint_revision_number_t highest_revision = 0;
01377             for (size_t i = 0; i < BTREE_BASES; ++i) {
01378                 if (base_ok[i] && bases[i].get_revision() >= highest_revision) {
01379                     ch = basenames[i];
01380                     highest_revision = bases[i].get_revision();
01381                 }
01382             }
01383         }
01384 
01385         FlintTable_base *basep = 0;
01386         FlintTable_base *other_base = 0;
01387 
01388         for (size_t i = 0; i < BTREE_BASES; ++i) {
01389             DEBUGLINE(UNKNOWN, "Checking (ch == " << ch << ") against "
01390                       "basenames[" << i << "] == " << basenames[i]);
01391             DEBUGLINE(UNKNOWN, "bases[" << i << "].get_revision() == " <<
01392                       bases[i].get_revision());
01393             DEBUGLINE(UNKNOWN, "base_ok[" << i << "] == " << base_ok[i]);
01394             if (ch == basenames[i]) {
01395                 basep = &bases[i];
01396 
01397                 // FIXME: assuming only two bases for other_base
01398                 size_t otherbase_num = 1-i;
01399                 if (base_ok[otherbase_num]) {
01400                     other_base = &bases[otherbase_num];
01401                 }
01402                 break;
01403             }
01404         }
01405         Assert(basep);
01406 
01407         /* basep now points to the most recent base block */
01408 
01409         /* Avoid copying the bitmap etc. - swap contents with the base
01410          * object in the vector, since it'll be destroyed anyway soon.
01411          */
01412         base.swap(*basep);
01413 
01414         revision_number =  base.get_revision();
01415         block_size =       base.get_block_size();
01416         root =             base.get_root();
01417         level =            base.get_level();
01418         //bit_map_size =     basep->get_bit_map_size();
01419         item_count =       base.get_item_count();
01420         faked_root_block = base.get_have_fakeroot();
01421         sequential =       base.get_sequential();
01422 
01423         if (other_base != 0) {
01424             latest_revision_number = other_base->get_revision();
01425             if (revision_number > latest_revision_number)
01426                 latest_revision_number = revision_number;
01427         } else {
01428             latest_revision_number = revision_number;
01429         }
01430     }
01431 
01432     /* kt holds constructed items as well as keys */
01433     kt = Item_wr_(zeroed_new(block_size));
01434 
01435     set_max_item_size(BLOCK_CAPACITY);
01436 
01437     base_letter = ch;
01438 
01439     /* ready to open the main file */
01440 
01441     return true;
01442 }
01443 
01444 void
01445 FlintTable::read_root()
01446 {
01447     if (faked_root_block) {
01448         /* root block for an unmodified database. */
01449         byte * p = C[0].p;
01450         Assert(p);
01451 
01452         /* clear block - shouldn't be necessary, but is a bit nicer,
01453          * and means that the same operations should always produce
01454          * the same database. */
01455         memset(p, 0, block_size);
01456 
01457         int o = block_size - I2 - K1 - C2 - C2;
01458         Item_wr_(p + o).fake_root_item();
01459 
01460         setD(p, DIR_START, o);         // its directory entry
01461         SET_DIR_END(p, DIR_START + D2);// the directory size
01462 
01463         o -= (DIR_START + D2);
01464         SET_MAX_FREE(p, o);
01465         SET_TOTAL_FREE(p, o);
01466         SET_LEVEL(p, 0);
01467 
01468         if (!writable) {
01469             /* reading - revision number doesn't matter as long as
01470              * it's not greater than the current one. */
01471             SET_REVISION(p, 0);
01472             C[0].n = 0;
01473         } else {
01474             /* writing - */
01475             SET_REVISION(p, latest_revision_number + 1);
01476             C[0].n = base.next_free_block();
01477         }
01478     } else {
01479         /* using a root block stored on disk */
01480         block_to_cursor(C, level, root);
01481 
01482         if (REVISION(C[level].p) > revision_number) set_overwritten();
01483         /* although this is unlikely */
01484     }
01485 }
01486 
01487 bool
01488 FlintTable::do_open_to_write(bool revision_supplied,
01489                              flint_revision_number_t revision_,
01490                              bool create_db)
01491 {
01492     if (handle == -2) {
01493         throw Xapian::DatabaseError("Database has been closed");
01494     }
01495     int flags = O_RDWR | O_BINARY;
01496     if (create_db) flags |= O_CREAT | O_TRUNC;
01497     handle = ::open((name + "DB").c_str(), flags, 0666);
01498     if (handle < 0) {
01499         // lazy doesn't make a lot of sense with create_db anyway, but ENOENT
01500         // with O_CREAT means a parent directory doesn't exist.
01501         if (lazy && !create_db && errno == ENOENT) {
01502             revision_number = revision_;
01503             return true;
01504         }
01505         string message(create_db ? "Couldn't create " : "Couldn't open ");
01506         message += name;
01507         message += "DB read/write: ";
01508         message += strerror(errno);
01509         throw Xapian::DatabaseOpeningError(message);
01510     }
01511 
01512     if (!basic_open(revision_supplied, revision_)) {
01513 	::close(handle);
01514         handle = -1;
01515         if (!revision_supplied) {
01516             throw Xapian::DatabaseOpeningError("Failed to open for writing");
01517         }
01518         /* When the revision is supplied, it's not an exceptional
01519          * case when open failed, so we just return false here.
01520          */
01521         return false;
01522     }
01523 
01524     writable = true;
01525 
01526     for (int j = 0; j <= level; j++) {
01527         C[j].n = BLK_UNUSED;
01528         C[j].p = new byte[block_size];
01529         if (C[j].p == 0) {
01530             throw std::bad_alloc();
01531         }
01532     }
01533     split_p = new byte[block_size];
01534     if (split_p == 0) {
01535         throw std::bad_alloc();
01536     }
01537     read_root();
01538 
01539     buffer = zeroed_new(block_size);
01540 
01541     changed_n = 0;
01542     changed_c = DIR_START;
01543     seq_count = SEQ_START_POINT;
01544 
01545     return true;
01546 }
01547 
01548 FlintTable::FlintTable(const string & path_, bool readonly_,
01549                        int compress_strategy_, bool lazy_)
01550         : revision_number(0),
01551           item_count(0),
01552           block_size(0),
01553           latest_revision_number(0),
01554           both_bases(false),
01555           base_letter('A'),
01556           faked_root_block(true),
01557           sequential(true),
01558           handle(-1),
01559           level(0),
01560           root(0),
01561           kt(0),
01562           buffer(0),
01563           base(),
01564           name(path_),
01565           seq_count(0),
01566           changed_n(0),
01567           changed_c(0),
01568           max_item_size(0),
01569           Btree_modified(false),
01570           full_compaction(false),
01571           writable(!readonly_),
01572           cursor_created_since_last_modification(false),
01573           cursor_version(0),
01574           split_p(0),
01575           compress_strategy(compress_strategy_),
01576           deflate_zstream(NULL),
01577           inflate_zstream(NULL),
01578           lazy(lazy_)
01579 {
01580     DEBUGCALL(DB, void, "FlintTable::FlintTable",
01581               path_ << ", " << readonly_ << ", " <<
01582               compress_strategy_ << ", " << lazy_);
01583 }
01584 
01585 bool
01586 FlintTable::really_empty() const
01587 {
01588     if (handle < 0) {
01589         if (handle == -2) {
01590             throw Xapian::DatabaseError("Database has been closed");
01591         }
01592         return true;
01593     }
01594     FlintCursor cur(const_cast<FlintTable*>(this));
01595     cur.find_entry(string());
01596     return !cur.next();
01597 }
01598 
01599 void
01600 FlintTable::lazy_alloc_deflate_zstream() const {
01601     if (usual(deflate_zstream)) {
01602         if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
01603         // Try to recover by deleting the stream and starting from scratch.
01604         delete deflate_zstream;
01605     }
01606 
01607     deflate_zstream = new z_stream;
01608 
01609     deflate_zstream->zalloc = Z_NULL;
01610     deflate_zstream->zfree = Z_NULL;
01611     deflate_zstream->opaque = Z_NULL;
01612 
01613     // -15 means raw deflate with 32K LZ77 window (largest)
01614     // memLevel 9 is the highest (8 is default)
01615     int err;
01616     err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
01617                        -15, 9, compress_strategy);
01618     if (rare(err != Z_OK)) {
01619         if (err == Z_MEM_ERROR) {
01620             delete deflate_zstream;
01621             deflate_zstream = 0;
01622             throw std::bad_alloc();
01623         }
01624         string msg = "deflateInit2 failed (";
01625         if (deflate_zstream->msg) {
01626             msg += deflate_zstream->msg;
01627         } else {
01628             msg += om_tostring(err);
01629         }
01630         msg += ')';
01631         delete deflate_zstream;
01632         deflate_zstream = 0;
01633         throw Xapian::DatabaseError(msg);
01634     }
01635 }
01636 
01637 void
01638 FlintTable::lazy_alloc_inflate_zstream() const {
01639     if (usual(inflate_zstream)) {
01640         if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
01641         // Try to recover by deleting the stream and starting from scratch.
01642         delete inflate_zstream;
01643     }
01644 
01645     inflate_zstream = new z_stream;
01646 
01647     inflate_zstream->zalloc = Z_NULL;
01648     inflate_zstream->zfree = Z_NULL;
01649     inflate_zstream->opaque = Z_NULL;
01650 
01651     inflate_zstream->next_in = Z_NULL;
01652     inflate_zstream->avail_in = 0;
01653 
01654     int err = inflateInit2(inflate_zstream, -15);
01655     if (rare(err != Z_OK)) {
01656         if (err == Z_MEM_ERROR) {
01657             delete inflate_zstream;
01658             inflate_zstream = 0;
01659             throw std::bad_alloc();
01660         }
01661         string msg = "inflateInit2 failed (";
01662         if (inflate_zstream->msg) {
01663             msg += inflate_zstream->msg;
01664         } else {
01665             msg += om_tostring(err);
01666         }
01667         msg += ')';
01668         delete inflate_zstream;
01669         inflate_zstream = 0;
01670         throw Xapian::DatabaseError(msg);
01671     }
01672 }
01673 
01674 bool
01675 FlintTable::exists() const {
01676     DEBUGCALL(DB, bool, "FlintTable::exists", "");
01677     return (file_exists(name + "DB") &&
01678             (file_exists(name + "baseA") || file_exists(name + "baseB")));
01679 }
01680 
01684 static void
01685 sys_unlink_if_exists(const string & filename)
01686 {
01687     if (unlink(filename) == -1) {
01688         if (errno == ENOENT) return;
01689         throw Xapian::DatabaseError("Can't delete file: `" + filename +
01690                               "': " + strerror(errno));
01691     }
01692 }
01693 
01694 void
01695 FlintTable::erase()
01696 {
01697     DEBUGCALL(DB, void, "FlintTable::erase", "");
01698     close();
01699 
01700     sys_unlink_if_exists(name + "baseA");
01701     sys_unlink_if_exists(name + "baseB");
01702     sys_unlink_if_exists(name + "DB");
01703 }
01704 
01705 void
01706 FlintTable::set_block_size(unsigned int block_size_)
01707 {
01708     DEBUGCALL(DB, void, "FlintTable::set_block_size", block_size_);
01709     // Block size must in the range 2048..BYTE_PAIR_RANGE, and a power of two.
01710     if (block_size_ < 2048 || block_size_ > BYTE_PAIR_RANGE ||
01711         (block_size_ & (block_size_ - 1)) != 0) {
01712         block_size_ = FLINT_DEFAULT_BLOCK_SIZE;
01713     }
01714     block_size = block_size_;
01715 }
01716 
01717 void
01718 FlintTable::create_and_open(unsigned int block_size_)
01719 {
01720     DEBUGCALL(DB, void, "FlintTable::create_and_open", block_size_);
01721     if (handle == -2) {
01722         throw Xapian::DatabaseError("Database has been closed");
01723     }
01724     Assert(writable);
01725     close();
01726 
01727     if (block_size_ == 0) abort();
01728     set_block_size(block_size_);
01729 
01730     // FIXME: it would be good to arrange that this works such that there's
01731     // always a valid table in place if you run create_and_open() on an
01732     // existing table.
01733 
01734     /* write initial values to files */
01735 
01736     /* create the base file */
01737     FlintTable_base base_;
01738     base_.set_revision(revision_number);
01739     base_.set_block_size(block_size_);
01740     base_.set_have_fakeroot(true);
01741     base_.set_sequential(true);
01742     base_.write_to_file(name + "baseA");
01743 
01744     /* remove the alternative base file, if any */
01745     sys_unlink_if_exists(name + "baseB");
01746 
01747     // Any errors are thrown if revision_supplied is false.
01748     (void)do_open_to_write(false, 0, true);
01749 }
01750 
01751 FlintTable::~FlintTable() {
01752     DEBUGCALL(DB, void, "FlintTable::~FlintTable", "");
01753     FlintTable::close();
01754 
01755     if (deflate_zstream) {
01756         // Errors which we care about have already been handled, so just ignore
01757         // any which get returned here.
01758         (void) deflateEnd(deflate_zstream);
01759         delete deflate_zstream;
01760     }
01761 
01762     if (inflate_zstream) {
01763         // Errors which we care about have already been handled, so just ignore
01764         // any which get returned here.
01765         (void) inflateEnd(inflate_zstream);
01766         delete inflate_zstream;
01767     }
01768 }
01769 
01770 void FlintTable::close(bool permanent) {
01771     DEBUGCALL(DB, void, "FlintTable::close", "");
01772 
01773     if (handle >= 0) {
01774         // If an error occurs here, we just ignore it, since we're just
01775         // trying to free everything.
01776         (void)::close(handle);
01777         handle = -1;
01778     }
01779 
01780     if (permanent) {
01781         handle = -2;
01782     }
01783 
01784     for (int j = level; j >= 0; j--) {
01785         delete [] C[j].p;
01786         C[j].p = 0;
01787     }
01788     delete [] split_p;
01789     split_p = 0;
01790 
01791     delete [] kt.get_address();
01792     kt = 0;
01793     delete [] buffer;
01794     buffer = 0;
01795 }
01796 
01797 void
01798 FlintTable::commit(flint_revision_number_t revision)
01799 {
01800     DEBUGCALL(DB, void, "FlintTable::commit", revision);
01801     Assert(writable);
01802 
01803     if (revision <= revision_number) {
01804         throw Xapian::DatabaseError("New revision too low");
01805     }
01806 
01807     if (handle < 0) {
01808         latest_revision_number = revision_number = revision;
01809         return;
01810     }
01811 
01812     // FIXME: this doesn't work (probably because the table revisions get
01813     // out of step) but it's wasteful to keep applying changes to value
01814     // and position if they're never used...
01815     //
01816     // if (!Btree_modified) return;
01817 
01818     for (int j = level; j >= 0; j--) {
01819         if (C[j].rewrite) {
01820             write_block(C[j].n, C[j].p);
01821         }
01822     }
01823 
01824     if (Btree_modified) {
01825         faked_root_block = false;
01826     }
01827 
01828     try {
01829         if (faked_root_block) {
01830             /* We will use a dummy bitmap. */
01831             base.clear_bit_map();
01832         }
01833 
01834         base.set_revision(revision);
01835         base.set_root(C[level].n);
01836         base.set_level(level);
01837         base.set_item_count(item_count);
01838         base.set_have_fakeroot(faked_root_block);
01839         base.set_sequential(sequential);
01840 
01841         base_letter = other_base_letter();
01842 
01843         both_bases = true;
01844         latest_revision_number = revision_number = revision;
01845         root = C[level].n;
01846 
01847         Btree_modified = false;
01848 
01849         for (int i = 0; i < BTREE_CURSOR_LEVELS; ++i) {
01850             C[i].n = BLK_UNUSED;
01851             C[i].c = -1;
01852             C[i].rewrite = false;
01853         }
01854 
01855         if (!flint_io_sync(handle)) {
01856             (void)::close(handle);
01857             handle = -1;
01858             throw Xapian::DatabaseError("Can't commit new revision - failed to flush DB to disk");
01859         }
01860 
01861         // Save to "<table>.tmp" and then rename to "<table>.base<letter>" so
01862         // that a reader can't try to read a partially written base file.
01863         string tmp = name;
01864         tmp += "tmp";
01865         string basefile = name;
01866         basefile += "base";
01867         basefile += char(base_letter);
01868         base.write_to_file(tmp);
01869 #if defined __WIN32__
01870         if (msvc_posix_rename(tmp.c_str(), basefile.c_str()) < 0) {
01871 #else
01872         if (rename(tmp.c_str(), basefile.c_str()) < 0) {
01873 #endif
01874             // With NFS, rename() failing may just mean that the server crashed
01875             // after successfully renaming, but before reporting this, and then
01876             // the retried operation fails.  So we need to check if the source
01877             // file still exists, which we do by calling unlink(), since we want
01878             // to remove the temporary file anyway.
01879             int saved_errno = errno;
01880             if (unlink(tmp) == 0 || errno != ENOENT) {
01881                 string msg("Couldn't update base file ");
01882                 msg += basefile;
01883                 msg += ": ";
01884                 msg += strerror(saved_errno);
01885                 throw Xapian::DatabaseError(msg);
01886             }
01887         }
01888         base.commit();
01889 
01890         read_root();
01891 
01892         changed_n = 0;
01893         changed_c = DIR_START;
01894         seq_count = SEQ_START_POINT;
01895     } catch (...) {
01896         FlintTable::close();
01897         throw;
01898     }
01899 }
01900 
01901 void
01902 FlintTable::cancel()
01903 {
01904     DEBUGCALL(DB, void, "FlintTable::cancel", "");
01905     Assert(writable);
01906 
01907     if (handle < 0) {
01908         latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
01909         return;
01910     }
01911 
01912     // This causes problems: if (!Btree_modified) return;
01913 
01914     string err_msg;
01915     if (!base.read(name, base_letter, err_msg)) {
01916         throw Xapian::DatabaseCorruptError(string("Couldn't reread base ") + base_letter);
01917     }
01918 
01919     revision_number =  base.get_revision();
01920     block_size =       base.get_block_size();
01921     root =             base.get_root();
01922     level =            base.get_level();
01923     //bit_map_size =     basep->get_bit_map_size();
01924     item_count =       base.get_item_count();
01925     faked_root_block = base.get_have_fakeroot();
01926     sequential =       base.get_sequential();
01927 
01928     latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
01929 
01930     for (int j = 0; j <= level; j++) {
01931         C[j].n = BLK_UNUSED;
01932         C[j].rewrite = false;
01933     }
01934     read_root();
01935 
01936     changed_n = 0;
01937     changed_c = DIR_START;
01938     seq_count = SEQ_START_POINT;
01939 }
01940 
01941 /************ B-tree reading ************/
01942 
01943 bool
01944 FlintTable::do_open_to_read(bool revision_supplied, flint_revision_number_t revision_)
01945 {
01946     if (handle == -2) {
01947         throw Xapian::DatabaseError("Database has been closed");
01948     }
01949     handle = ::open((name + "DB").c_str(), O_RDONLY | O_BINARY);
01950     if (handle < 0) {
01951         if (lazy) {
01952             // This table is optional when reading!
01953             revision_number = revision_;
01954             return true;
01955         }
01956         string message("Couldn't open ");
01957         message += name;
01958         message += "DB to read: ";
01959         message += strerror(errno);
01960         throw Xapian::DatabaseOpeningError(message);
01961     }
01962 
01963     if (!basic_open(revision_supplied, revision_)) {
01964 	::close(handle);
01965         handle = -1;
01966         if (revision_supplied) {
01967             // The requested revision was not available.
01968             // This could be because the database was modified underneath us, or
01969             // because a base file is missing.  Return false, and work out what
01970             // the problem was at a higher level.
01971             return false;
01972         }
01973         throw Xapian::DatabaseOpeningError("Failed to open table for reading");
01974     }
01975 
01976     for (int j = 0; j <= level; j++) {
01977         C[j].n = BLK_UNUSED;
01978         C[j].p = new byte[block_size];
01979         if (C[j].p == 0) {
01980             throw std::bad_alloc();
01981         }
01982     }
01983 
01984     read_root();
01985     return true;
01986 }
01987 
01988 void
01989 FlintTable::open()
01990 {
01991     DEBUGCALL(DB, void, "FlintTable::open", "");
01992     DEBUGLINE(DB, "opening at path " << name);
01993     close();
01994 
01995     if (!writable) {
01996         // Any errors are thrown if revision_supplied is false
01997         (void)do_open_to_read(false, 0);
01998         return;
01999     }
02000 
02001     // Any errors are thrown if revision_supplied is false.
02002     (void)do_open_to_write(false, 0);
02003 }
02004 
02005 bool
02006 FlintTable::open(flint_revision_number_t revision)
02007 {
02008     DEBUGCALL(DB, bool, "FlintTable::open", revision);
02009     DEBUGLINE(DB, "opening for particular revision at path " << name);
02010     close();
02011 
02012     if (!writable) {
02013         if (do_open_to_read(true, revision)) {
02014             AssertEq(revision_number, revision);
02015             RETURN(true);
02016         } else {
02017             close();
02018             RETURN(false);
02019         }
02020     }
02021 
02022     if (!do_open_to_write(true, revision)) {
02023         // Can't open at the requested revision.
02024         close();
02025         RETURN(false);
02026     }
02027 
02028     AssertEq(revision_number, revision);
02029     RETURN(true);
02030 }
02031 
02032 bool
02033 FlintTable::prev_for_sequential(Cursor_ * C_, int /*dummy*/) const
02034 {
02035     int c = C_[0].c;
02036     if (c == DIR_START) {
02037         byte * p = C_[0].p;
02038         Assert(p);
02039         uint4 n = C_[0].n;
02040         while (true) {
02041             if (n == 0) return false;
02042             n--;
02043             if (writable) {
02044                 if (n == C[0].n) {
02045                     // Block is a leaf block in the built-in cursor
02046                     // (potentially in modified form).
02047                     memcpy(p, C[0].p, block_size);
02048                 } else {
02049                     // Blocks in the built-in cursor may not have been written
02050                     // to disk yet, so we have to check that the block number
02051                     // isn't in the built-in cursor or we'll read an
02052                     // uninitialised block (for which GET_LEVEL(p) will
02053                     // probably return 0).
02054                     int j;
02055                     for (j = 1; j <= level; ++j) {
02056                         if (n == C[j].n) break;
02057                     }
02058                     if (j <= level) continue;
02059 
02060                     // Block isn't in the built-in cursor, so the form on disk
02061                     // is valid, so read it to check if it's the next level 0
02062                     // block.
02063                     read_block(n, p);
02064                 }
02065             } else {
02066                 read_block(n, p);
02067             }
02068             if (writable) AssertEq(revision_number, latest_revision_number);
02069             if (REVISION(p) > revision_number + writable) {
02070                 set_overwritten();
02071                 return false;
02072             }
02073             if (GET_LEVEL(p) == 0) break;
02074         }
02075         c = DIR_END(p);
02076         C_[0].n = n;
02077     }
02078     c -= D2;
02079     C_[0].c = c;
02080     return true;
02081 }
02082 
02083 bool
02084 FlintTable::next_for_sequential(Cursor_ * C_, int /*dummy*/) const
02085 {
02086     byte * p = C_[0].p;
02087     Assert(p);
02088     int c = C_[0].c;
02089     c += D2;
02090     Assert((unsigned)c < block_size);
02091     if (c == DIR_END(p)) {
02092         uint4 n = C_[0].n;
02093         while (true) {
02094             n++;
02095             if (n > base.get_last_block()) return false;
02096             if (writable) {
02097                 if (n == C[0].n) {
02098                     // Block is a leaf block in the built-in cursor
02099                     // (potentially in modified form).
02100                     memcpy(p, C[0].p, block_size);
02101                 } else {
02102                     // Blocks in the built-in cursor may not have been written
02103                     // to disk yet, so we have to check that the block number
02104                     // isn't in the built-in cursor or we'll read an
02105                     // uninitialised block (for which GET_LEVEL(p) will
02106                     // probably return 0).
02107                     int j;
02108                     for (j = 1; j <= level; ++j) {
02109                         if (n == C[j].n) break;
02110                     }
02111                     if (j <= level) continue;
02112 
02113                     // Block isn't in the built-in cursor, so the form on disk
02114                     // is valid, so read it to check if it's the next level 0
02115                     // block.
02116                     read_block(n, p);
02117                 }
02118             } else {
02119                 read_block(n, p);
02120             }
02121 
02122             if (writable) AssertEq(revision_number, latest_revision_number);
02123             if (REVISION(p) > revision_number + writable) {
02124                 set_overwritten();
02125                 return false;
02126             }
02127             if (GET_LEVEL(p) == 0) break;
02128         }
02129         c = DIR_START;
02130         C_[0].n = n;
02131     }
02132     C_[0].c = c;
02133     return true;
02134 }
02135 
02136 bool
02137 FlintTable::prev_default(Cursor_ * C_, int j) const
02138 {
02139     byte * p = C_[j].p;
02140     int c = C_[j].c;
02141     Assert(c >= DIR_START);
02142     Assert((unsigned)c < block_size);
02143     Assert(c <= DIR_END(p));
02144     if (c == DIR_START) {
02145         if (j == level) return false;
02146         if (!prev_default(C_, j + 1)) return false;
02147         c = DIR_END(p);
02148     }
02149     c -= D2;
02150     C_[j].c = c;
02151     if (j > 0) {
02152         block_to_cursor(C_, j - 1, Item_(p, c).block_given_by());
02153     }
02154     return true;
02155 }
02156 
02157 bool
02158 FlintTable::next_default(Cursor_ * C_, int j) const
02159 {
02160     byte * p = C_[j].p;
02161     int c = C_[j].c;
02162     Assert(c >= DIR_START);
02163     c += D2;
02164     Assert((unsigned)c < block_size);
02165     // Sometimes c can be DIR_END(p) + 2 here it appears...
02166     if (c > DIR_END(p)) c = DIR_END(p);
02167     Assert(c <= DIR_END(p));
02168     if (c == DIR_END(p)) {
02169         if (j == level) return false;
02170         if (!next_default(C_, j + 1)) return false;
02171         c = DIR_START;
02172     }
02173     C_[j].c = c;
02174     if (j > 0) {
02175         block_to_cursor(C_, j - 1, Item_(p, c).block_given_by());
02176 #ifdef BTREE_DEBUG_FULL
02177         printf("Block in FlintTable:next_default");
02178         report_block_full(j - 1, C_[j - 1].n, C_[j - 1].p);
02179 #endif /* BTREE_DEBUG_FULL */
02180     }
02181     return true;
02182 }
02183 
02199 bool Key_::operator<(Key_ key2) const
02200 {
02201     DEBUGCALL(DB, bool, "Key_::operator<", static_cast<const void*>(key2.p));
02202     int key1_len = length();
02203     int key2_len = key2.length();
02204     if (key1_len == key2_len) {
02205         // The keys are the same length, so we can compare the counts
02206         // in the same operation since they're stored as 2 byte
02207         // bigendian numbers.
02208         RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) < 0);
02209     }
02210 
02211     int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
02212 
02213     // Compare the common part of the keys
02214     int diff = memcmp(p + K1, key2.p + K1, k_smaller);
02215     if (diff != 0) RETURN(diff < 0);
02216 
02217     // We dealt with the "same length" case above so we never need to check
02218     // the count here.
02219     RETURN(key1_len < key2_len);
02220 }
02221 
02222 bool Key_::operator==(Key_ key2) const
02223 {
02224     DEBUGCALL(DB, bool, "Key_::operator==", static_cast<const void*>(key2.p));
02225     int key1_len = length();
02226     if (key1_len != key2.length()) return false;
02227     // The keys are the same length, so we can compare the counts
02228     // in the same operation since they're stored as 2 byte
02229     // bigendian numbers.
02230     RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) == 0);
02231 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.