backends/quartz/btree_base.cc

Go to the documentation of this file.
00001 /* btree_base.cc: Btree base file implementation
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2004,2006 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00019  * USA
00020  */
00021 
00022 #include <config.h>
00023 
00024 #include "safeerrno.h"
00025 
00026 #include "btree_base.h"
00027 #include "quartz_utils.h"
00028 #include "utils.h"
00029 #include <xapian/error.h>
00030 #include "omassert.h"
00031 
00032 #include <limits.h>
00033 
00034 #include <algorithm>
00035 
00036 using namespace std;
00037 
00038 /************ Base file parameters ************/
00039 
00067 #define CURR_FORMAT 5U
00068 
00069 Btree_base::Btree_base()
00070         : revision(0),
00071           block_size(0),
00072           root(0),
00073           level(0),
00074           bit_map_size(0),
00075           item_count(0),
00076           last_block(0),
00077           have_fakeroot(false),
00078           sequential(false),
00079           bit_map_low(0),
00080           bit_map0(0),
00081           bit_map(0)
00082 {
00083 }
00084 
00085 Btree_base::Btree_base(const string &name_, char ch)
00086         : revision(0),
00087           block_size(0),
00088           root(0),
00089           level(0),
00090           bit_map_size(0),
00091           item_count(0),
00092           last_block(0),
00093           have_fakeroot(false),
00094           sequential(false),
00095           bit_map_low(0),
00096           bit_map0(0),
00097           bit_map(0)
00098 {
00099     string err_msg;
00100     if (!read(name_, ch, err_msg)) {
00101         throw Xapian::DatabaseOpeningError(err_msg);
00102     }
00103 }
00104 
00105 Btree_base::Btree_base(const Btree_base &other)
00106         : revision(other.revision),
00107           block_size(other.block_size),
00108           root(other.root),
00109           level(other.level),
00110           bit_map_size(other.bit_map_size),
00111           item_count(other.item_count),
00112           last_block(other.last_block),
00113           have_fakeroot(other.have_fakeroot),
00114           sequential(other.sequential),
00115           bit_map_low(other.bit_map_low),
00116           bit_map0(0),
00117           bit_map(0)
00118 {
00119     try {
00120         bit_map0 = new byte[bit_map_size];
00121         bit_map = new byte[bit_map_size];
00122 
00123         memcpy(bit_map0, other.bit_map0, bit_map_size);
00124         memcpy(bit_map, other.bit_map, bit_map_size);
00125     } catch (...) {
00126         delete [] bit_map0;
00127         delete [] bit_map;
00128     }
00129 }
00130 
00131 void
00132 Btree_base::swap(Btree_base &other)
00133 {
00134     std::swap(revision, other.revision);
00135     std::swap(block_size, other.block_size);
00136     std::swap(root, other.root);
00137     std::swap(level, other.level);
00138     std::swap(bit_map_size, other.bit_map_size);
00139     std::swap(item_count, other.item_count);
00140     std::swap(last_block, other.last_block);
00141     std::swap(have_fakeroot, other.have_fakeroot);
00142     std::swap(sequential, other.sequential);
00143     std::swap(bit_map_low, other.bit_map_low);
00144     std::swap(bit_map0, other.bit_map0);
00145     std::swap(bit_map, other.bit_map);
00146 }
00147 
00148 Btree_base::~Btree_base()
00149 {
00150     delete [] bit_map;
00151     bit_map = 0;
00152     delete [] bit_map0;
00153     bit_map0 = 0;
00154 }
00155 
00156 bool
00157 Btree_base::do_unpack_uint(const char **start, const char *end,
00158                            uint4 *dest, string &err_msg, 
00159                            const string &basename,
00160                            const char *varname)
00161 {
00162     bool result = unpack_uint(start, end, dest);
00163     if (!result) {
00164         err_msg += "Unable to read " + string(varname) + " from " +
00165                     basename + "\n";
00166     }
00167     return result;
00168 }
00169 
00170 #define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
00171 do { \
00172     if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
00173         return false; \
00174     } \
00175 } while(0)
00176 
00177 /* How much of the base file to read at the first go (in bytes).
00178  * This should be big enough that the base file without bitmap
00179  * will fit in to this size with no problem.  Other than that
00180  * it's fairly arbitrary, but shouldn't be big enough to cause
00181  * serious memory problems!
00182  */
00183 #define REASONABLE_BASE_SIZE 1024
00184 
00185 bool
00186 Btree_base::read(const string & name, char ch, string &err_msg)
00187 {
00188     string basename = name + "base" + ch;
00189     int h = sys_open_to_read_no_except(basename);
00190     fdcloser closefd(h);
00191     if (h == -1) {
00192         err_msg += "Couldn't open " + basename + ": " + strerror(errno) + "\n";
00193         return false;
00194     }
00195     string buf(sys_read_all_bytes(h, REASONABLE_BASE_SIZE));
00196 
00197     const char *start = buf.data();
00198     const char *end = start + buf.length();
00199 
00200     DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
00201     uint4 format;
00202     DO_UNPACK_UINT_ERRCHECK(&start, end, format);
00203     if (format != CURR_FORMAT) {
00204         err_msg += "Bad base file format " + om_tostring(format) + " in " +
00205                     basename + "\n";
00206         return false;
00207     }
00208     DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
00209     DO_UNPACK_UINT_ERRCHECK(&start, end, root);
00210     DO_UNPACK_UINT_ERRCHECK(&start, end, level);
00211     DO_UNPACK_UINT_ERRCHECK(&start, end, bit_map_size);
00212     /* Now that we know the size of the bit map, (possibly)
00213      * read in more data from the base file in case
00214      * REASONABLE_BASE_SIZE is too small.  We need to update
00215      * start and end.
00216      */
00217     {
00218         unsigned long start_offset = start - buf.data();
00219         buf += sys_read_all_bytes(h, bit_map_size);
00220         start = buf.data() + start_offset;
00221         end = buf.data() + buf.length();
00222     }
00223     DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
00224     DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
00225     uint4 have_fakeroot_;
00226     DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
00227     have_fakeroot = have_fakeroot_;
00228 
00229     uint4 sequential_;
00230     DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
00231     sequential = sequential_;
00232 
00233     if (have_fakeroot && !sequential) {
00234         sequential = true; // FIXME : work out why we need this...
00235         /*
00236         err_msg += "Corrupt base file, `" + basename + "':\n"
00237                 "sequential must be set whenever have_fakeroot is set.\n" +
00238                 "sequential=" + (sequential?"true":"false") +
00239                 ", have_fakeroot=" + (have_fakeroot?"true":"false") + "\n";
00240         return false;
00241         */
00242     }
00243 
00244     uint4 revision2;
00245     DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
00246     if (revision != revision2) {
00247         err_msg += "Revision number mismatch in " +
00248                 basename + ": " +
00249                 om_tostring(revision) + " vs " + om_tostring(revision2) + "\n";
00250         return false;
00251     }
00252 
00253     /* Read the bitmap */
00254     if (uint4(end - start) <= bit_map_size) {
00255         err_msg += "Not enough space for bitmap in base file " +
00256                 basename + "\n";
00257         return false;
00258     }
00259 
00260     /* It's ok to delete a zero pointer */
00261     delete [] bit_map0;
00262     bit_map0 = 0;
00263     delete [] bit_map;
00264     bit_map = 0;
00265 
00266     bit_map0 = new byte[bit_map_size];
00267     bit_map = new byte[bit_map_size];
00268     memcpy(bit_map0, start, bit_map_size);
00269     memcpy(bit_map, bit_map0, bit_map_size);
00270     start += bit_map_size;
00271 
00272     uint4 revision3;
00273     if (!unpack_uint(&start, end, &revision3)) {
00274         err_msg += "Couldn't read revision2 from base file " +
00275         basename + "\n";
00276         return false;
00277     }
00278 
00279     if (revision != revision3) {
00280         err_msg += "Revision number mismatch in " +
00281                 basename + ": " +
00282                 om_tostring(revision) + " vs " + om_tostring(revision2) + "\n";
00283         return false;
00284     }
00285 
00286     if (start != end) {
00287         err_msg += "Junk at end of base file " + basename + "\n";
00288         return false;
00289     }
00290 
00291     return true;
00292 }
00293 
00294 void
00295 Btree_base::write_to_file(const string &filename)
00296 {
00297     calculate_last_block();
00298 
00299     string buf;
00300     buf += pack_uint(revision);
00301     buf += pack_uint(CURR_FORMAT);
00302     buf += pack_uint(block_size);
00303     buf += pack_uint(static_cast<uint4>(root));
00304     buf += pack_uint(static_cast<uint4>(level));
00305     buf += pack_uint(static_cast<uint4>(bit_map_size));
00306     buf += pack_uint(static_cast<uint4>(item_count));
00307     buf += pack_uint(static_cast<uint4>(last_block));
00308     buf += pack_uint(have_fakeroot);
00309     buf += pack_uint(sequential);
00310     buf += pack_uint(revision);
00311     if (bit_map_size) {
00312         buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
00313     }
00314     buf += pack_uint(revision);  // REVISION2
00315 
00316     int h = sys_open_to_write(filename);
00317     fdcloser closefd(h);
00318 
00319     sys_write_string(h, buf);
00320     sys_flush(h);
00321 }
00322 
00323 /*
00324    block_free_at_start(B, n) is true iff (if and only if) block n was free at
00325    the start of the transaction on the B-tree.
00326 */
00327 
00328 bool
00329 Btree_base::block_free_at_start(uint4 n) const
00330 {
00331     size_t i = n / CHAR_BIT;
00332     int bit = 0x1 << n % CHAR_BIT;
00333     return (bit_map0[i] & bit) == 0;
00334 }
00335 
00336 /* free_block(B, n) causes block n to be marked free in the bit map.
00337    B->bit_map_low is the lowest byte in the bit map known to have a free bit
00338    set. Searching starts from there when looking for a free block.
00339 */
00340 
00341 void
00342 Btree_base::free_block(uint4 n)
00343 {
00344     uint4 i = n / CHAR_BIT;
00345     int bit = 0x1 << n % CHAR_BIT;
00346     bit_map[i] &= ~ bit;
00347 
00348     if (bit_map_low > i)
00349         if ((bit_map0[i] & bit) == 0) /* free at start */
00350             bit_map_low = i;
00351 }
00352 
00353 /* extend(B) increases the size of the two bit maps in an obvious way.
00354    The bitmap file grows and shrinks as the DB file grows and shrinks in
00355    internal usage. But the DB file itself does not reduce in size, no matter
00356    how many blocks are freed.
00357 */
00358 
00359 #define BIT_MAP_INC 1000
00360     /* increase the bit map by this number of bytes if it overflows */
00361 
00362 void
00363 Btree_base::extend_bit_map()
00364 {
00365     int n = bit_map_size + BIT_MAP_INC;
00366     byte *new_bit_map0 = 0;
00367     byte *new_bit_map = 0;
00368 
00369     try {
00370         new_bit_map0 = new byte[n];
00371         new_bit_map = new byte[n];
00372 
00373         memcpy(new_bit_map0, bit_map0, bit_map_size);
00374         memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
00375         
00376         memcpy(new_bit_map, bit_map, bit_map_size);
00377         memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
00378     } catch (...) {
00379         delete [] new_bit_map0;
00380         delete [] new_bit_map;
00381         throw;
00382     }
00383     delete [] bit_map0;
00384     bit_map0 = new_bit_map0;
00385     delete [] bit_map;
00386     bit_map = new_bit_map;
00387     bit_map_size = n;
00388 }
00389 
00390 /* next_free_block(B) returns the number of the next available free block in
00391    the bitmap, marking it as 'in use' before returning. More precisely, we get
00392    a block that is both free now (in bit_map) and was free at the beginning of
00393    the transaction on the B-tree (in bit_map0).
00394 
00395    Starting at bit_map_low we go up byte at a time until we find a byte with a
00396    free (zero) bit, and then go up that byte bit at a time. If the bit map has
00397    no free bits it is extended so that it will have.
00398 */
00399 
00400 uint4
00401 Btree_base::next_free_block()
00402 {
00403     uint4 i;
00404     int x;
00405     for (i = bit_map_low;; i++)
00406     {  
00407         if (i >= bit_map_size) {
00408             extend_bit_map();
00409         }
00410         x = bit_map0[i] | bit_map[i];
00411         if (x != UCHAR_MAX) break;
00412     }
00413     uint4 n = i * CHAR_BIT;
00414     int d = 0x1;
00415     while ((x & d) != 0) { d <<= 1; n++; }
00416     bit_map[i] |= d;   /* set as 'in use' */
00417     bit_map_low = i;
00418     if (n > last_block) {
00419         last_block = n;
00420     }
00421     return n;
00422 }
00423 
00424 bool
00425 Btree_base::block_free_now(uint4 n)
00426 {
00427     uint4 i = n / CHAR_BIT;
00428     int bit = 0x1 << n % CHAR_BIT;
00429     return (bit_map[i] & bit) == 0;
00430 }
00431 
00432 void
00433 Btree_base::calculate_last_block()
00434 {
00435     if (bit_map_size == 0) {
00436         last_block = 0;
00437         return;
00438     }
00439     int i = bit_map_size - 1;
00440     while (bit_map[i] == 0 && i > 0) {
00441         i--;
00442     }
00443     bit_map_size = i + 1;
00444 
00445     int x = bit_map[i];
00446 
00447     /* Check for when there are no blocks */
00448     if (x == 0) {
00449         last_block = 0;
00450         return;
00451     }
00452     uint4 n = (i + 1) * CHAR_BIT - 1;
00453     int d = 0x1 << (CHAR_BIT - 1);
00454     while ((x & d) == 0) { d >>= 1; n--; }
00455 
00456     last_block = n;
00457 }
00458 
00459 bool
00460 Btree_base::is_empty() const
00461 {
00462     for (uint4 i = 0; i < bit_map_size; i++) {
00463         if (bit_map[i] != 0) {
00464             return false;
00465         }
00466     }
00467     return true;
00468 }
00469 
00470 void
00471 Btree_base::clear_bit_map()
00472 {
00473     memset(bit_map, 0, bit_map_size);
00474 }
00475 
00476 // We've commited, so "bitmap at start" needs to be reset to the current bitmap.
00477 void
00478 Btree_base::commit()
00479 {
00480     memcpy(bit_map0, bit_map, bit_map_size);
00481     bit_map_low = 0;
00482 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.