backends/flint/flint_btreebase.cc

Go to the documentation of this file.
00001 /* flint_btreebase.cc: Btree base file implementation
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002,2003,2004,2006 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00019  * USA
00020  */
00021 
00022 #include <config.h>
00023 
00024 #include "safeerrno.h"
00025 #ifdef __WIN32__
00026 # include "msvc_posix_wrapper.h"
00027 #endif
00028 
00029 #include "flint_btreebase.h"
00030 #include "flint_io.h"
00031 #include "flint_utils.h"
00032 #include "utils.h"
00033 #include <xapian/error.h>
00034 #include "omassert.h"
00035 
00036 #include <limits.h>
00037 
00038 #include <algorithm>
00039 #include <cstring>
00040 
00041 using namespace std;
00042 
00046 class fdcloser {
00047     public:
00048         fdcloser(int fd_) : fd(fd_) {}
00049         ~fdcloser() {
00050             if (fd >= 0) {
00051                 (void)close(fd);
00052             }
00053         }
00054     private:
00055         int fd;
00056 };
00057 
00058 /************ Base file parameters ************/
00059 
00087 #define CURR_FORMAT 5U
00088 
00089 FlintTable_base::FlintTable_base()
00090         : revision(0),
00091           block_size(0),
00092           root(0),
00093           level(0),
00094           bit_map_size(0),
00095           item_count(0),
00096           last_block(0),
00097           have_fakeroot(false),
00098           sequential(false),
00099           bit_map_low(0),
00100           bit_map0(0),
00101           bit_map(0)
00102 {
00103 }
00104 
00105 FlintTable_base::FlintTable_base(const FlintTable_base &other)
00106         : revision(other.revision),
00107           block_size(other.block_size),
00108           root(other.root),
00109           level(other.level),
00110           bit_map_size(other.bit_map_size),
00111           item_count(other.item_count),
00112           last_block(other.last_block),
00113           have_fakeroot(other.have_fakeroot),
00114           sequential(other.sequential),
00115           bit_map_low(other.bit_map_low),
00116           bit_map0(0),
00117           bit_map(0)
00118 {
00119     try {
00120         bit_map0 = new byte[bit_map_size];
00121         bit_map = new byte[bit_map_size];
00122 
00123         memcpy(bit_map0, other.bit_map0, bit_map_size);
00124         memcpy(bit_map, other.bit_map, bit_map_size);
00125     } catch (...) {
00126         delete [] bit_map0;
00127         delete [] bit_map;
00128     }
00129 }
00130 
00131 void
00132 FlintTable_base::swap(FlintTable_base &other)
00133 {
00134     std::swap(revision, other.revision);
00135     std::swap(block_size, other.block_size);
00136     std::swap(root, other.root);
00137     std::swap(level, other.level);
00138     std::swap(bit_map_size, other.bit_map_size);
00139     std::swap(item_count, other.item_count);
00140     std::swap(last_block, other.last_block);
00141     std::swap(have_fakeroot, other.have_fakeroot);
00142     std::swap(sequential, other.sequential);
00143     std::swap(bit_map_low, other.bit_map_low);
00144     std::swap(bit_map0, other.bit_map0);
00145     std::swap(bit_map, other.bit_map);
00146 }
00147 
00148 FlintTable_base::~FlintTable_base()
00149 {
00150     delete [] bit_map;
00151     bit_map = 0;
00152     delete [] bit_map0;
00153     bit_map0 = 0;
00154 }
00155 
00156 bool
00157 FlintTable_base::do_unpack_uint(const char **start, const char *end,
00158                            uint4 *dest, string &err_msg, 
00159                            const string &basename,
00160                            const char *varname)
00161 {
00162     bool result = unpack_uint(start, end, dest);
00163     if (!result) {
00164         err_msg += "Unable to read " + string(varname) + " from " +
00165                     basename + "\n";
00166     }
00167     return result;
00168 }
00169 
00170 #define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
00171 do { \
00172     if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
00173         return false; \
00174     } \
00175 } while(0)
00176 
00177 /* How much of the base file to read at the first go (in bytes).
00178  * This must be big enough that the base file without bitmap
00179  * will fit in to this size with no problem.  Other than that
00180  * it's fairly arbitrary, but shouldn't be big enough to cause
00181  * serious memory problems!
00182  */
00183 #define REASONABLE_BASE_SIZE 1024
00184 
00185 bool
00186 FlintTable_base::read(const string & name, char ch, string &err_msg)
00187 {
00188     string basename = name + "base" + ch;
00189 #ifdef __WIN32__
00190     int h = msvc_posix_open(basename.c_str(), O_RDONLY | O_BINARY);
00191 #else
00192     int h = open(basename.c_str(), O_RDONLY | O_BINARY);
00193 #endif
00194 
00195     if (h == -1) {
00196         err_msg += "Couldn't open " + basename + ": " + strerror(errno) + "\n";
00197         return false;
00198     }
00199     fdcloser closefd(h);
00200 
00201     char buf[REASONABLE_BASE_SIZE];
00202 
00203     const char *start = buf;
00204     const char *end = buf + flint_io_read(h, buf, REASONABLE_BASE_SIZE, 0);
00205 
00206     DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
00207     uint4 format;
00208     DO_UNPACK_UINT_ERRCHECK(&start, end, format);
00209     if (format != CURR_FORMAT) {
00210         err_msg += "Bad base file format " + om_tostring(format) + " in " +
00211                     basename + "\n";
00212         return false;
00213     }
00214     DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
00215     DO_UNPACK_UINT_ERRCHECK(&start, end, root);
00216     DO_UNPACK_UINT_ERRCHECK(&start, end, level);
00217     DO_UNPACK_UINT_ERRCHECK(&start, end, bit_map_size);
00218     DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
00219     DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
00220     uint4 have_fakeroot_;
00221     DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
00222     have_fakeroot = have_fakeroot_;
00223 
00224     uint4 sequential_;
00225     DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
00226     sequential = sequential_;
00227 
00228     if (have_fakeroot && !sequential) {
00229         sequential = true; // FIXME : work out why we need this...
00230         /*
00231         err_msg += "Corrupt base file, `" + basename + "':\n"
00232                 "sequential must be set whenever have_fakeroot is set.\n" +
00233                 "sequential=" + (sequential?"true":"false") +
00234                 ", have_fakeroot=" + (have_fakeroot?"true":"false") + "\n";
00235         return false;
00236         */
00237     }
00238 
00239     uint4 revision2;
00240     DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
00241     if (revision != revision2) {
00242         err_msg += "Revision number mismatch in " +
00243                 basename + ": " +
00244                 om_tostring(revision) + " vs " + om_tostring(revision2) + "\n";
00245         return false;
00246     }
00247 
00248     /* It's ok to delete a zero pointer */
00249     delete [] bit_map0;
00250     bit_map0 = 0;
00251     delete [] bit_map;
00252     bit_map = 0;
00253 
00254     bit_map0 = new byte[bit_map_size];
00255     bit_map = new byte[bit_map_size];
00256 
00257     size_t n = end - start;
00258     if (n < bit_map_size) {
00259         memcpy(bit_map0, start, n);
00260         (void)flint_io_read(h, reinterpret_cast<char *>(bit_map0) + n,
00261                             bit_map_size - n, bit_map_size - n);
00262         n = 0;
00263     } else {
00264         memcpy(bit_map0, start, bit_map_size);
00265         n -= bit_map_size;
00266         if (n) memmove(buf, start + bit_map_size, n);
00267     }
00268     memcpy(bit_map, bit_map0, bit_map_size);
00269 
00270     start = buf;
00271     end = buf + n;
00272     end += flint_io_read(h, buf + n, REASONABLE_BASE_SIZE - n, 0);
00273 
00274     uint4 revision3;
00275     if (!unpack_uint(&start, end, &revision3)) {
00276         err_msg += "Couldn't read revision3 from base file " +
00277         basename + "\n";
00278         return false;
00279     }
00280 
00281     if (revision != revision3) {
00282         err_msg += "Revision number mismatch in " +
00283                 basename + ": " +
00284                 om_tostring(revision) + " vs " + om_tostring(revision3) + "\n";
00285         return false;
00286     }
00287 
00288     if (start != end) {
00289         err_msg += "Junk at end of base file " + basename + "\n";
00290         return false;
00291     }
00292 
00293     return true;
00294 }
00295 
00296 void
00297 FlintTable_base::write_to_file(const string &filename)
00298 {
00299     calculate_last_block();
00300 
00301     string buf;
00302     buf += pack_uint(revision);
00303     buf += pack_uint(CURR_FORMAT);
00304     buf += pack_uint(block_size);
00305     buf += pack_uint(static_cast<uint4>(root));
00306     buf += pack_uint(static_cast<uint4>(level));
00307     buf += pack_uint(static_cast<uint4>(bit_map_size));
00308     buf += pack_uint(static_cast<uint4>(item_count));
00309     buf += pack_uint(static_cast<uint4>(last_block));
00310     buf += pack_uint(have_fakeroot);
00311     buf += pack_uint(sequential);
00312     buf += pack_uint(revision);
00313     if (bit_map_size > 0) {
00314         buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
00315     }
00316     buf += pack_uint(revision);  // REVISION2
00317 
00318 #ifdef __WIN32__
00319     int h = msvc_posix_open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
00320 #else
00321     int h = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
00322 #endif
00323     if (h < 0) {
00324         string message = string("Couldn't open base ")
00325                 + filename + " to write: " + strerror(errno);
00326         throw Xapian::DatabaseOpeningError(message);
00327     }
00328     fdcloser closefd(h);
00329 
00330     flint_io_write(h, buf.data(), buf.size());
00331     flint_io_sync(h);
00332 }
00333 
00334 /*
00335    block_free_at_start(B, n) is true iff (if and only if) block n was free at
00336    the start of the transaction on the B-tree.
00337 */
00338 
00339 bool
00340 FlintTable_base::block_free_at_start(uint4 n) const
00341 {
00342     size_t i = n / CHAR_BIT;
00343     int bit = 0x1 << n % CHAR_BIT;
00344     return (bit_map0[i] & bit) == 0;
00345 }
00346 
00347 /* free_block(B, n) causes block n to be marked free in the bit map.
00348    B->bit_map_low is the lowest byte in the bit map known to have a free bit
00349    set. Searching starts from there when looking for a free block.
00350 */
00351 
00352 void
00353 FlintTable_base::free_block(uint4 n)
00354 {
00355     uint4 i = n / CHAR_BIT;
00356     int bit = 0x1 << n % CHAR_BIT;
00357     bit_map[i] &= ~ bit;
00358 
00359     if (bit_map_low > i)
00360         if ((bit_map0[i] & bit) == 0) /* free at start */
00361             bit_map_low = i;
00362 }
00363 
00364 /* extend(B) increases the size of the two bit maps in an obvious way.
00365    The bitmap file grows and shrinks as the DB file grows and shrinks in
00366    internal usage. But the DB file itself does not reduce in size, no matter
00367    how many blocks are freed.
00368 */
00369 
00370 #define BIT_MAP_INC 1000
00371     /* increase the bit map by this number of bytes if it overflows */
00372 
00373 void
00374 FlintTable_base::extend_bit_map()
00375 {
00376     int n = bit_map_size + BIT_MAP_INC;
00377     byte *new_bit_map0 = 0;
00378     byte *new_bit_map = 0;
00379 
00380     try {
00381         new_bit_map0 = new byte[n];
00382         new_bit_map = new byte[n];
00383 
00384         memcpy(new_bit_map0, bit_map0, bit_map_size);
00385         memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
00386         
00387         memcpy(new_bit_map, bit_map, bit_map_size);
00388         memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
00389     } catch (...) {
00390         delete [] new_bit_map0;
00391         delete [] new_bit_map;
00392         throw;
00393     }
00394     delete [] bit_map0;
00395     bit_map0 = new_bit_map0;
00396     delete [] bit_map;
00397     bit_map = new_bit_map;
00398     bit_map_size = n;
00399 }
00400 
00401 /* next_free_block(B) returns the number of the next available free block in
00402    the bitmap, marking it as 'in use' before returning. More precisely, we get
00403    a block that is both free now (in bit_map) and was free at the beginning of
00404    the transaction on the B-tree (in bit_map0).
00405 
00406    Starting at bit_map_low we go up byte at a time until we find a byte with a
00407    free (zero) bit, and then go up that byte bit at a time. If the bit map has
00408    no free bits it is extended so that it will have.
00409 */
00410 
00411 uint4
00412 FlintTable_base::next_free_block()
00413 {
00414     uint4 i;
00415     int x;
00416     for (i = bit_map_low;; i++) {
00417         if (i >= bit_map_size) {
00418             extend_bit_map();
00419         }
00420         x = bit_map0[i] | bit_map[i];
00421         if (x != UCHAR_MAX) break;
00422     }
00423     uint4 n = i * CHAR_BIT;
00424     int d = 0x1;
00425     while ((x & d) != 0) { d <<= 1; n++; }
00426     bit_map[i] |= d;   /* set as 'in use' */
00427     bit_map_low = i;
00428     if (n > last_block) {
00429         last_block = n;
00430     }
00431     return n;
00432 }
00433 
00434 bool
00435 FlintTable_base::block_free_now(uint4 n)
00436 {
00437     uint4 i = n / CHAR_BIT;
00438     int bit = 0x1 << n % CHAR_BIT;
00439     return (bit_map[i] & bit) == 0;
00440 }
00441 
00442 void
00443 FlintTable_base::calculate_last_block()
00444 {
00445     if (bit_map_size == 0) {
00446         last_block = 0;
00447         return;
00448     }
00449     int i = bit_map_size - 1;
00450     while (bit_map[i] == 0 && i > 0) {
00451         i--;
00452     }
00453     bit_map_size = i + 1;
00454 
00455     int x = bit_map[i];
00456 
00457     /* Check for when there are no blocks */
00458     if (x == 0) {
00459         last_block = 0;
00460         return;
00461     }
00462     uint4 n = (i + 1) * CHAR_BIT - 1;
00463     int d = 0x1 << (CHAR_BIT - 1);
00464     while ((x & d) == 0) { d >>= 1; n--; }
00465 
00466     last_block = n;
00467 }
00468 
00469 bool
00470 FlintTable_base::is_empty() const
00471 {
00472     for (uint4 i = 0; i < bit_map_size; i++) {
00473         if (bit_map[i] != 0) {
00474             return false;
00475         }
00476     }
00477     return true;
00478 }
00479 
00480 void
00481 FlintTable_base::clear_bit_map()
00482 {
00483     memset(bit_map, 0, bit_map_size);
00484 }
00485 
00486 // We've commited, so "bitmap at start" needs to be reset to the current bitmap.
00487 void
00488 FlintTable_base::commit()
00489 {
00490     memcpy(bit_map0, bit_map, bit_map_size);
00491     bit_map_low = 0;
00492 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.