00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include "safeerrno.h"
00026 #ifdef __WIN32__
00027 # include "msvc_posix_wrapper.h"
00028 #endif
00029
00030 #include <xapian/error.h>
00031
00032 #include "chert_btreebase.h"
00033 #include "io_utils.h"
00034 #include "omassert.h"
00035 #include "pack.h"
00036 #include "str.h"
00037 #include "utils.h"
00038
00039 #include <algorithm>
00040 #include <climits>
00041 #include <cstring>
00042
00043 using namespace std;
00044
00045
00046
00074 #define CURR_FORMAT 5U
00075
00076 ChertTable_base::ChertTable_base()
00077 : revision(0),
00078 block_size(0),
00079 root(0),
00080 level(0),
00081 bit_map_size(0),
00082 item_count(0),
00083 last_block(0),
00084 have_fakeroot(false),
00085 sequential(false),
00086 bit_map_low(0),
00087 bit_map0(0),
00088 bit_map(0)
00089 {
00090 }
00091
00092 ChertTable_base::ChertTable_base(const ChertTable_base &other)
00093 : revision(other.revision),
00094 block_size(other.block_size),
00095 root(other.root),
00096 level(other.level),
00097 bit_map_size(other.bit_map_size),
00098 item_count(other.item_count),
00099 last_block(other.last_block),
00100 have_fakeroot(other.have_fakeroot),
00101 sequential(other.sequential),
00102 bit_map_low(other.bit_map_low),
00103 bit_map0(0),
00104 bit_map(0)
00105 {
00106 try {
00107 bit_map0 = new byte[bit_map_size];
00108 bit_map = new byte[bit_map_size];
00109
00110 memcpy(bit_map0, other.bit_map0, bit_map_size);
00111 memcpy(bit_map, other.bit_map, bit_map_size);
00112 } catch (...) {
00113 delete [] bit_map0;
00114 delete [] bit_map;
00115 }
00116 }
00117
00118 void
00119 ChertTable_base::swap(ChertTable_base &other)
00120 {
00121 std::swap(revision, other.revision);
00122 std::swap(block_size, other.block_size);
00123 std::swap(root, other.root);
00124 std::swap(level, other.level);
00125 std::swap(bit_map_size, other.bit_map_size);
00126 std::swap(item_count, other.item_count);
00127 std::swap(last_block, other.last_block);
00128 std::swap(have_fakeroot, other.have_fakeroot);
00129 std::swap(sequential, other.sequential);
00130 std::swap(bit_map_low, other.bit_map_low);
00131 std::swap(bit_map0, other.bit_map0);
00132 std::swap(bit_map, other.bit_map);
00133 }
00134
00135 ChertTable_base::~ChertTable_base()
00136 {
00137 delete [] bit_map;
00138 bit_map = 0;
00139 delete [] bit_map0;
00140 bit_map0 = 0;
00141 }
00142
00144 static bool
00145 do_unpack_uint(const char **start, const char *end,
00146 uint4 *dest, string &err_msg,
00147 const string &basename,
00148 const char *varname)
00149 {
00150 bool result = unpack_uint(start, end, dest);
00151 if (rare(!result)) {
00152 err_msg += "Unable to read ";
00153 err_msg += varname;
00154 err_msg += " from ";
00155 err_msg += basename;
00156 err_msg += '\n';
00157 }
00158 return result;
00159 }
00160
00161 static bool
00162 do_unpack_uint(const char **start, const char *end,
00163 chert_tablesize_t *dest, string &err_msg,
00164 const string &basename,
00165 const char *varname)
00166 {
00167 bool result = unpack_uint(start, end, dest);
00168 if (rare(!result)) {
00169 err_msg += "Unable to read ";
00170 err_msg += varname;
00171 err_msg += " from ";
00172 err_msg += basename;
00173 err_msg += '\n';
00174 }
00175 return result;
00176 }
00177
00178 #define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
00179 do { \
00180 if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
00181 return false; \
00182 } \
00183 } while(0)
00184
00185
00186
00187
00188
00189
00190
00191 #define REASONABLE_BASE_SIZE 1024
00192
00193 bool
00194 ChertTable_base::read(const string & name, char ch, bool read_bitmap,
00195 string &err_msg)
00196 {
00197 string basename = name + "base" + ch;
00198 #ifdef __WIN32__
00199 int h = msvc_posix_open(basename.c_str(), O_RDONLY | O_BINARY);
00200 #else
00201 int h = open(basename.c_str(), O_RDONLY | O_BINARY);
00202 #endif
00203
00204 if (h == -1) {
00205 err_msg += "Couldn't open " + basename + ": " + strerror(errno) + "\n";
00206 return false;
00207 }
00208 fdcloser closefd(h);
00209
00210 char buf[REASONABLE_BASE_SIZE];
00211
00212 const char *start = buf;
00213 const char *end = buf + io_read(h, buf, REASONABLE_BASE_SIZE, 0);
00214
00215 DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
00216 uint4 format;
00217 DO_UNPACK_UINT_ERRCHECK(&start, end, format);
00218 if (format != CURR_FORMAT) {
00219 err_msg += "Bad base file format " + str(format) + " in " +
00220 basename + "\n";
00221 return false;
00222 }
00223 DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
00224 DO_UNPACK_UINT_ERRCHECK(&start, end, root);
00225 DO_UNPACK_UINT_ERRCHECK(&start, end, level);
00226 DO_UNPACK_UINT_ERRCHECK(&start, end, bit_map_size);
00227 DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
00228 DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
00229 uint4 have_fakeroot_;
00230 DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
00231 have_fakeroot = have_fakeroot_;
00232
00233 uint4 sequential_;
00234 DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
00235 sequential = sequential_;
00236
00237 if (have_fakeroot && !sequential) {
00238 sequential = true;
00239
00240
00241
00242
00243
00244
00245
00246 }
00247
00248 uint4 revision2;
00249 DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
00250 if (revision != revision2) {
00251 err_msg += "Revision number mismatch in " +
00252 basename + ": " +
00253 str(revision) + " vs " + str(revision2) + "\n";
00254 return false;
00255 }
00256
00257
00258 delete [] bit_map0;
00259 bit_map0 = 0;
00260 delete [] bit_map;
00261 bit_map = 0;
00262
00263 if (!read_bitmap)
00264 return true;
00265
00266 bit_map0 = new byte[bit_map_size];
00267 bit_map = new byte[bit_map_size];
00268
00269 size_t n = end - start;
00270 if (n < bit_map_size) {
00271 memcpy(bit_map0, start, n);
00272 (void)io_read(h, reinterpret_cast<char *>(bit_map0) + n,
00273 bit_map_size - n, bit_map_size - n);
00274 n = 0;
00275 } else {
00276 memcpy(bit_map0, start, bit_map_size);
00277 n -= bit_map_size;
00278 if (n) memmove(buf, start + bit_map_size, n);
00279 }
00280 memcpy(bit_map, bit_map0, bit_map_size);
00281
00282 start = buf;
00283 end = buf + n;
00284 end += io_read(h, buf + n, REASONABLE_BASE_SIZE - n, 0);
00285
00286 uint4 revision3;
00287 if (!unpack_uint(&start, end, &revision3)) {
00288 err_msg += "Couldn't read revision3 from base file " +
00289 basename + "\n";
00290 return false;
00291 }
00292
00293 if (revision != revision3) {
00294 err_msg += "Revision number mismatch in " +
00295 basename + ": " +
00296 str(revision) + " vs " + str(revision3) + "\n";
00297 return false;
00298 }
00299
00300 if (start != end) {
00301 err_msg += "Junk at end of base file " + basename + "\n";
00302 return false;
00303 }
00304
00305 return true;
00306 }
00307
00308 void
00309 ChertTable_base::write_to_file(const string &filename,
00310 char base_letter,
00311 const string &tablename,
00312 int changes_fd,
00313 const string * changes_tail)
00314 {
00315 calculate_last_block();
00316
00317 string buf;
00318 pack_uint(buf, revision);
00319 pack_uint(buf, CURR_FORMAT);
00320 pack_uint(buf, block_size);
00321 pack_uint(buf, static_cast<uint4>(root));
00322 pack_uint(buf, static_cast<uint4>(level));
00323 pack_uint(buf, static_cast<uint4>(bit_map_size));
00324 pack_uint(buf, item_count);
00325 pack_uint(buf, static_cast<uint4>(last_block));
00326 pack_uint(buf, have_fakeroot);
00327 pack_uint(buf, sequential);
00328 pack_uint(buf, revision);
00329 if (bit_map_size > 0) {
00330 buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
00331 }
00332 pack_uint(buf, revision);
00333
00334 #ifdef __WIN32__
00335 int h = msvc_posix_open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
00336 #else
00337 int h = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
00338 #endif
00339 if (h < 0) {
00340 string message = string("Couldn't open base ")
00341 + filename + " to write: " + strerror(errno);
00342 throw Xapian::DatabaseOpeningError(message);
00343 }
00344 fdcloser closefd(h);
00345
00346 if (changes_fd >= 0) {
00347 string changes_buf;
00348 pack_uint(changes_buf, 1u);
00349 pack_string(changes_buf, tablename);
00350 changes_buf += base_letter;
00351 pack_uint(changes_buf, buf.size());
00352 io_write(changes_fd, changes_buf.data(), changes_buf.size());
00353 io_write(changes_fd, buf.data(), buf.size());
00354 if (changes_tail != NULL) {
00355 io_write(changes_fd, changes_tail->data(), changes_tail->size());
00356
00357 io_sync(changes_fd);
00358 }
00359 }
00360
00361 io_write(h, buf.data(), buf.size());
00362 io_sync(h);
00363 }
00364
00365
00366
00367
00368
00369
00370 bool
00371 ChertTable_base::block_free_at_start(uint4 n) const
00372 {
00373 size_t i = n / CHAR_BIT;
00374 int bit = 0x1 << n % CHAR_BIT;
00375 return (bit_map0[i] & bit) == 0;
00376 }
00377
00378
00379
00380
00381
00382
00383 void
00384 ChertTable_base::free_block(uint4 n)
00385 {
00386 uint4 i = n / CHAR_BIT;
00387 int bit = 0x1 << n % CHAR_BIT;
00388 bit_map[i] &= ~ bit;
00389
00390 if (bit_map_low > i)
00391 if ((bit_map0[i] & bit) == 0)
00392 bit_map_low = i;
00393 }
00394
00395
00396
00397
00398
00399
00400
00401 #define BIT_MAP_INC 1000
00402
00403
00404 void
00405 ChertTable_base::extend_bit_map()
00406 {
00407 int n = bit_map_size + BIT_MAP_INC;
00408 byte *new_bit_map0 = 0;
00409 byte *new_bit_map = 0;
00410
00411 try {
00412 new_bit_map0 = new byte[n];
00413 new_bit_map = new byte[n];
00414
00415 memcpy(new_bit_map0, bit_map0, bit_map_size);
00416 memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
00417
00418 memcpy(new_bit_map, bit_map, bit_map_size);
00419 memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
00420 } catch (...) {
00421 delete [] new_bit_map0;
00422 delete [] new_bit_map;
00423 throw;
00424 }
00425 delete [] bit_map0;
00426 bit_map0 = new_bit_map0;
00427 delete [] bit_map;
00428 bit_map = new_bit_map;
00429 bit_map_size = n;
00430 }
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442 uint4
00443 ChertTable_base::next_free_block()
00444 {
00445 uint4 i;
00446 int x;
00447 for (i = bit_map_low;; i++) {
00448 if (i >= bit_map_size) {
00449 extend_bit_map();
00450 }
00451 x = bit_map0[i] | bit_map[i];
00452 if (x != UCHAR_MAX) break;
00453 }
00454 uint4 n = i * CHAR_BIT;
00455 int d = 0x1;
00456 while ((x & d) != 0) { d <<= 1; n++; }
00457 bit_map[i] |= d;
00458 bit_map_low = i;
00459 if (n > last_block) {
00460 last_block = n;
00461 }
00462 return n;
00463 }
00464
00465 bool
00466 ChertTable_base::find_changed_block(uint4 * n)
00467 {
00468
00469
00470 while ((*n) <= last_block) {
00471 size_t offset = (*n) / CHAR_BIT;
00472 int bit = 0x1 << (*n) % CHAR_BIT;
00473
00474 if (((bit_map0[offset] & bit) == 0) && ((bit_map[offset] & bit) != 0)) {
00475 return true;
00476 }
00477 ++(*n);
00478 }
00479
00480 return false;
00481 }
00482
00483 bool
00484 ChertTable_base::block_free_now(uint4 n)
00485 {
00486 uint4 i = n / CHAR_BIT;
00487 int bit = 0x1 << n % CHAR_BIT;
00488 return (bit_map[i] & bit) == 0;
00489 }
00490
00491 void
00492 ChertTable_base::calculate_last_block()
00493 {
00494 if (bit_map_size == 0) {
00495 last_block = 0;
00496 return;
00497 }
00498 int i = bit_map_size - 1;
00499 while (bit_map[i] == 0 && i > 0) {
00500 i--;
00501 }
00502 bit_map_size = i + 1;
00503
00504 int x = bit_map[i];
00505
00506
00507 if (x == 0) {
00508 last_block = 0;
00509 return;
00510 }
00511 uint4 n = (i + 1) * CHAR_BIT - 1;
00512 int d = 0x1 << (CHAR_BIT - 1);
00513 while ((x & d) == 0) { d >>= 1; n--; }
00514
00515 last_block = n;
00516 }
00517
00518 bool
00519 ChertTable_base::is_empty() const
00520 {
00521 for (uint4 i = 0; i < bit_map_size; i++) {
00522 if (bit_map[i] != 0) {
00523 return false;
00524 }
00525 }
00526 return true;
00527 }
00528
00529 void
00530 ChertTable_base::clear_bit_map()
00531 {
00532 memset(bit_map, 0, bit_map_size);
00533 }
00534
00535
00536 void
00537 ChertTable_base::commit()
00538 {
00539 memcpy(bit_map0, bit_map, bit_map_size);
00540 bit_map_low = 0;
00541 }