00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include "safeerrno.h"
00026 #ifdef __WIN32__
00027 # include "msvc_posix_wrapper.h"
00028 #endif
00029
00030 #include <xapian/error.h>
00031
00032 #include "chert_btreebase.h"
00033 #include "io_utils.h"
00034 #include "omassert.h"
00035 #include "pack.h"
00036 #include "str.h"
00037 #include "utils.h"
00038
00039 #include <algorithm>
00040 #include <climits>
00041 #include <cstring>
00042
00043 using namespace std;
00044
00045
00046
00074 #define CURR_FORMAT 5U
00075
00076 ChertTable_base::ChertTable_base()
00077 : revision(0),
00078 block_size(0),
00079 root(0),
00080 level(0),
00081 bit_map_size(0),
00082 item_count(0),
00083 last_block(0),
00084 have_fakeroot(false),
00085 sequential(false),
00086 bit_map_low(0),
00087 bit_map0(0),
00088 bit_map(0)
00089 {
00090 }
00091
00092 void
00093 ChertTable_base::swap(ChertTable_base &other)
00094 {
00095 std::swap(revision, other.revision);
00096 std::swap(block_size, other.block_size);
00097 std::swap(root, other.root);
00098 std::swap(level, other.level);
00099 std::swap(bit_map_size, other.bit_map_size);
00100 std::swap(item_count, other.item_count);
00101 std::swap(last_block, other.last_block);
00102 std::swap(have_fakeroot, other.have_fakeroot);
00103 std::swap(sequential, other.sequential);
00104 std::swap(bit_map_low, other.bit_map_low);
00105 std::swap(bit_map0, other.bit_map0);
00106 std::swap(bit_map, other.bit_map);
00107 }
00108
00109 ChertTable_base::~ChertTable_base()
00110 {
00111 delete [] bit_map;
00112 bit_map = 0;
00113 delete [] bit_map0;
00114 bit_map0 = 0;
00115 }
00116
00118 static bool
00119 do_unpack_uint(const char **start, const char *end,
00120 uint4 *dest, string &err_msg,
00121 const string &basename,
00122 const char *varname)
00123 {
00124 bool result = unpack_uint(start, end, dest);
00125 if (rare(!result)) {
00126 err_msg += "Unable to read ";
00127 err_msg += varname;
00128 err_msg += " from ";
00129 err_msg += basename;
00130 err_msg += '\n';
00131 }
00132 return result;
00133 }
00134
00135 static bool
00136 do_unpack_uint(const char **start, const char *end,
00137 chert_tablesize_t *dest, string &err_msg,
00138 const string &basename,
00139 const char *varname)
00140 {
00141 bool result = unpack_uint(start, end, dest);
00142 if (rare(!result)) {
00143 err_msg += "Unable to read ";
00144 err_msg += varname;
00145 err_msg += " from ";
00146 err_msg += basename;
00147 err_msg += '\n';
00148 }
00149 return result;
00150 }
00151
00152 #define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
00153 do { \
00154 if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
00155 return false; \
00156 } \
00157 } while(0)
00158
00159
00160
00161
00162
00163
00164
00165 #define REASONABLE_BASE_SIZE 1024
00166
00167 bool
00168 ChertTable_base::read(const string & name, char ch, bool read_bitmap,
00169 string &err_msg)
00170 {
00171 string basename = name + "base" + ch;
00172 #ifdef __WIN32__
00173 int h = msvc_posix_open(basename.c_str(), O_RDONLY | O_BINARY);
00174 #else
00175 int h = open(basename.c_str(), O_RDONLY | O_BINARY);
00176 #endif
00177
00178 if (h == -1) {
00179 err_msg += "Couldn't open " + basename + ": " + strerror(errno) + "\n";
00180 return false;
00181 }
00182 fdcloser closefd(h);
00183
00184 char buf[REASONABLE_BASE_SIZE];
00185
00186 const char *start = buf;
00187 const char *end = buf + io_read(h, buf, REASONABLE_BASE_SIZE, 0);
00188
00189 DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
00190 uint4 format;
00191 DO_UNPACK_UINT_ERRCHECK(&start, end, format);
00192 if (format != CURR_FORMAT) {
00193 err_msg += "Bad base file format " + str(format) + " in " +
00194 basename + "\n";
00195 return false;
00196 }
00197 DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
00198 DO_UNPACK_UINT_ERRCHECK(&start, end, root);
00199 DO_UNPACK_UINT_ERRCHECK(&start, end, level);
00200 DO_UNPACK_UINT_ERRCHECK(&start, end, bit_map_size);
00201 DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
00202 DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
00203 uint4 have_fakeroot_;
00204 DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
00205 have_fakeroot = have_fakeroot_;
00206
00207 uint4 sequential_;
00208 DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
00209 sequential = sequential_;
00210
00211 if (have_fakeroot && !sequential) {
00212 sequential = true;
00213
00214
00215
00216
00217
00218
00219
00220 }
00221
00222 uint4 revision2;
00223 DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
00224 if (revision != revision2) {
00225 err_msg += "Revision number mismatch in " +
00226 basename + ": " +
00227 str(revision) + " vs " + str(revision2) + "\n";
00228 return false;
00229 }
00230
00231
00232 delete [] bit_map0;
00233 bit_map0 = 0;
00234 delete [] bit_map;
00235 bit_map = 0;
00236
00237 if (!read_bitmap)
00238 return true;
00239
00240 bit_map0 = new byte[bit_map_size];
00241 bit_map = new byte[bit_map_size];
00242
00243 size_t n = end - start;
00244 if (n < bit_map_size) {
00245 memcpy(bit_map0, start, n);
00246 (void)io_read(h, reinterpret_cast<char *>(bit_map0) + n,
00247 bit_map_size - n, bit_map_size - n);
00248 n = 0;
00249 } else {
00250 memcpy(bit_map0, start, bit_map_size);
00251 n -= bit_map_size;
00252 if (n) memmove(buf, start + bit_map_size, n);
00253 }
00254 memcpy(bit_map, bit_map0, bit_map_size);
00255
00256 start = buf;
00257 end = buf + n;
00258 end += io_read(h, buf + n, REASONABLE_BASE_SIZE - n, 0);
00259
00260 uint4 revision3;
00261 if (!unpack_uint(&start, end, &revision3)) {
00262 err_msg += "Couldn't read revision3 from base file " +
00263 basename + "\n";
00264 return false;
00265 }
00266
00267 if (revision != revision3) {
00268 err_msg += "Revision number mismatch in " +
00269 basename + ": " +
00270 str(revision) + " vs " + str(revision3) + "\n";
00271 return false;
00272 }
00273
00274 if (start != end) {
00275 err_msg += "Junk at end of base file " + basename + "\n";
00276 return false;
00277 }
00278
00279 return true;
00280 }
00281
00282 void
00283 ChertTable_base::write_to_file(const string &filename,
00284 char base_letter,
00285 const string &tablename,
00286 int changes_fd,
00287 const string * changes_tail)
00288 {
00289 calculate_last_block();
00290
00291 string buf;
00292 pack_uint(buf, revision);
00293 pack_uint(buf, CURR_FORMAT);
00294 pack_uint(buf, block_size);
00295 pack_uint(buf, static_cast<uint4>(root));
00296 pack_uint(buf, static_cast<uint4>(level));
00297 pack_uint(buf, static_cast<uint4>(bit_map_size));
00298 pack_uint(buf, item_count);
00299 pack_uint(buf, static_cast<uint4>(last_block));
00300 pack_uint(buf, have_fakeroot);
00301 pack_uint(buf, sequential);
00302 pack_uint(buf, revision);
00303 if (bit_map_size > 0) {
00304 buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
00305 }
00306 pack_uint(buf, revision);
00307
00308 #ifdef __WIN32__
00309 int h = msvc_posix_open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
00310 #else
00311 int h = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
00312 #endif
00313 if (h < 0) {
00314 string message = string("Couldn't open base ")
00315 + filename + " to write: " + strerror(errno);
00316 throw Xapian::DatabaseOpeningError(message);
00317 }
00318 fdcloser closefd(h);
00319
00320 if (changes_fd >= 0) {
00321 string changes_buf;
00322 pack_uint(changes_buf, 1u);
00323 pack_string(changes_buf, tablename);
00324 changes_buf += base_letter;
00325 pack_uint(changes_buf, buf.size());
00326 io_write(changes_fd, changes_buf.data(), changes_buf.size());
00327 io_write(changes_fd, buf.data(), buf.size());
00328 if (changes_tail != NULL) {
00329 io_write(changes_fd, changes_tail->data(), changes_tail->size());
00330
00331 io_sync(changes_fd);
00332 }
00333 }
00334
00335 io_write(h, buf.data(), buf.size());
00336 io_sync(h);
00337 }
00338
00339
00340
00341
00342
00343
00344 bool
00345 ChertTable_base::block_free_at_start(uint4 n) const
00346 {
00347 size_t i = n / CHAR_BIT;
00348 int bit = 0x1 << n % CHAR_BIT;
00349 return (bit_map0[i] & bit) == 0;
00350 }
00351
00352
00353
00354
00355
00356
00357 void
00358 ChertTable_base::free_block(uint4 n)
00359 {
00360 uint4 i = n / CHAR_BIT;
00361 int bit = 0x1 << n % CHAR_BIT;
00362 bit_map[i] &= ~ bit;
00363
00364 if (bit_map_low > i)
00365 if ((bit_map0[i] & bit) == 0)
00366 bit_map_low = i;
00367 }
00368
00369
00370
00371
00372
00373
00374
00375 #define BIT_MAP_INC 1000
00376
00377
00378 void
00379 ChertTable_base::extend_bit_map()
00380 {
00381 int n = bit_map_size + BIT_MAP_INC;
00382 byte *new_bit_map0 = 0;
00383 byte *new_bit_map = 0;
00384
00385 try {
00386 new_bit_map0 = new byte[n];
00387 new_bit_map = new byte[n];
00388
00389 memcpy(new_bit_map0, bit_map0, bit_map_size);
00390 memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
00391
00392 memcpy(new_bit_map, bit_map, bit_map_size);
00393 memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
00394 } catch (...) {
00395 delete [] new_bit_map0;
00396 delete [] new_bit_map;
00397 throw;
00398 }
00399 delete [] bit_map0;
00400 bit_map0 = new_bit_map0;
00401 delete [] bit_map;
00402 bit_map = new_bit_map;
00403 bit_map_size = n;
00404 }
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416 uint4
00417 ChertTable_base::next_free_block()
00418 {
00419 uint4 i;
00420 int x;
00421 for (i = bit_map_low;; i++) {
00422 if (i >= bit_map_size) {
00423 extend_bit_map();
00424 }
00425 x = bit_map0[i] | bit_map[i];
00426 if (x != UCHAR_MAX) break;
00427 }
00428 uint4 n = i * CHAR_BIT;
00429 int d = 0x1;
00430 while ((x & d) != 0) { d <<= 1; n++; }
00431 bit_map[i] |= d;
00432 bit_map_low = i;
00433 if (n > last_block) {
00434 last_block = n;
00435 }
00436 return n;
00437 }
00438
00439 bool
00440 ChertTable_base::find_changed_block(uint4 * n)
00441 {
00442
00443
00444 while ((*n) <= last_block) {
00445 size_t offset = (*n) / CHAR_BIT;
00446 int bit = 0x1 << (*n) % CHAR_BIT;
00447
00448 if (((bit_map0[offset] & bit) == 0) && ((bit_map[offset] & bit) != 0)) {
00449 return true;
00450 }
00451 ++(*n);
00452 }
00453
00454 return false;
00455 }
00456
00457 bool
00458 ChertTable_base::block_free_now(uint4 n)
00459 {
00460 uint4 i = n / CHAR_BIT;
00461 int bit = 0x1 << n % CHAR_BIT;
00462 return (bit_map[i] & bit) == 0;
00463 }
00464
00465 void
00466 ChertTable_base::calculate_last_block()
00467 {
00468 if (bit_map_size == 0) {
00469 last_block = 0;
00470 return;
00471 }
00472 int i = bit_map_size - 1;
00473 while (bit_map[i] == 0 && i > 0) {
00474 i--;
00475 }
00476 bit_map_size = i + 1;
00477
00478 int x = bit_map[i];
00479
00480
00481 if (x == 0) {
00482 last_block = 0;
00483 return;
00484 }
00485 uint4 n = (i + 1) * CHAR_BIT - 1;
00486 int d = 0x1 << (CHAR_BIT - 1);
00487 while ((x & d) == 0) { d >>= 1; n--; }
00488
00489 last_block = n;
00490 }
00491
00492 bool
00493 ChertTable_base::is_empty() const
00494 {
00495 for (uint4 i = 0; i < bit_map_size; i++) {
00496 if (bit_map[i] != 0) {
00497 return false;
00498 }
00499 }
00500 return true;
00501 }
00502
00503 void
00504 ChertTable_base::clear_bit_map()
00505 {
00506 memset(bit_map, 0, bit_map_size);
00507 }
00508
00509
00510 void
00511 ChertTable_base::commit()
00512 {
00513 memcpy(bit_map0, bit_map, bit_map_size);
00514 bit_map_low = 0;
00515 }