00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef OM_HGUARD_CHERT_TABLE_H
00024 #define OM_HGUARD_CHERT_TABLE_H
00025
00026 #include <xapian/error.h>
00027 #include <xapian/visibility.h>
00028
00029 #include "chert_types.h"
00030 #include "chert_btreebase.h"
00031 #include "chert_cursor.h"
00032
00033 #include "noreturn.h"
00034 #include "omassert.h"
00035 #include "str.h"
00036 #include "stringutils.h"
00037 #include "unaligned.h"
00038
00039 #include <algorithm>
00040 #include <string>
00041
00042 #include <zlib.h>
00043
00044 #define DONT_COMPRESS -1
00045
00051 #define CHERT_BTREE_MAX_KEY_LEN 252
00052
00055 #define BLOCK_CAPACITY 4
00056
00057
00058 #define BYTES_PER_BLOCK_NUMBER 4
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070 #define K1 1
00071 #define I2 2
00072 #define D2 2
00073 #define C2 2
00074
00075
00076
00077 #define getK(p, c) getint1(p, c)
00078 #define setD(p, c, x) setint2(p, c, x)
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 #define REVISION(b) static_cast<unsigned int>(getint4(b, 0))
00101 #define GET_LEVEL(b) getint1(b, 4)
00102 #define MAX_FREE(b) getint2(b, 5)
00103 #define TOTAL_FREE(b) getint2(b, 7)
00104 #define DIR_END(b) getint2(b, 9)
00105 #define DIR_START 11
00106
00107 #define SET_REVISION(b, x) setint4(b, 0, x)
00108 #define SET_LEVEL(b, x) setint1(b, 4, x)
00109 #define SET_MAX_FREE(b, x) setint2(b, 5, x)
00110 #define SET_TOTAL_FREE(b, x) setint2(b, 7, x)
00111 #define SET_DIR_END(b, x) setint2(b, 9, x)
00112
00113 class XAPIAN_VISIBILITY_DEFAULT Key {
00114 const byte *p;
00115 public:
00116 explicit Key(const byte * p_) : p(p_) { }
00117 const byte * get_address() const { return p; }
00118 void read(std::string * key) const {
00119 key->assign(reinterpret_cast<const char *>(p + K1), length());
00120 }
00121 bool operator==(Key key2) const;
00122 bool operator!=(Key key2) const { return !(*this == key2); }
00123 bool operator<(Key key2) const;
00124 bool operator>=(Key key2) const { return !(*this < key2); }
00125 bool operator>(Key key2) const { return key2 < *this; }
00126 bool operator<=(Key key2) const { return !(key2 < *this); }
00127 int length() const {
00128 AssertRel(getK(p, 0),>=,3);
00129 return getK(p, 0) - C2 - K1;
00130 }
00131 char operator[](size_t i) const {
00132 AssertRel(i,<,(size_t)length());
00133 return p[i + K1];
00134 }
00135 };
00136
00137
00138
00139 template <class T> class Item_base {
00140 protected:
00141 T p;
00142 public:
00143
00144 Item_base(T p_, int c) : p(p_ + getint2(p_, c)) { }
00145 Item_base(T p_) : p(p_) { }
00146 T get_address() const { return p; }
00148 int size() const {
00149 int item_size = getint2(p, 0) & 0x7fff;
00150 AssertRel(item_size,>=,5);
00151 return item_size;
00152 }
00153 bool get_compressed() const { return *p & 0x80; }
00154 int component_of() const {
00155 return getint2(p, getK(p, I2) + I2 - C2);
00156 }
00157 int components_of() const {
00158 return getint2(p, getK(p, I2) + I2);
00159 }
00160 Key key() const { return Key(p + I2); }
00161 void append_chunk(std::string * tag) const {
00162
00163 int cd = getK(p, I2) + I2 + C2;
00164 int l = size() - cd;
00165 tag->append(reinterpret_cast<const char *>(p + cd), l);
00166 }
00170 uint4 block_given_by() const {
00171 AssertRel(size(),>=,BYTES_PER_BLOCK_NUMBER);
00172 return getint4(p, size() - BYTES_PER_BLOCK_NUMBER);
00173 }
00174 };
00175
00176 class Item : public Item_base<const byte *> {
00177 public:
00178
00179 Item(const byte * p_, int c) : Item_base<const byte *>(p_, c) { }
00180 Item(const byte * p_) : Item_base<const byte *>(p_) { }
00181 };
00182
00183 class Item_wr : public Item_base<byte *> {
00184 void set_key_len(int x) { setint1(p, I2, x); }
00185 public:
00186
00187 Item_wr(byte * p_, int c) : Item_base<byte *>(p_, c) { }
00188 Item_wr(byte * p_) : Item_base<byte *>(p_) { }
00189 void set_component_of(int i) {
00190 setint2(p, getK(p, I2) + I2 - C2, i);
00191 }
00192 void set_components_of(int m) {
00193 setint2(p, getK(p, I2) + I2, m);
00194 }
00195
00196 void set_key_and_block(Key newkey, int truncate_size, uint4 n) {
00197 int i = truncate_size;
00198
00199
00200 int newkey_len = newkey.length();
00201 AssertRel(i,<=,newkey_len);
00202 int newsize = I2 + K1 + i + C2;
00203
00204 setint2(p, 0, newsize + BYTES_PER_BLOCK_NUMBER);
00205
00206 setint1(p, I2, newsize - I2);
00207
00208 std::memmove(p + I2 + K1, newkey.get_address() + K1, i);
00209
00210 std::memmove(p + I2 + K1 + i, newkey.get_address() + K1 + newkey_len, C2);
00211
00212
00213 setint4(p, newsize, n);
00214 }
00215
00219 void set_block_given_by(uint4 n) {
00220 setint4(p, size() - BYTES_PER_BLOCK_NUMBER, n);
00221 }
00222 void set_size(int l) {
00223 AssertRel(l,>=,5);
00224 setint2(p, 0, l);
00225 }
00228 void form_null_key(uint4 n) {
00229 setint4(p, I2 + K1, n);
00230 set_key_len(K1);
00231 set_size(I2 + K1 + BYTES_PER_BLOCK_NUMBER);
00232 }
00233 void form_key(const std::string & key_) {
00234 std::string::size_type key_len = key_.length();
00235 if (key_len > CHERT_BTREE_MAX_KEY_LEN) {
00236
00237
00238
00239 std::string msg("Key too long: length was ");
00240 msg += str(key_len);
00241 msg += " bytes, maximum length of a key is "
00242 STRINGIZE(CHERT_BTREE_MAX_KEY_LEN) " bytes";
00243 throw Xapian::InvalidArgumentError(msg);
00244 }
00245
00246 set_key_len(key_len + K1 + C2);
00247 std::memmove(p + I2 + K1, key_.data(), key_len);
00248 set_component_of(1);
00249 }
00250
00251 void set_tag(int cd, const char *start, int len, bool compressed) {
00252 std::memmove(p + cd, start, len);
00253 set_size(cd + len);
00254 if (compressed) *p |= 0x80;
00255 }
00256 void fake_root_item() {
00257 set_key_len(K1 + C2);
00258 set_size(I2 + K1 + 2 * C2);
00259 set_component_of(1);
00260 set_components_of(1);
00261 }
00262 };
00263
00264
00265
00266
00267 #define BTREE_CURSOR_LEVELS 10
00268
00289 class XAPIAN_VISIBILITY_DEFAULT ChertTable {
00290 friend class ChertCursor;
00291 private:
00293 ChertTable(const ChertTable &);
00294
00296 ChertTable & operator=(const ChertTable &);
00297
00299 bool really_empty() const;
00300
00301 public:
00318 ChertTable(const char * tablename_, const std::string & path_,
00319 bool readonly_, int compress_strategy_ = DONT_COMPRESS,
00320 bool lazy = false);
00321
00327 ~ChertTable();
00328
00334 void close(bool permanent=false);
00335
00338 bool exists() const;
00339
00348 void open();
00349
00367 bool open(chert_revision_number_t revision_);
00368
00373 bool is_open() const { return handle >= 0; }
00374
00380 void flush_db();
00381
00398 void commit(chert_revision_number_t revision, int changes_fd = -1,
00399 const std::string * changes_tail = NULL);
00400
00405 void write_changed_blocks(int changes_fd);
00406
00412 void cancel();
00413
00428 bool get_exact_entry(const std::string & key, std::string & tag) const;
00429
00441 bool key_exists(const std::string &key) const;
00442
00451 bool read_tag(Cursor * C_, std::string *tag, bool keep_compressed) const;
00452
00469 void add(const std::string &key, std::string tag, bool already_compressed = false);
00470
00486 bool del(const std::string &key);
00487
00489 void erase();
00490
00495 void set_block_size(unsigned int block_size_);
00496
00499 unsigned int get_block_size() const { return block_size; }
00500
00524 void create_and_open(unsigned int blocksize);
00525
00526 void set_full_compaction(bool parity);
00527
00538 chert_revision_number_t get_latest_revision_number() const {
00539 return latest_revision_number;
00540 }
00541
00550 chert_revision_number_t get_open_revision_number() const {
00551 return revision_number;
00552 }
00553
00563 chert_tablesize_t get_entry_count() const {
00564 return item_count;
00565 }
00566
00568 bool empty() const {
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578 return (level == 0) && really_empty();
00579 }
00580
00586 ChertCursor * cursor_get() const;
00587
00593 bool is_modified() const { return Btree_modified; }
00594
00600 void set_max_item_size(size_t block_capacity) {
00601 if (block_capacity > BLOCK_CAPACITY) block_capacity = BLOCK_CAPACITY;
00602 max_item_size = (block_size - DIR_START - block_capacity * D2)
00603 / block_capacity;
00604 }
00605
00606 protected:
00607
00612 bool do_open_to_read(bool revision_supplied, chert_revision_number_t revision_);
00613
00618 bool do_open_to_write(bool revision_supplied,
00619 chert_revision_number_t revision_,
00620 bool create_db = false);
00621 bool basic_open(bool revision_supplied, chert_revision_number_t revision);
00622
00623 bool find(Cursor *) const;
00624 int delete_kt();
00625 void read_block(uint4 n, byte *p) const;
00626 void write_block(uint4 n, const byte *p) const;
00627 XAPIAN_NORETURN(void set_overwritten() const);
00628 void block_to_cursor(Cursor *C_, int j, uint4 n) const;
00629 void alter();
00630 void compact(byte *p);
00631 void enter_key(int j, Key prevkey, Key newkey);
00632 int mid_point(byte *p);
00633 void add_item_to_block(byte *p, Item_wr kt, int c);
00634 void add_item(Item_wr kt, int j);
00635 void delete_item(int j, bool repeatedly);
00636 int add_kt(bool found);
00637 void read_root();
00638 void split_root(uint4 split_n);
00639 void form_key(const std::string & key) const;
00640
00641 char other_base_letter() const {
00642 return (base_letter == 'A') ? 'B' : 'A';
00643 }
00644
00646 const char * tablename;
00647
00649 void lazy_alloc_deflate_zstream() const;
00650
00652 void lazy_alloc_inflate_zstream() const;
00653
00655 chert_revision_number_t revision_number;
00656
00658 chert_tablesize_t item_count;
00659
00661 unsigned int block_size;
00662
00666 mutable chert_revision_number_t latest_revision_number;
00667
00672 mutable bool both_bases;
00673
00675 char base_letter;
00676
00681 bool faked_root_block;
00682
00686 bool sequential;
00687
00695 int handle;
00696
00698 int level;
00699
00701 uint4 root;
00702
00704 mutable Item_wr kt;
00705
00707 byte * buffer;
00708
00710 ChertTable_base base;
00711
00713 std::string name;
00714
00718 int seq_count;
00719
00721 uint4 changed_n;
00722
00725 int changed_c;
00726
00728 size_t max_item_size;
00729
00731 mutable bool Btree_modified;
00732
00734 bool full_compaction;
00735
00737 bool writable;
00738
00740 mutable bool cursor_created_since_last_modification;
00741
00743 unsigned long cursor_version;
00744
00745
00746 bool prev(Cursor *C_, int j) const {
00747 if (sequential) return prev_for_sequential(C_, j);
00748 return prev_default(C_, j);
00749 }
00750
00751 bool next(Cursor *C_, int j) const {
00752 if (sequential) return next_for_sequential(C_, j);
00753 return next_default(C_, j);
00754 }
00755
00756
00757 bool prev_default(Cursor *C_, int j) const;
00758 bool next_default(Cursor *C_, int j) const;
00759
00760
00761 bool prev_for_sequential(Cursor *C_, int dummy) const;
00762 bool next_for_sequential(Cursor *C_, int dummy) const;
00763
00764 static int find_in_block(const byte * p, Key key, bool leaf, int c);
00765
00769 static uint4 block_given_by(const byte * p, int c);
00770
00771 mutable Cursor C[BTREE_CURSOR_LEVELS];
00772
00778 byte * split_p;
00779
00782 int compress_strategy;
00783
00785 mutable z_stream *deflate_zstream;
00786
00788 mutable z_stream *inflate_zstream;
00789
00791 bool lazy;
00792
00793
00794
00795
00797 XAPIAN_NORETURN(static void throw_database_closed());
00798 };
00799
00800 #endif