xapian-core
1.4.27
|
Class managing a Btree table in a Chert database. More...
#include <chert_table.h>
Public Member Functions | |
ChertTable (const char *tablename_, const std::string &path_, bool readonly_, int compress_strategy_=DONT_COMPRESS, bool lazy=false) | |
Create a new Btree object. More... | |
~ChertTable () | |
Close the Btree. More... | |
void | close (bool permanent=false) |
Close the Btree. More... | |
bool | readahead_key (const string &key) const |
bool | exists () const |
Determine whether the btree exists on disk. More... | |
void | open () |
Open the btree at the latest revision. More... | |
bool | open (chert_revision_number_t revision_) |
Open the btree at a given revision. More... | |
bool | is_open () const |
Return true if this table is open. More... | |
void | flush_db () |
Flush any outstanding changes to the DB file of the table. More... | |
void | commit (chert_revision_number_t revision, int changes_fd=-1, const std::string *changes_tail=NULL) |
Commit any outstanding changes to the table. More... | |
void | write_changed_blocks (int changes_fd) |
Append the list of blocks changed to a changeset file. More... | |
void | cancel () |
Cancel any outstanding changes. More... | |
bool | get_exact_entry (const std::string &key, std::string &tag) const |
Read an entry from the table, if and only if it is exactly that being asked for. More... | |
bool | key_exists (const std::string &key) const |
Check if a key exists in the Btree. More... | |
bool | read_tag (Cursor *C_, std::string *tag, bool keep_compressed) const |
Read the tag value for the key pointed to by cursor C_. More... | |
void | add (const std::string &key, std::string tag, bool already_compressed=false) |
Add a key/tag pair to the table, replacing any existing pair with the same key. More... | |
bool | del (const std::string &key) |
Delete an entry from the table. More... | |
void | erase () |
Erase this table from disk. More... | |
void | set_block_size (unsigned int block_size_) |
Set the block size. More... | |
unsigned int | get_block_size () const |
Get the block size. More... | |
void | create_and_open (unsigned int blocksize) |
Create a new empty btree structure on disk and open it at the initial revision. More... | |
void | set_full_compaction (bool parity) |
chert_revision_number_t | get_latest_revision_number () const |
Get the latest revision number stored in this table. More... | |
chert_revision_number_t | get_open_revision_number () const |
Get the revision number at which this table is currently open. More... | |
chert_tablesize_t | get_entry_count () const |
Return a count of the number of entries in the table. More... | |
bool | empty () const |
Return true if there are no entries in the table. More... | |
ChertCursor * | cursor_get () const |
Get a cursor for reading from the table. More... | |
bool | is_modified () const |
Determine whether the object contains uncommitted modifications. More... | |
void | set_max_item_size (size_t block_capacity) |
Set the maximum item size given the block capacity. More... | |
string | get_path () const |
Static Public Member Functions | |
static void | throw_database_closed () |
Throw an exception indicating that the database is closed. More... | |
Protected Member Functions | |
bool | do_open_to_read (bool revision_supplied, chert_revision_number_t revision_) |
Perform the opening operation to read. More... | |
bool | do_open_to_write (bool revision_supplied, chert_revision_number_t revision_, bool create_db=false) |
Perform the opening operation to write. More... | |
bool | basic_open (bool revision_supplied, chert_revision_number_t revision) |
bool | find (Cursor *) const |
find(C_) searches for the key of B->kt in the B-tree. More... | |
int | delete_kt () |
void | read_block (uint4 n, uint8_t *p) const |
read_block(n, p) reads block n of the DB file to address p. More... | |
void | write_block (uint4 n, const uint8_t *p) const |
write_block(n, p) writes block n in the DB file from address p. More... | |
void | set_overwritten () const |
void | block_to_cursor (Cursor *C_, int j, uint4 n) const |
void | alter () |
Btree::alter() is called when the B-tree is to be altered. More... | |
void | compact (uint8_t *p) |
compact(p) compacts the block at p by shuffling all the items up to the end. More... | |
void | enter_key (int j, Key prevkey, Key newkey) |
enter_key(j, prevkey, newkey) is called after a block split. More... | |
int | mid_point (uint8_t *p) const |
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in the block at p. More... | |
void | add_item_to_block (uint8_t *p, Item_wr kt, int c) |
add_item_to_block(p, kt_, c) adds item kt_ to the block at p. More... | |
void | add_item (Item_wr kt, int j) |
ChertTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j]. More... | |
void | delete_item (int j, bool repeatedly) |
ChertTable::delete_item(j, repeatedly) is (almost) the converse of add_item. More... | |
int | add_kt (bool found) |
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C. More... | |
void | read_root () |
void | split_root (uint4 split_n) |
Btree needs to gain a new level to insert more items: so split root block and construct a new one. More... | |
void | form_key (const std::string &key) const |
char | other_base_letter () const |
void | lazy_alloc_deflate_zstream () const |
Allocate the zstream for deflating, if not already allocated. More... | |
void | lazy_alloc_inflate_zstream () const |
Allocate the zstream for inflating, if not already allocated. More... | |
bool | prev (Cursor *C_, int j) const |
bool | next (Cursor *C_, int j) const |
bool | prev_default (Cursor *C_, int j) const |
bool | next_default (Cursor *C_, int j) const |
bool | prev_for_sequential (Cursor *C_, int dummy) const |
bool | next_for_sequential (Cursor *C_, int dummy) const |
Static Protected Member Functions | |
static int | find_in_block (const uint8_t *p, Key key, bool leaf, int c) |
find_in_block(p, key, leaf, c) searches for the key in the block at p. More... | |
static uint4 | block_given_by (const uint8_t *p, int c) |
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value as an integer. More... | |
Protected Attributes | |
const char * | tablename |
The name of the table (used when writing changesets). More... | |
chert_revision_number_t | revision_number |
revision number of the opened B-tree. More... | |
chert_tablesize_t | item_count |
keeps a count of the number of items in the B-tree. More... | |
unsigned int | block_size |
block size of the B tree in bytes More... | |
chert_revision_number_t | latest_revision_number |
Revision number of the other base, or zero if there is only one base file. More... | |
bool | both_bases |
set to true if baseA and baseB both exist as valid bases. More... | |
char | base_letter |
the value 'A' or 'B' of the current base More... | |
bool | faked_root_block |
true if the root block is faked (not written to disk). More... | |
bool | sequential |
true iff the data has been written in a single write in sequential order. More... | |
int | handle |
File descriptor of the table. More... | |
int | level |
number of levels, counting from 0 More... | |
uint4 | root |
the root block of the B-tree More... | |
Item_wr | kt |
buffer of size block_size for making up key-tag items More... | |
uint8_t * | buffer |
buffer of size block_size for reforming blocks More... | |
ChertTable_base | base |
For writing back as file baseA or baseB. More... | |
std::string | name |
The path name of the B tree. More... | |
int | seq_count |
count of the number of successive instances of purely sequential addition, starting at SEQ_START_POINT (negative) and going up to zero. More... | |
uint4 | changed_n |
the last block to be changed by an addition More... | |
int | changed_c |
directory offset corresponding to last block to be changed by an addition More... | |
size_t | max_item_size |
maximum size of an item (key-tag pair) More... | |
bool | Btree_modified |
Set to true the first time the B-tree is modified. More... | |
bool | full_compaction |
set to true when full compaction is to be achieved More... | |
bool | writable |
Set to true when the database is opened to write. More... | |
bool | cursor_created_since_last_modification |
Flag for tracking when cursors need to rebuild. More... | |
unsigned long | cursor_version |
Version count for tracking when cursors need to rebuild. More... | |
Cursor | C [BTREE_CURSOR_LEVELS] |
uint8_t * | split_p |
Buffer used when splitting a block. More... | |
int | compress_strategy |
DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY, Z_RLE. More... | |
z_stream * | deflate_zstream |
Zlib state object for deflating. More... | |
z_stream * | inflate_zstream |
Zlib state object for inflating. More... | |
bool | lazy |
If true, don't create the table until it's needed. More... | |
uint4 | last_readahead |
Last block readahead_key() preread. More... | |
Private Member Functions | |
ChertTable (const ChertTable &) | |
Copying not allowed. More... | |
ChertTable & | operator= (const ChertTable &) |
Assignment not allowed. More... | |
bool | really_empty () const |
Return true if there are no entries in the table. More... | |
Friends | |
class | ChertCursor |
Class managing a Btree table in a Chert database.
A table is a store holding a set of key/tag pairs.
A key is used to access a block of data in a chert table.
Keys are of limited length.
Keys may not be empty (each Btree has a special empty key for internal use).
A tag is a piece of data associated with a given key. The contents of the tag are opaque to the Btree.
Tags may be of arbitrary length (the Btree imposes a very large limit). Note though that they will be loaded into memory in their entirety, so should not be permitted to grow without bound in normal usage.
Tags which are null strings are valid, and are different from a tag simply not being in the table.
Definition at line 347 of file chert_table.h.
|
private |
Copying not allowed.
ChertTable::ChertTable | ( | const char * | tablename_, |
const std::string & | path_, | ||
bool | readonly_, | ||
int | compress_strategy_ = DONT_COMPRESS , |
||
bool | lazy = false |
||
) |
Create a new Btree object.
This does not create the table on disk - the create_and_open() method must be called to create the table on disk.
This also does not open the table - either the create_and_open() or open() methods must be called before use is made of the table.
tablename_ | The name of the table (used in changesets). |
path_ | Path at which the table is stored. |
readonly_ | whether to open the table for read only access. |
compress_strategy_ | DONT_COMPRESS, Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY, or Z_RLE. |
lazy | If true, don't create the table until it's needed. |
Definition at line 1562 of file chert_table.cc.
References LOGCALL_CTOR.
ChertTable::~ChertTable | ( | ) |
Close the Btree.
Any outstanding changes (i.e., changes made without commit() having subsequently been called) will be lost.
Definition at line 1754 of file chert_table.cc.
References close(), deflate_zstream, inflate_zstream, and LOGCALL_DTOR.
void ChertTable::add | ( | const std::string & | key, |
std::string | tag, | ||
bool | already_compressed = false |
||
) |
Add a key/tag pair to the table, replacing any existing pair with the same key.
If an error occurs during the operation, an exception will be thrown.
If key is empty, then the null item is replaced.
e.g. btree.add("TODAY", "Mon 9 Oct 2000");
key | The key to store in the table. |
tag | The tag to store in the table. |
already_compressed | true if tag is already compressed, for example because it is being opaquely copied (default: false). |
Definition at line 978 of file chert_table.cc.
References Assert, BYTE_PAIR_RANGE, C, C2, COMPRESS_MIN, D2, DONT_COMPRESS, I2, K1, LOGCALL_VOID, and TOTAL_FREE().
Referenced by Chert::PostlistChunkWriter::flush(), ChertCompact::merge_docid_keyed(), ChertCompact::merge_postlists(), ChertCompact::merge_spellings(), ChertCompact::merge_synonyms(), ChertRecordTable::replace_record(), ChertWritableDatabase::set_metadata(), ChertDatabaseStats::write(), and ValueUpdater::write_tag().
|
protected |
ChertTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
If there is not enough room the block splits and the item is then added to the appropriate half.
Definition at line 672 of file chert_table.cc.
References Assert, AssertRel, C, D2, DIR_END(), DIR_START, LOGCALL_VOID, MAX_FREE(), SET_DIR_END(), Item_base< T >::size(), and TOTAL_FREE().
|
protected |
add_item_to_block(p, kt_, c) adds item kt_ to the block at p.
c is the offset in the directory that needs to be expanded to accommodate the new entry for the item. We know before this is called that there is enough contiguous room for the item in the block, so it's just a matter of shuffling up any directory entries after where we're inserting and copying in the item.
Definition at line 636 of file chert_table.cc.
References Assert, AssertRel, D2, DIR_END(), DIR_START, Item_base< T >::get_address(), LOGCALL_VOID, MAX_FREE(), SET_DIR_END(), SET_MAX_FREE(), SET_TOTAL_FREE(), setD(), Item_base< T >::size(), and TOTAL_FREE().
|
protected |
add_kt(found) adds the item (key-tag pair) at B->kt into the B-tree, using cursor C.
found == find() is handed over as a parameter from Btree::add. Btree::alter() prepares for the alteration to the B-tree. Then there are a number of cases to consider:
If an item with the same key is in the B-tree (found is true), the new kt replaces it.
If kt is then smaller than, or the same size as, the item it replaces, kt is put in the same place as the item it replaces, and the TOTAL_FREE measure is reduced.
If kt is larger than the item it replaces it is put in the MAX_FREE space if there is room, and the directory entry and space counts are adjusted accordingly.
If the key of kt is not in the B-tree (found is false), the new kt is added in with add_item.
Definition at line 846 of file chert_table.cc.
References Assert, AssertRel, C, Item_base< T >::components_of(), D2, DIR_END(), DIR_START, Item_base< T >::get_address(), LOGCALL, MAX_FREE(), RETURN, SEQ_START_POINT, SET_MAX_FREE(), SET_TOTAL_FREE(), setD(), Item_base< T >::size(), and TOTAL_FREE().
|
protected |
Btree::alter() is called when the B-tree is to be altered.
It causes new blocks to be forced for the current set of blocks in the cursor.
The point is that if a block at level 0 is to be altered it may get a new number. Then the pointer to this block from level 1 will need changing. So the block at level 1 needs altering and may get a new block number. Then the pointer to this block from level 2 will need changing ... and so on back to the root.
The clever bit here is spotting the cases when we can make an early exit from this process. If C[j].rewrite is true, C[j+k].rewrite will be true for k = 1,2 ... We have been through all this before, and there is no need to do it again. If C[j].n was free at the start of the transaction, we can copy it back to the same place without violating the integrity of the B-tree. We don't then need a new n and can return. The corresponding C[j].rewrite may be true or false in that case.
Definition at line 338 of file chert_table.cc.
References Assert, C, LOGCALL_VOID, REVISION(), Item_wr::set_block_given_by(), and SET_REVISION().
|
protected |
Definition at line 1333 of file chert_table.cc.
References Assert, BLOCK_CAPACITY, BTREE_CURSOR_LEVELS, close(), ChertTable_base::get_revision(), LOGCALL, LOGLINE, name, rare, ChertTable_base::read(), RETURN, and zeroed_new().
Referenced by do_open_to_read().
|
staticprotected |
block_given_by(p, c) finds the item at block address p, directory offset c, and returns its tag value as an integer.
Referenced by next_default(), and prev_default().
Definition at line 272 of file chert_table.cc.
References Assert, C, GET_LEVEL(), LOGCALL_VOID, Cursor::n, Cursor::p, rare, REVISION(), Cursor::rewrite, and Xapian::Internal::str().
Referenced by next_default(), and prev_default().
void ChertTable::cancel | ( | ) |
Cancel any outstanding changes.
This will discard any modifications which haven't been committed by calling commit().
Definition at line 1953 of file chert_table.cc.
References Assert, base, base_letter, BLK_UNUSED, block_size, BTREE_CURSOR_LEVELS, Btree_modified, C, changed_c, changed_n, cursor_created_since_last_modification, cursor_version, DIR_START, faked_root_block, ChertTable_base::get_block_size(), ChertTable_base::get_have_fakeroot(), ChertTable_base::get_item_count(), ChertTable_base::get_level(), ChertTable_base::get_revision(), ChertTable_base::get_root(), ChertTable_base::get_sequential(), handle, item_count, latest_revision_number, level, LOGCALL_VOID, Cursor::n, rare, ChertTable_base::read(), read_root(), revision_number, Cursor::rewrite, root, seq_count, SEQ_START_POINT, sequential, throw_database_closed(), and writable.
Referenced by ChertSynonymTable::cancel(), ChertSpellingTable::cancel(), and ChertDatabase::cancel().
void ChertTable::close | ( | bool | permanent = false | ) |
Close the Btree.
This closes and frees any of the btree structures which have been created and opened.
permanent | If true, the Btree will not reopen on demand. |
Definition at line 1773 of file chert_table.cc.
References buffer, C, Item_base< T >::get_address(), handle, kt, level, LOGCALL_VOID, Cursor::p, and split_p.
Referenced by ChertDatabase::close(), commit(), create_and_open(), do_open_to_read(), erase(), open(), and ~ChertTable().
void ChertTable::commit | ( | chert_revision_number_t | revision, |
int | changes_fd = -1 , |
||
const std::string * | changes_tail = NULL |
||
) |
Commit any outstanding changes to the table.
Commit changes made by calling add() and del() to the Btree.
If an error occurs during the operation, this will be signalled by an exception. In case of error, changes made will not be committed to the Btree - they will be discarded.
revision | The new revision number to store. This must be greater than the latest revision number (see get_latest_revision_number()), or an exception will be thrown. |
changes_fd | The file descriptor to write changes to. Defaults to -1, meaning no changes will be written. |
Definition at line 1826 of file chert_table.cc.
References Assert, base, base_letter, BLK_UNUSED, both_bases, BTREE_CURSOR_LEVELS, Btree_modified, Cursor::c, C, changed_c, changed_n, ChertTable_base::clear_bit_map(), close(), ChertTable_base::commit(), DIR_START, faked_root_block, handle, io_full_sync(), io_sync(), io_tmp_rename(), item_count, latest_revision_number, level, LOGCALL_VOID, Cursor::n, name, other_base_letter(), read_root(), Xapian::revision(), revision_number, Cursor::rewrite, root, seq_count, SEQ_START_POINT, sequential, ChertTable_base::set_have_fakeroot(), ChertTable_base::set_item_count(), ChertTable_base::set_level(), ChertTable_base::set_revision(), ChertTable_base::set_root(), ChertTable_base::set_sequential(), tablename, throw_database_closed(), writable, and ChertTable_base::write_to_file().
Referenced by ChertDatabase::compact(), ChertCompact::multimerge_postlists(), and ChertDatabase::set_revision_number().
|
protected |
compact(p) compacts the block at p by shuffling all the items up to the end.
MAX_FREE(p) is then maximized, and is equal to TOTAL_FREE(p).
Definition at line 469 of file chert_table.cc.
References Assert, D2, DIR_END(), DIR_START, Item_base< T >::get_address(), LOGCALL_VOID, SET_MAX_FREE(), SET_TOTAL_FREE(), setD(), and Item_base< T >::size().
void ChertTable::create_and_open | ( | unsigned int | blocksize | ) |
Create a new empty btree structure on disk and open it at the initial revision.
The table must be writable - it doesn't make sense to create a table that is read-only!
The block size must be less than 64K, where K = 1024. It is unwise to use a small block size (less than 1024 perhaps), so we enforce a minimum block size of 2K.
Example:
Btree btree("X-"); btree.create_and_open(8192); // Files will be X-DB, X-baseA (and X-baseB).
blocksize | - Size of blocks to use. |
Xapian::DatabaseCreateError | if the table can't be created. |
Xapian::InvalidArgumentError | if the requested blocksize is unsuitable. |
Definition at line 1719 of file chert_table.cc.
References Assert, block_size, close(), do_open_to_write(), handle, io_unlink(), LOGCALL_VOID, revision_number, ChertTable_base::set_block_size(), set_block_size(), ChertTable_base::set_have_fakeroot(), ChertTable_base::set_revision(), ChertTable_base::set_sequential(), throw_database_closed(), writable, and ChertTable_base::write_to_file().
Referenced by ChertDatabase::compact(), ChertTermListTable::create_and_open(), ChertDatabase::create_and_open_tables(), and ChertCompact::multimerge_postlists().
ChertCursor * ChertTable::cursor_get | ( | ) | const |
Get a cursor for reading from the table.
The cursor is owned by the caller - it is the caller's responsibility to ensure that it is deleted.
Definition at line 1318 of file chert_table.cc.
References LOGCALL, RETURN, and throw_database_closed().
Referenced by check_chert_table(), Chert::PostlistChunkWriter::flush(), ChertDatabase::get_postlist_cursor(), ChertAllTermsList::next(), ChertDatabase::open_metadata_keylist(), ChertDatabase::open_spelling_wordlist(), ChertDatabase::open_synonym_keylist(), ChertAllTermsList::skip_to(), and ValueUpdater::update().
bool ChertTable::del | ( | const std::string & | key | ) |
Delete an entry from the table.
The entry will be removed from the table, if it exists. If it does not exist, no action will be taken. The item with an empty key can't be removed, and false is returned.
If an error occurs during the operation, this will be signalled by an exception.
e.g. bool deleted = btree.del("TODAY");
key | The key to remove from the table. |
Definition at line 1105 of file chert_table.cc.
References Assert, CHERT_BTREE_MAX_KEY_LEN, LOGCALL, RETURN, and throw_database_closed().
Referenced by ChertRecordTable::delete_record(), Chert::PostlistChunkWriter::flush(), ChertWritableDatabase::set_metadata(), and ValueUpdater::write_tag().
|
protected |
ChertTable::delete_item(j, repeatedly) is (almost) the converse of add_item.
If repeatedly is true, the process repeats at the next level when a block has been completely emptied, freeing the block and taking out the pointer to it. Emptied root blocks are also removed, which reduces the number of levels in the B-tree.
Definition at line 771 of file chert_table.cc.
References Assert, AssertRel, BLK_UNUSED, Item_base< T >::block_given_by(), C, D2, DIR_END(), DIR_START, LOGCALL_VOID, MAX_FREE(), SET_DIR_END(), SET_MAX_FREE(), SET_TOTAL_FREE(), Item_base< T >::size(), and TOTAL_FREE().
|
protected |
Definition at line 914 of file chert_table.cc.
References Assert, C, Item_base< T >::components_of(), LOGCALL, RETURN, and SEQ_START_POINT.
|
protected |
Perform the opening operation to read.
Return true iff the open succeeded.
Definition at line 2007 of file chert_table.cc.
References basic_open(), BLK_UNUSED, block_size, C, close(), errno_to_string(), handle, io_open_block_rd(), lazy, level, LOGCALL, Cursor::n, name, Cursor::p, read_root(), RETURN, revision_number, and throw_database_closed().
Referenced by open().
|
protected |
Perform the opening operation to write.
Return true iff the open succeeded.
Definition at line 1509 of file chert_table.cc.
References BLK_UNUSED, C, close(), DIR_START, errno_to_string(), io_open_block_wr(), LOGCALL, name, RETURN, SEQ_START_POINT, throw_database_closed(), and zeroed_new().
Referenced by create_and_open(), and open().
|
inline |
Return true if there are no entries in the table.
Definition at line 628 of file chert_table.h.
References ChertCursor::level.
Referenced by ChertDatabase::has_positions(), ChertCompact::merge_docid_keyed(), ChertCompact::merge_postlists(), ChertCompact::merge_spellings(), and ChertCompact::merge_synonyms().
enter_key(j, prevkey, newkey) is called after a block split.
It enters in the block at level C[j] a separating key for the block at level C[j - 1]. The key itself is newkey. prevkey is the preceding key, and at level 1 newkey can be trimmed down to the first point of difference to prevkey for entry in C[j].
This code looks longer than it really is. If j exceeds the number of B-tree levels the root block has split and we have to construct a new one, but this is a rare event.
The key is constructed in b, with block number C[j - 1].n as tag, and this is added in with add_item. add_item may itself cause a block split, with a further call to enter_key. Hence the recursion.
Definition at line 538 of file chert_table.cc.
References Assert, AssertEq, AssertRel, C, C2, D2, Item_wr::form_null_key(), Key::get_address(), getint4(), I2, K1, Item_base< T >::key(), Key::length(), LOGCALL_VOID, Item_wr::set_key_and_block(), SET_TOTAL_FREE(), and TOTAL_FREE().
void ChertTable::erase | ( | ) |
Erase this table from disk.
Definition at line 1696 of file chert_table.cc.
References close(), io_unlink(), and LOGCALL_VOID.
Referenced by ChertDatabase::compact(), and ChertLazyTable::create_and_open().
bool ChertTable::exists | ( | ) | const |
Determine whether the btree exists on disk.
Definition at line 1689 of file chert_table.cc.
References file_exists(), LOGCALL, and RETURN.
Referenced by ChertDatabase::database_exists().
|
protected |
find(C_) searches for the key of B->kt in the B-tree.
Result is true if found, false otherwise. When false, the B-tree cursor is positioned at the last key in the B-tree <= the search key. Goes to first (null) item in B-tree when key length == 0.
Note: The cursor can be left with C_[0].c == DIR_START - D2 if the requested key doesn't exist and is less than the smallest key in a leaf block, but after the dividing key. The caller needs to fix up C_[0].c in this case, either explicitly or by performing an operation which gives C_[0].c a valid value.
Definition at line 433 of file chert_table.cc.
|
staticprotected |
find_in_block(p, key, leaf, c) searches for the key in the block at p.
leaf is true for a data block, and false for an index block (when the first key is dummy and never needs to be tested). What we get is the directory entry to the last key <= the key being searched for.
The lookup is by binary chop, with i and j set to the left and right ends of the search area. In sequential addition, c will often be the answer, so we test the keys round c and move i and j towards c if possible.
The returned value is < DIR_END(p). If leaf is false, the returned value is >= DIR_START; if leaf is true, it can also be == DIR_START - D2.
Definition at line 386 of file chert_table.cc.
References Assert, AssertRel, D2, DIR_END(), DIR_START, Key::get_address(), Item_base< T >::key(), LOGCALL_STATIC, and RETURN.
void ChertTable::flush_db | ( | ) |
Flush any outstanding changes to the DB file of the table.
This must be called before commit, to ensure that the DB file is ready to be switched to a new version by the commit.
Definition at line 1803 of file chert_table.cc.
References Assert, Btree_modified, C, faked_root_block, handle, level, LOGCALL_VOID, throw_database_closed(), writable, and write_block().
Referenced by ChertDatabase::compact(), ChertSynonymTable::flush_db(), ChertSpellingTable::flush_db(), ChertCompact::multimerge_postlists(), and ChertDatabase::set_revision_number().
|
protected |
Definition at line 948 of file chert_table.cc.
References LOGCALL_VOID.
|
inline |
Get the block size.
Definition at line 559 of file chert_table.h.
Referenced by ChertDatabase::open_tables(), and ChertDatabase::open_tables_consistent().
|
inline |
Return a count of the number of entries in the table.
The count does not include the ever-present item with null key.
Use empty() if you only want to know if the table is empty or not.
Definition at line 623 of file chert_table.h.
Referenced by ChertTableCheck::check(), check_chert_table(), and ChertRecordTable::get_doccount().
bool ChertTable::get_exact_entry | ( | const std::string & | key, |
std::string & | tag | ||
) | const |
Read an entry from the table, if and only if it is exactly that being asked for.
If the key is found in the table, then the tag is copied to tag. If the key is not found tag is left unchanged.
The result is true iff the specified key is found in the Btree.
key | The key to look for in the table. |
tag | A tag object to fill with the value if found. |
Definition at line 1187 of file chert_table.cc.
References Assert, C, CHERT_BTREE_MAX_KEY_LEN, LOGCALL, RETURN, and throw_database_closed().
Referenced by ChertTermList::ChertTermList(), Chert::PostlistChunkWriter::flush(), ChertPostListTable::get_freqs(), ChertDatabase::get_metadata(), ChertRecordTable::get_record(), ChertDatabaseStats::read(), and ChertPositionList::read_data().
|
inline |
Get the latest revision number stored in this table.
This gives the higher of the revision numbers held in the base files of the B-tree, or just the revision number if there's only one base file.
It is possible that there are other, older, revisions of this table available, and indeed that the revision currently open is one of these older revisions.
Definition at line 598 of file chert_table.h.
Referenced by ChertDatabase::ChertDatabase(), and ChertDatabase::get_next_revision_number().
|
inline |
Get the revision number at which this table is currently open.
It is possible that there are other, more recent or older revisions available.
Definition at line 610 of file chert_table.h.
Referenced by ChertDatabaseReplicator::apply_changeset_from_conn(), ChertTableCheck::check(), ChertDatabase::ChertDatabase(), ChertDatabase::create_and_open_tables(), ChertDatabase::get_revision_number(), and ChertDatabase::open_tables_consistent().
|
inline |
Definition at line 672 of file chert_table.h.
Referenced by ChertDatabase::compact().
|
inline |
Determine whether the object contains uncommitted modifications.
Definition at line 653 of file chert_table.h.
Referenced by ChertDatabase::apply(), ChertWritableDatabase::has_uncommitted_changes(), ChertSynonymTable::is_modified(), and ChertSpellingTable::is_modified().
|
inline |
Return true if this table is open.
NB If the table is lazy and doesn't yet exist, returns false.
Definition at line 433 of file chert_table.h.
References ChertCursor::read_tag(), and Xapian::revision().
Referenced by ChertWritableDatabase::add_document_(), ChertWritableDatabase::delete_document(), ChertDatabase::open_term_list(), ChertWritableDatabase::replace_document(), and ChertDatabase::throw_termlist_table_close_exception().
bool ChertTable::key_exists | ( | const std::string & | key | ) | const |
Check if a key exists in the Btree.
This is just like get_exact_entry() except it doesn't read the tag value so is more efficient if you only want to check that the key exists.
key | The key to look for in the table. |
Definition at line 1210 of file chert_table.cc.
References Assert, C, CHERT_BTREE_MAX_KEY_LEN, LOGCALL, and RETURN.
|
protected |
Allocate the zstream for deflating, if not already allocated.
Definition at line 1614 of file chert_table.cc.
References compress_strategy, deflate_zstream, rare, Xapian::Internal::str(), and usual.
|
protected |
Allocate the zstream for inflating, if not already allocated.
Definition at line 1652 of file chert_table.cc.
References inflate_zstream, rare, Xapian::Internal::str(), and usual.
|
protected |
mid_point(p) finds the directory entry in c that determines the approximate mid point of the data in the block at p.
Definition at line 604 of file chert_table.cc.
References Assert, D2, DIR_END(), DIR_START, LOGCALL, RETURN, Item_base< T >::size(), and TOTAL_FREE().
|
inlineprotected |
Definition at line 821 of file chert_table.h.
References dummy.
Referenced by ChertSynonymTermList::ChertSynonymTermList(), and Chert::PostlistChunkReader::is_at_end().
|
protected |
Definition at line 2226 of file chert_table.cc.
References AssertRel, block_given_by(), block_size, block_to_cursor(), Cursor::c, D2, DIR_END(), DIR_START, level, LOGCALL, Cursor::p, and RETURN.
|
protected |
Definition at line 2149 of file chert_table.cc.
References Assert, AssertEq, AssertRel, base, block_size, Cursor::c, C, D2, DIR_END(), DIR_START, ChertTable_base::get_last_block(), GET_LEVEL(), latest_revision_number, level, LOGCALL, Cursor::n, Cursor::p, read_block(), RETURN, REVISION(), revision_number, set_overwritten(), and writable.
void ChertTable::open | ( | ) |
Open the btree at the latest revision.
Xapian::DatabaseCorruptError | will be thrown if the table is in a corrupt state. |
Xapian::DatabaseOpeningError | will be thrown if the table cannot be opened (but is not corrupt - e.g. permission problems, not present, etc.). |
Definition at line 2050 of file chert_table.cc.
References close(), do_open_to_read(), do_open_to_write(), LOGCALL_VOID, LOGLINE, and writable.
Referenced by ChertDatabaseReplicator::apply_changeset_from_conn(), ChertTableCheck::check(), check_chert_table(), ChertPostListTable::open(), ChertDatabase::open_tables(), and ChertDatabase::open_tables_consistent().
bool ChertTable::open | ( | chert_revision_number_t | revision_ | ) |
Open the btree at a given revision.
Like ChertTable::open(), but try to open at the given revision number and fail if that isn't possible.
revision_ | The revision number to open. |
Xapian::DatabaseCorruptError | will be thrown if the table is in a corrupt state. |
Xapian::DatabaseOpeningError | will be thrown if the table cannot be opened (but is not corrupt - e.g. permission problems, not present, etc.). |
Definition at line 2067 of file chert_table.cc.
References AssertEq, close(), do_open_to_read(), do_open_to_write(), LOGCALL, LOGLINE, RETURN, revision_number, and writable.
|
private |
Assignment not allowed.
|
inline protected |
Definition at line 711 of file chert_table.h.
Referenced by commit().
|
inline protected |
Definition at line 816 of file chert_table.h.
|
protected |
Definition at line 2203 of file chert_table.cc.
References AssertRel, block_given_by(), block_size, block_to_cursor(), Cursor::c, D2, DIR_END(), DIR_START, level, LOGCALL, Cursor::p, and RETURN.
|
protected |
Definition at line 2094 of file chert_table.cc.
References Assert, AssertEq, AssertRel, block_size, Cursor::c, C, D2, DIR_END(), DIR_START, GET_LEVEL(), latest_revision_number, level, LOGCALL, Cursor::n, Cursor::p, read_block(), RETURN, REVISION(), revision_number, set_overwritten(), and writable.
|
protected |
read_block(n, p) reads block n of the DB file to address p.
Definition at line 170 of file chert_table.cc.
References Assert, DIR_END(), DIR_START, io_read_block(), LOGCALL_VOID, rare, Xapian::Internal::str(), and throw_database_closed().
Referenced by next_for_sequential(), prev_for_sequential(), and write_changed_blocks().
|
protected |
Definition at line 1465 of file chert_table.cc.
References Assert, C, C2, D2, DIR_START, Item_wr::fake_root_item(), I2, K1, LOGCALL_VOID, REVISION(), SET_DIR_END(), SET_LEVEL(), SET_MAX_FREE(), SET_REVISION(), SET_TOTAL_FREE(), and setD().
Referenced by cancel(), commit(), and do_open_to_read().
bool ChertTable::read_tag | ( | Cursor * | C_, |
std::string * | tag, | ||
bool | keep_compressed | ||
) | const |
Read the tag value for the key pointed to by cursor C_.
keep_compressed | Don't uncompress the tag - e.g. useful if it's just being opaquely copied. |
Definition at line 1223 of file chert_table.cc.
References aligned_write4(), Item_base< T >::append_chunk(), C2, Item_base< T >::components_of(), Item_base< T >::get_compressed(), I2, K1, LOGCALL, LOGLINE, RETURN, and Xapian::Internal::str().
bool ChertTable::readahead_key | ( | const string & | key | ) | const |
Definition at line 1141 of file chert_table.cc.
References Assert, Item_base< T >::block_given_by(), C, CHERT_BTREE_MAX_KEY_LEN, io_readahead_block(), LOGCALL, and RETURN.
Referenced by ChertDatabase::readahead_for_query(), and ChertRecordTable::readahead_for_record().
|
private |
Return true if there are no entries in the table.
Definition at line 1600 of file chert_table.cc.
References ChertCursor::find_entry(), handle, ChertCursor::next(), and throw_database_closed().
void ChertTable::set_block_size | ( | unsigned int | block_size_ | ) |
Set the block size.
It's only safe to do this before the table is created.
Definition at line 1707 of file chert_table.cc.
References block_size, BYTE_PAIR_RANGE, CHERT_DEFAULT_BLOCK_SIZE, and LOGCALL_VOID.
Referenced by ChertDatabase::compact(), ChertLazyTable::create_and_open(), create_and_open(), ChertDatabase::open_tables(), and ChertDatabase::open_tables_consistent().
void ChertTable::set_full_compaction | ( | bool | parity | ) |
Definition at line 1309 of file chert_table.cc.
References Assert, and LOGCALL_VOID.
Referenced by ChertDatabase::compact().
|
inline |
Set the maximum item size given the block capacity.
At least this many items of maximum size must fit into a block. The default is BLOCK_CAPACITY (which is currently 4).
Definition at line 660 of file chert_table.h.
References BLOCK_CAPACITY, CHERT_MAX_ITEM_SIZE, D2, DIR_START, and throw_database_closed().
Referenced by ChertDatabase::compact().
|
protected |
Definition at line 251 of file chert_table.cc.
References LOGCALL_VOID.
Referenced by next_for_sequential(), and prev_for_sequential().
|
protected |
The Btree needs to gain a new level to insert more items, so split the root block and construct a new one.
Definition at line 493 of file chert_table.cc.
References BTREE_CURSOR_LEVELS, C, DIR_START, Item_wr::form_null_key(), LOGCALL_VOID, SET_DIR_END(), SET_LEVEL(), SET_REVISION(), STRINGIZE, and zeroed_new().
|
static |
Throw an exception indicating that the database is closed.
Definition at line 2257 of file chert_table.cc.
Referenced by cancel(), commit(), create_and_open(), cursor_get(), del(), do_open_to_read(), do_open_to_write(), flush_db(), ChertValueManager::get_all_values(), get_exact_entry(), read_block(), really_empty(), and ChertDatabase::throw_termlist_table_close_exception().
|
protected |
write_block(n, p) writes block n in the DB file from address p.
When writing we check to see if the DB file has already been modified. If not (so this is the first write) the old base is deleted. This prevents the possibility of it being opened subsequently as an invalid base.
Definition at line 199 of file chert_table.cc.
References Assert, AssertEqParanoid, AssertParanoid, io_unlink(), io_write_block(), LOGCALL_VOID, and REVISION().
Referenced by flush_db().
void ChertTable::write_changed_blocks | ( | int | changes_fd | ) |
Append the list of blocks changed to a changeset file.
changes_fd | The file descriptor to write changes to. |
Definition at line 1909 of file chert_table.cc.
References Assert, base, block_size, ChertTable_base::calculate_last_block(), faked_root_block, ChertTable_base::find_changed_block(), handle, io_write(), LOGCALL_VOID, pack_string(), pack_uint(), read_block(), and tablename.
Referenced by ChertDatabase::set_revision_number().
|
friend |
Definition at line 348 of file chert_table.h.
|
protected |
For writing back as file baseA or baseB.
Definition at line 780 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), commit(), next_for_sequential(), and write_changed_blocks().
|
protected |
the value 'A' or 'B' of the current base
Definition at line 745 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), and commit().
|
protected |
block size of the B tree in bytes
Definition at line 731 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), create_and_open(), do_open_to_read(), next_default(), next_for_sequential(), prev_default(), prev_for_sequential(), set_block_size(), and write_changed_blocks().
|
mutable protected |
set to true if baseA and baseB both exist as valid bases.
The unused base is deleted as soon as a write to the Btree takes place.
Definition at line 742 of file chert_table.h.
Referenced by commit().
|
mutable protected |
Set to true the first time the B-tree is modified.
Definition at line 801 of file chert_table.h.
Referenced by cancel(), commit(), and flush_db().
|
protected |
buffer of size block_size for reforming blocks
Definition at line 777 of file chert_table.h.
Referenced by close().
|
mutable protected |
Definition at line 841 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), close(), commit(), do_open_to_read(), flush_db(), next_for_sequential(), and prev_for_sequential().
|
protected |
directory offset corresponding to last block to be changed by an addition
Definition at line 795 of file chert_table.h.
|
protected |
the last block to be changed by an addition
Definition at line 791 of file chert_table.h.
|
protected |
The compression strategy: either DONT_COMPRESS, or one of the zlib strategies Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY, Z_RLE.
Definition at line 852 of file chert_table.h.
Referenced by lazy_alloc_deflate_zstream().
|
mutable protected |
Flag for tracking when cursors need to rebuild.
Definition at line 810 of file chert_table.h.
Referenced by cancel().
|
protected |
Version count for tracking when cursors need to rebuild.
Definition at line 813 of file chert_table.h.
Referenced by cancel().
|
mutable protected |
Zlib state object for deflating.
Definition at line 855 of file chert_table.h.
Referenced by lazy_alloc_deflate_zstream(), and ~ChertTable().
|
protected |
True if the root block is faked (not written to disk), false otherwise.
This is true when the btree hasn't been modified yet.
Definition at line 751 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), commit(), flush_db(), and write_changed_blocks().
|
protected |
set to true when full compaction is to be achieved
Definition at line 804 of file chert_table.h.
|
protected |
File descriptor of the table.
If the table is lazily created and doesn't yet exist, this will be -1.
If close() has been called, this will be -2.
Definition at line 765 of file chert_table.h.
Referenced by cancel(), close(), commit(), create_and_open(), do_open_to_read(), flush_db(), really_empty(), and write_changed_blocks().
|
mutable protected |
Zlib state object for inflating.
Definition at line 858 of file chert_table.h.
Referenced by lazy_alloc_inflate_zstream(), and ~ChertTable().
|
protected |
keeps a count of the number of items in the B-tree.
Definition at line 728 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), and commit().
|
mutable protected |
buffer of size block_size for making up key-tag items
Definition at line 774 of file chert_table.h.
Referenced by close().
|
mutable protected |
Last block readahead_key() preread.
Definition at line 864 of file chert_table.h.
|
mutable protected |
Revision number of the other base, or zero if there is only one base file.
Definition at line 736 of file chert_table.h.
Referenced by cancel(), commit(), next_for_sequential(), and prev_for_sequential().
|
protected |
If true, don't create the table until it's needed.
Definition at line 861 of file chert_table.h.
Referenced by do_open_to_read().
|
protected |
number of levels, counting from 0
Definition at line 768 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), close(), commit(), do_open_to_read(), flush_db(), next_default(), next_for_sequential(), prev_default(), and prev_for_sequential().
|
protected |
maximum size of an item (key-tag pair)
Definition at line 798 of file chert_table.h.
|
protected |
The path name of the B tree.
Definition at line 783 of file chert_table.h.
Referenced by commit(), and do_open_to_read().
|
protected |
revision number of the opened B-tree.
Definition at line 725 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), commit(), create_and_open(), do_open_to_read(), next_for_sequential(), open(), and prev_for_sequential().
|
protected |
the root block of the B-tree
Definition at line 771 of file chert_table.h.
|
protected |
Count of the number of successive instances of purely sequential addition, starting at SEQ_START_POINT (a negative value) and going up to zero.
Definition at line 788 of file chert_table.h.
|
protected |
true iff the data has been written in a single write in sequential order.
Definition at line 756 of file chert_table.h.
Referenced by cancel(), ChertTableCheck::check(), and commit().
|
protected |
Buffer used when splitting a block.
This buffer holds the split-off part of the block. It's only used when updating (in ChertTable::add_item()).
Definition at line 848 of file chert_table.h.
Referenced by close().
|
protected |
The name of the table (used when writing changesets).
Definition at line 716 of file chert_table.h.
Referenced by commit(), and write_changed_blocks().
|
protected |
Set to true when the database is opened to write.
Definition at line 807 of file chert_table.h.
Referenced by cancel(), commit(), create_and_open(), flush_db(), next_for_sequential(), open(), and prev_for_sequential().