49 const char * p = tag.data();
51 termfreq_ptr, collfreq_ptr);
108 first_did = first_did_;
109 current_did = current_did_;
145 LOGLINE(DB,
"ChertPostList data ran out");
149 LOGLINE(DB,
"ChertPostList value too large");
164 if (keyend - *keypos >= 2 && (*keypos)[0] ==
'\0' && (*keypos)[1] ==
'\xe0') {
173 return tname_in_key == tname;
179 if (*keypos == keyend)
return false;
191 LOGCALL_STATIC(DB,
Xapian::docid,
"read_start_of_first_chunk", (
const void *)posptr | (
const void *)end | (
void *)number_of_entries_ptr | (
void *)collection_freq_ptr);
194 number_of_entries_ptr, collection_freq_ptr);
195 if (number_of_entries_ptr)
196 LOGVALUE(DB, *number_of_entries_ptr);
197 if (collection_freq_ptr)
215 *did_ptr += did_increase + 1;
230 bool * is_last_chunk_ptr)
232 LOGCALL_STATIC(DB,
Xapian::docid,
"read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
233 Assert(is_last_chunk_ptr);
244 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
246 RETURN(last_did_in_chunk);
271 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.
empty()), did(first_did)
273 if (!at_end)
read_wdf(&pos, end, &wdf);
295 PostlistChunkReader::next()
305 PostlistChunkWriter::PostlistChunkWriter(
const string &orig_key_,
306 bool is_first_chunk_,
307 const string &tname_,
309 : orig_key(orig_key_),
310 tname(tname_), is_first_chunk(is_first_chunk_),
311 is_last_chunk(is_last_chunk_),
314 LOGCALL_CTOR(DB,
"PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
365 Assert(new_final_did >= new_first_did);
368 pack_uint(chunk, new_final_did - new_first_did);
374 unsigned int start_of_chunk_header,
375 unsigned int end_of_chunk_header,
380 Assert((
size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
382 chunk.replace(start_of_chunk_header,
383 end_of_chunk_header - start_of_chunk_header,
410 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting chunk");
413 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting first chunk");
427 AutoPtr<ChertCursor> cursor(table->
cursor_get());
429 if (!cursor->find_entry(
orig_key)) {
446 const char *tagpos = cursor->current_tag.data();
447 const char *tagend = tagpos + cursor->current_tag.size();
450 &num_ent, &coll_freq);
455 if (cursor->after_end()) {
458 const char *kpos = cursor->current_key.data();
459 const char *kend = kpos + cursor->current_key.size();
471 const char *tagpos = cursor->current_tag.data();
472 const char *tagend = tagpos + cursor->current_tag.size();
475 bool new_is_last_chunk;
480 string chunk_data(tagpos, tagend);
483 table->
del(cursor->current_key);
490 new_last_did_in_chunk);
496 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting secondary chunk");
503 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting secondary last chunk");
505 AutoPtr<ChertCursor> cursor(table->
cursor_get());
513 const char * keypos = cursor->current_key.data();
514 const char * keyend = keypos + cursor->current_key.size();
519 bool is_prev_first_chunk = (keypos == keyend);
523 string tag = cursor->current_tag;
525 const char *tagpos = tag.data();
526 const char *tagend = tagpos + tag.size();
530 if (is_prev_first_chunk) {
537 bool wrong_is_last_chunk;
538 string::size_type start_of_chunk_header = tagpos - tag.data();
541 &wrong_is_last_chunk);
542 string::size_type end_of_chunk_header = tagpos - tag.data();
546 start_of_chunk_header,
551 table->
add(cursor->current_key, tag);
554 LOGLINE(DB,
"PostlistChunkWriter::flush(): updating chunk which still has items in it");
570 LOGLINE(DB,
"PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
580 const char * tagpos = tag.data();
581 const char * tagend = tagpos + tag.size();
583 &num_ent, &coll_freq);
590 table->
add(key, tag);
594 LOGLINE(DB,
"PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
606 const char *keypos =
orig_key.data();
607 const char *keyend = keypos +
orig_key.size();
631 table->
add(new_key, tag);
644 if (!
unpack_uint(posptr, end, number_of_entries_ptr))
646 if (!
unpack_uint(posptr, end, collection_freq_ptr))
670 const string & term_,
673 this_db(keep_reference ? this_db_ : NULL),
676 cursor(this_db_->postlist_table.cursor_get())
678 LOGCALL_CTOR(DB,
"ChertPostList", this_db_.
get() | term_ | keep_reference);
680 int found =
cursor->find_entry(key);
682 LOGLINE(DB,
"postlist for term not found");
736 LOGCALL(DB,
bool,
"ChertPostList::next_in_chunk", NO_ARGS);
760 if (
cursor->after_end()) {
765 const char * keypos =
cursor->current_key.data();
766 const char * keyend = keypos +
cursor->current_key.size();
781 ") is not greater than final document ID in previous chunk (" +
837 LOGCALL(DB,
bool,
"ChertPostList::current_chunk_contains", desired_did);
848 LOGCALL_VOID(DB,
"ChertPostList::move_to_chunk_containing", desired_did);
852 const char * keypos =
cursor->current_key.data();
853 const char * keyend = keypos +
cursor->current_key.size();
867 if (keypos == keyend) {
869 #ifdef XAPIAN_ASSERTIONS 896 LOGCALL(DB,
bool,
"ChertPostList::move_forward_in_chunk_to_at_least", desired_did);
897 if (
did >= desired_did)
903 if (
did >= desired_did) {
922 LOGCALL(DB,
PostList *,
"ChertPostList::skip_to", desired_did | w_min);
947 LOGLINE(DB,
"Skipped to docid " <<
did <<
", wdf = " <<
wdf);
957 LOGCALL(DB,
bool,
"ChertPostList::jump_to", desired_did);
1000 AutoPtr<ChertCursor>
cursor(cursor_get());
1002 (void)cursor->find_entry(key);
1003 Assert(!cursor->after_end());
1005 const char * keypos = cursor->current_key.data();
1006 const char * keyend = keypos + cursor->current_key.size();
1020 bool is_first_chunk = (keypos == keyend);
1024 const char *
pos = cursor->current_tag.data();
1025 const char *
end = pos + cursor->current_tag.size();
1027 if (is_first_chunk) {
1040 if (did > last_did_in_chunk) {
1045 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1054 if (cursor->after_end()) {
1057 const char *kpos = cursor->current_key.data();
1058 const char *kend = kpos + cursor->current_key.size();
1068 RETURN(first_did_of_next_chunk - 1);
1073 const map<
string, map<
Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
1074 const map<Xapian::docid, Xapian::termcount> & doclens,
1075 const map<
string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
1077 LOGCALL_VOID(DB,
"ChertPostListTable::merge_changes", mod_plists | doclens | freq_deltas);
1083 if (!doclens.empty()) {
1085 string current_key =
make_key(
string());
1086 if (!key_exists(current_key)) {
1087 LOGLINE(DB,
"Adding dummy first chunk");
1090 add(current_key, newtag);
1093 map<Xapian::docid, Xapian::termcount>::const_iterator j;
1094 j = doclens.begin();
1095 Assert(j != doclens.end());
1100 max_did = get_chunk(
string(), j->first,
true, &from, &to);
1102 for ( ; j != doclens.end(); ++j) {
1106 LOGLINE(DB,
"Updating doclens, did=" << did);
1109 if (copy_did >= did) {
1110 if (copy_did == did) from->
next();
1116 if ((!from || from->
is_at_end()) && did > max_did) {
1120 max_did = get_chunk(
string(), did,
false, &from, &to);
1121 goto next_doclen_chunk;
1125 if (new_doclen != static_cast<Xapian::termcount>(-1)) {
1126 to->
append(
this, did, new_doclen);
1141 map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
1142 for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
1143 if (i->second.empty())
continue;
1144 string tname = i->first;
1148 map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
1149 Assert(deltas != freq_deltas.end());
1151 string current_key =
make_key(tname);
1153 (void)get_exact_entry(current_key, tag);
1156 const char *
pos = tag.data();
1157 const char *
end = pos + tag.size();
1170 &termfreq, &collfreq);
1175 termfreq += deltas->second.first;
1176 if (termfreq == 0) {
1187 if (!found)
continue;
1188 while (cursor.
del()) {
1190 const char *kend = kpos + cursor.
current_key.size();
1195 collfreq += deltas->second.second;
1201 add(current_key, newhdr);
1203 Assert((
size_t)(pos - tag.data()) <= tag.size());
1204 tag.replace(0, pos - tag.data(), newhdr);
1205 add(current_key, tag);
1208 map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
1209 j = i->second.begin();
1210 Assert(j != i->second.end());
1215 max_did = get_chunk(tname, j->first, j->second.first ==
'A',
1217 for ( ; j != i->second.end(); ++j) {
1221 LOGLINE(DB,
"Updating tname=" << tname <<
", did=" << did);
1224 if (copy_did >= did) {
1225 if (copy_did == did) {
1226 Assert(j->second.first !=
'A');
1234 if ((!from || from->
is_at_end()) && did > max_did) {
1238 max_did = get_chunk(tname, did,
false, &from, &to);
1242 if (j->second.first !=
'D') {
1244 to->
append(
this, did, new_wdf);
1265 AutoPtr<ChertCursor> cur(cursor_get());
1273 const char * p = cur->current_tag.data();
1274 const char * e = p + cur->current_tag.size();
1279 Assert(!cur->after_end());
1281 const char * keypos = cur->current_key.data();
1282 const char * keyend = keypos + cur->current_key.size();
1292 p = cur->current_tag.data();
1293 e = p + cur->current_tag.size();
1296 if (keypos == keyend) {
1297 start_of_last_chunk = first;
1302 &start_of_last_chunk)) {
void pack_bool(std::string &s, bool value)
Append an encoded bool to a string.
static void read_wdf(const char **posptr, const char *end, Xapian::termcount *wdf_ptr)
Read the wdf for an entry.
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Xapian::docid get_chunk(const string &tname, Xapian::docid did, bool adding, Chert::PostlistChunkReader **from, Chert::PostlistChunkWriter **to)
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
Define the XAPIAN_NORETURN macro.
~ChertPostList()
Destructor.
bool is_last_chunk
True if this is the last chunk.
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk.
Abstract base class for postlists.
A position list in a chert database.
Xapian::doccount number_of_entries
The number of entries in the posting list.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
void flush(ChertTable *table)
Flush the chunk to the buffered table.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
const char * end
Pointer to byte after end of current chunk.
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const ChertDatabase > db) const
Returns the length of document did.
Xapian::termcount get_doclength() const
Returns the length of current document.
PostlistChunkReader(Xapian::docid first_did, const string &data_)
Initialise the postlist chunk reader.
static void write_start_of_chunk(string &chunk, unsigned int start_of_chunk_header, unsigned int end_of_chunk_header, bool is_last_chunk, Xapian::docid first_did_in_chunk, Xapian::docid last_did_in_chunk)
static string make_start_of_chunk(bool new_is_last_chunk, Xapian::docid new_first_did, Xapian::docid new_final_did)
Make the data to go at the start of a standard chunk.
Class managing a Btree table in a Chert database.
static void report_read_error(const char *position)
Report an error when reading the posting list.
#define LOGCALL_DTOR(CATEGORY, CLASS)
std::string get_description() const
Get a description of the document.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Convert types to std::string.
Xapian::docid get_docid() const
ChertPositionListTable position_table
Table storing position lists.
Abstract base class for leaf postlists.
void get_freqs(const std::string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
std::string term
The term name for this postlist (empty for an alldocs postlist).
Xapian::termcount wdf
The wdf of the current document.
Xapian::termcount get_wdf_upper_bound() const
void next_chunk()
Move to the next chunk.
PositionList * open_position_list() const
Get the list of positions of the term in the current document.
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens.
static Xapian::docid read_start_of_chunk(const char **posptr, const char *end, Xapian::docid first_did_in_chunk, bool *is_last_chunk_ptr)
Read the start of a chunk.
ChertPostList(const ChertPostList &)
Copying is not allowed.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Virtual methods of Database::Internal.
bool del(const std::string &key)
Delete an entry from the table.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
RangeError indicates an attempt to access outside the bounds of a container.
bool have_started
Whether we've started reading the list yet.
void append(ChertTable *table, Xapian::docid did, Xapian::termcount wdf)
Append an entry to this chunk.
bool C_unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
Interface to Btree cursors.
AutoPtr< ChertCursor > cursor
Cursor pointing to current chunk of postlist.
bool next(Cursor *C_, int j) const
bool del()
Delete the current key/tag pair, leaving the cursor on the next entry.
bool read_data(const ChertTable *table, Xapian::docid did, const string &tname)
Fill list with data, and move the position to the start.
Internal * next()
Advance the current position to the next document in the postlist.
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
string str(int value)
Convert int to std::string.
bool is_at_end
Whether we've run off the end of the list yet.
bool empty() const
Return true if there are no entries in the table.
#define CHERT_MAX_DOCID
The largest docid value supported by chert.
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
static string make_start_of_first_chunk(Xapian::doccount entries, Xapian::termcount collectionfreq, Xapian::docid new_did)
Make the data to go at the start of the very first chunk.
void merge_changes(const map< string, map< Xapian::docid, pair< char, Xapian::termcount > > > &mod_plists, const map< Xapian::docid, Xapian::termcount > &doclens, const map< string, pair< Xapian::termcount_diff, Xapian::termcount_diff > > &freq_deltas)
Merge added, removed, and changed entries.
PositionList * read_position_list()
Get the list of positions of the term in the current document.
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid.
Xapian::docid current_did
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
ChertCursor * cursor_get() const
Get a cursor for reading from the table.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const ChertDatabase > db) const
Check if document did exists.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
AutoPtr< ChertPostList > doclen_pl
PostList for looking up document lengths.
C++ class definition for chert database.
Indicates an attempt to access a document not present in the database.
static bool check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
DatabaseCorruptError indicates database corruption was detected.
std::string pack_chert_postlist_key(const std::string &term)
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string.
string current_key
Current key pointed to by cursor.
void raw_append(Xapian::docid first_did_, Xapian::docid current_did_, const string &s)
Append a block of raw entries to this chunk.
Xapian::Internal::intrusive_ptr< const ChertDatabase > this_db
The database we are searching.
static void read_did_increase(const char **posptr, const char *end, Xapian::docid *did_ptr)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID.
Xapian::termcount get_doclength(Xapian::docid did) const
Virtual methods of Database::Internal.
Postlists in chert databases.
std::string pack_glass_postlist_key(const std::string &term)
const unsigned int CHUNKSIZE
Xapian::docid did
Document id we're currently at.
Pack types into strings and unpack them again.
static bool get_tname_from_key(const char **src, const char *end, string &tname)
static void read_number_of_entries(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the number of entries and the collection frequency.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
static Xapian::docid read_start_of_first_chunk(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the start of the first chunk in the posting list.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
ChertPositionList positionlist
The position list object for this posting list.
A postlist in a chert database.
Xapian::termcount get_wdf() const
static bool check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
bool next_in_chunk()
Move to the next item in the chunk, if possible.
Abstract base class for iterating term positions in a document.
A smart pointer that uses intrusive reference counting.
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk...
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to next document with docid >= docid.
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
string make_key(Xapian::docid did)
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
const char * pos
Position of iteration through current chunk.
void next()
Advance to the next entry.