45 LOGLINE(DB,
"GlassPostList data ran out");
47 "reading posting list");
50 LOGLINE(DB,
"GlassPostList value too large");
67 if (keyend - *keypos >= 2 && (*keypos)[0] ==
'\0' && (*keypos)[1] ==
'\xe0') {
76 return tname_in_key == tname;
82 if (*keypos == keyend)
return false;
94 LOGCALL_STATIC(DB,
Xapian::docid,
"read_start_of_first_chunk", (
const void*)posptr | (
const void*)end | (
void*)termfreq_ptr | (
void*)collection_freq_ptr);
99 if (collection_freq_ptr)
117 *did_ptr += did_increase + 1;
132 bool * is_last_chunk_ptr)
134 LOGCALL_STATIC(DB,
Xapian::docid,
"read_start_of_chunk",
reinterpret_cast<const void*
>(posptr) |
reinterpret_cast<const void*
>(end) | first_did_in_chunk |
reinterpret_cast<const void*
>(is_last_chunk_ptr));
135 Assert(is_last_chunk_ptr);
146 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
148 RETURN(last_did_in_chunk);
157 string key = make_key(
term);
159 if (!get_exact_entry(key, tag)) {
167 const char *
p = tag.data();
168 const char * e =
p + tag.size();
177 if (cf == 0 || tf == 1) {
189 *wdfub_ptr = max(cf - first_wdf, first_wdf);
203 if (!doclen_pl->jump_to(did))
205 return doclen_pl->get_wdf();
217 return (doclen_pl->jump_to(did));
236 bool is_first_chunk_,
238 bool is_last_chunk_);
248 first_did = first_did_;
249 current_did = current_did_;
299 : data(data_),
pos(data.data()), end(
pos + data.length()), at_end(data.empty()), did(first_did)
323 PostlistChunkReader::next()
333 PostlistChunkWriter::PostlistChunkWriter(string_view orig_key_,
334 bool is_first_chunk_,
337 : orig_key(orig_key_),
338 tname(tname_), is_first_chunk(is_first_chunk_),
339 is_last_chunk(is_last_chunk_),
342 LOGCALL_CTOR(DB,
"PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
393 Assert(new_final_did >= new_first_did);
396 pack_uint(chunk, new_final_did - new_first_did);
402 unsigned int start_of_chunk_header,
403 unsigned int end_of_chunk_header,
408 Assert(
size_t(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
410 chunk.replace(start_of_chunk_header,
411 end_of_chunk_header - start_of_chunk_header,
438 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting chunk");
441 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting first chunk");
455 unique_ptr<GlassCursor> cursor(table->
cursor_get());
457 if (!cursor->find_entry(
orig_key)) {
474 const char *tagpos = cursor->current_tag.data();
475 const char *tagend = tagpos + cursor->current_tag.size();
478 &num_ent, &coll_freq);
482 if (!cursor->next()) {
485 const char *kpos = cursor->current_key.data();
486 const char *kend = kpos + cursor->current_key.size();
498 const char *tagpos = cursor->current_tag.data();
499 const char *tagend = tagpos + cursor->current_tag.size();
502 bool new_is_last_chunk;
507 string chunk_data(tagpos, tagend);
510 table->
del(cursor->current_key);
517 new_last_did_in_chunk);
523 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting secondary chunk");
530 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting secondary last chunk");
532 unique_ptr<GlassCursor> cursor(table->
cursor_get());
540 const char * keypos = cursor->current_key.data();
541 const char * keyend = keypos + cursor->current_key.size();
546 bool is_prev_first_chunk = (keypos == keyend);
550 string tag = cursor->current_tag;
552 const char *tagpos = tag.data();
553 const char *tagend = tagpos + tag.size();
557 if (is_prev_first_chunk) {
564 bool wrong_is_last_chunk;
565 string::size_type start_of_chunk_header = tagpos - tag.data();
568 &wrong_is_last_chunk);
569 string::size_type end_of_chunk_header = tagpos - tag.data();
573 start_of_chunk_header,
578 table->
add(cursor->current_key, tag);
581 LOGLINE(DB,
"PostlistChunkWriter::flush(): updating chunk which still has items in it");
597 LOGLINE(DB,
"PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
607 const char * tagpos = tag.data();
608 const char * tagend = tagpos + tag.size();
610 &num_ent, &coll_freq);
617 table->
add(key, tag);
621 LOGLINE(DB,
"PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
633 const char *keypos =
orig_key.data();
634 const char *keyend = keypos +
orig_key.size();
658 table->
add(new_key, tag);
673 if (!
unpack_uint(posptr, end, collection_freq_ptr))
700 this_db(keep_reference ? this_db_ : NULL),
703 cursor(this_db_->postlist_table.cursor_get())
705 LOGCALL_CTOR(DB,
"GlassPostList", this_db_.
get() | term_ | keep_reference);
726 int found =
cursor->find_entry(key);
728 LOGLINE(DB,
"postlist for term not found");
768 LOGCALL(DB,
bool,
"GlassPostList::open_nearby_postlist", term_ | need_read_pos |
Literal(
"LeafPostList*&"));
785 LOGCALL(DB,
bool,
"GlassPostList::next_in_chunk", NO_ARGS);
813 const char * keypos =
cursor->current_key.data();
814 const char * keyend = keypos +
cursor->current_key.size();
829 ") is not greater than final document ID in previous chunk (" +
890 LOGCALL(DB,
bool,
"GlassPostList::current_chunk_contains", desired_did);
901 LOGCALL_VOID(DB,
"GlassPostList::move_to_chunk_containing", desired_did);
905 const char * keypos =
cursor->current_key.data();
906 const char * keyend = keypos +
cursor->current_key.size();
920 if (keypos == keyend) {
922 #ifdef XAPIAN_ASSERTIONS
949 LOGCALL(DB,
bool,
"GlassPostList::move_forward_in_chunk_to_at_least", desired_did);
950 if (
did >= desired_did)
956 if (
did >= desired_did) {
975 LOGCALL(DB,
PostList *,
"GlassPostList::skip_to", desired_did | w_min);
1000 LOGLINE(DB,
"Skipped to docid " <<
did <<
", wdf = " <<
wdf);
1010 LOGCALL(DB,
bool,
"GlassPostList::jump_to", desired_did);
1071 unique_ptr<GlassCursor> cursor(
cursor_get());
1073 (void)cursor->find_entry(key);
1074 Assert(!cursor->after_end());
1076 const char * keypos = cursor->current_key.data();
1077 const char * keyend = keypos + cursor->current_key.size();
1086 "an entry in a non-existent "
1088 "for "s.append(tname));
1097 bool is_first_chunk = (keypos == keyend);
1101 const char *
pos = cursor->current_tag.data();
1102 const char * end =
pos + cursor->current_tag.size();
1104 if (is_first_chunk) {
1117 if (did > last_did_in_chunk) {
1122 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1130 if (!cursor->next()) {
1133 const char *kpos = cursor->current_key.data();
1134 const char *kend = kpos + cursor->current_key.size();
1144 RETURN(first_did_of_next_chunk - 1);
1150 LOGCALL_VOID(DB,
"GlassPostListTable::merge_doclen_changes", doclens);
1156 if (doclens.empty())
return;
1161 LOGLINE(DB,
"Adding dummy first chunk");
1163 add(current_key,
"\0\0\0\x31\0"s);
1166 map<Xapian::docid, Xapian::termcount>::const_iterator j;
1167 j = doclens.begin();
1168 Assert(j != doclens.end());
1173 max_did =
get_chunk({}, j->first,
true, &from, &to);
1175 for ( ; j != doclens.end(); ++j) {
1179 LOGLINE(DB,
"Updating doclens, did=" << did);
1182 if (copy_did >= did) {
1183 if (copy_did == did) from->
next();
1189 if ((!from || from->
is_at_end()) && did > max_did) {
1193 max_did =
get_chunk({}, did,
false, &from, &to);
1194 goto next_doclen_chunk;
1199 to->
append(
this, did, new_doclen);
1226 const char *
pos = tag.data();
1227 const char *end =
pos + tag.size();
1236 firstdid = lastdid = 1;
1240 &termfreq, &collfreq);
1246 if (termfreq == 0) {
1258 while (cursor.
del()) {
1260 const char *kend = kpos + cursor.
current_key.size();
1271 add(current_key, newhdr);
1273 Assert(
size_t(
pos - tag.data()) <= tag.size());
1274 tag.replace(0,
pos - tag.data(), newhdr);
1275 add(current_key, tag);
1278 map<Xapian::docid, Xapian::termcount>::const_iterator j;
1286 for ( ; j != changes.
pl_changes.end(); ++j) {
1290 LOGLINE(DB,
"Updating term=" <<
term <<
", did=" << did);
1293 if (copy_did >= did) {
1294 if (copy_did == did) {
1302 if ((!from || from->
is_at_end()) && did > max_did) {
1312 to->
append(
this, did, new_wdf);
1340 const char *
p = cur->current_tag.data();
1341 const char * e =
p + cur->current_tag.size();
1346 Assert(!cur->after_end());
1348 const char * keypos = cur->current_key.data();
1349 const char * keyend = keypos + cur->current_key.size();
1359 p = cur->current_tag.data();
1360 e =
p + cur->current_tag.size();
1363 if (keypos == keyend) {
1364 start_of_last_chunk = first;
1369 &start_of_last_chunk)) {
1385 #ifdef DISABLE_GPL_LIBXAPIAN
1386 # error GPL source we cannot relicense included in libxapian
A cursor pointing to a position in a Btree table, for reading several entries in order,...
string current_key
Current key pointed to by cursor.
bool find_entry(const string &key)
Position the cursor on the highest entry whose key is <= the given key.
PositionList * open_position_list(Xapian::docid did, std::string_view term) const
virtual void read_position_list(GlassRePositionList *pos_list, Xapian::docid did, std::string_view term) const
GlassPositionListTable position_table
Table storing position lists.
GlassPostListTable postlist_table
Table storing posting lists.
Xapian::docid get_chunk(std::string_view tname, Xapian::docid did, bool adding, Glass::PostlistChunkReader **from, Glass::PostlistChunkWriter **to)
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Check if document did exists.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
static std::string make_key(std::string_view term, Xapian::docid did)
Compose a key from a termname and docid.
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Returns the length of document did.
void merge_changes(std::string_view term, const Inverter::PostingChanges &changes)
Merge changes for a term.
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr, Xapian::termcount *wdfub_ptr=NULL) const
Returns frequencies for a term.
void merge_doclen_changes(const std::map< Xapian::docid, Xapian::termcount > &doclens)
Merge document length changes.
std::unique_ptr< GlassPostList > doclen_pl
PostList for looking up document lengths.
A postlist in a glass database.
bool have_started
Whether we've started reading the list yet.
Xapian::docid did
Document id we're currently at.
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk.
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID.
PositionList * open_position_list() const
Get the list of positions of the term in the current document.
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to the next document with docid >= desired_did.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
bool is_at_end
Whether we've run off the end of the list yet.
bool next_in_chunk()
Move to the next item in the chunk, if possible.
static void read_freqs(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the term frequency and collection frequency.
std::string get_description() const
Get a description of the document.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
Xapian::Internal::intrusive_ptr< const GlassDatabase > this_db
The database we are searching.
Xapian::termcount get_wdf_upper_bound() const
bool open_nearby_postlist(std::string_view term_, bool need_read_pos, LeafPostList *&pl) const
Open another postlist from the same database.
GlassPostList(const GlassPostList &)
Copying is not allowed.
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens.
~GlassPostList()
Destructor.
GlassRePositionList * positionlist
The position list object for this posting list.
PositionList * read_position_list()
Get the list of positions of the term in the current document.
const char * end
Pointer to byte after end of current chunk.
const char * pos
Position of iteration through current chunk.
bool is_last_chunk
True if this is the last chunk.
void next_chunk()
Move to the next chunk.
std::unique_ptr< GlassCursor > cursor
Cursor pointing to current chunk of postlist.
Xapian::termcount wdf
The wdf of the current document.
A reusable position list in a glass database.
Class managing a Btree table in a Glass database.
bool key_exists(std::string_view key) const
Check if a key exists in the Btree.
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
bool get_exact_entry(std::string_view key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
bool del(std::string_view key)
Delete an entry from the table.
bool is_writable() const
Return true if this table is writable.
void add(std::string_view key, std::string_view tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
void next()
Advance to the next entry.
PostlistChunkReader(Xapian::docid first_did, const string &data_)
Initialise the postlist chunk reader.
Xapian::termcount get_wdf() const
Xapian::docid get_docid() const
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk,...
Xapian::docid current_did
void flush(GlassTable *table)
Flush the chunk to the buffered table.
void raw_append(Xapian::docid first_did_, Xapian::docid current_did_, const string &s)
Append a block of raw entries to this chunk.
void append(GlassTable *table, Xapian::docid did, Xapian::termcount wdf)
Append an entry to this chunk.
Class for storing the changes in frequencies for a term.
Xapian::termcount get_tfdelta() const
Get the term frequency delta.
Xapian::termcount get_cfdelta() const
Get the collection frequency delta.
std::map< Xapian::docid, Xapian::termcount > pl_changes
Changes to this term's postlist.
Abstract base class for leaf postlists.
Xapian::termcount collfreq
The collection frequency of the term.
std::string term
The term name for this postlist (empty for an alldocs postlist).
bool del()
Delete the current key/tag pair, leaving the cursor on the next entry.
DatabaseCorruptError indicates database corruption was detected.
Indicates an attempt to access a document not present in the database.
Abstract base class for postlists.
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
PostList * next()
Advance the current position to the next document in the postlist.
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
A smart pointer that uses intrusive reference counting.
Abstract base class for iterating term positions in a document.
RangeError indicates an attempt to access outside the bounds of a container.
#define UNSIGNED_OVERFLOW_OK(X)
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
#define LOGCALL_DTOR(CATEGORY, CLASS)
Append a string to an object description, escaping invalid UTF-8.
Interface to Btree cursors.
C++ class definition for glass database.
#define GLASS_MAX_DOCID
The largest docid value supported by glass.
static string make_start_of_chunk(bool new_is_last_chunk, Xapian::docid new_first_did, Xapian::docid new_final_did)
Make the data to go at the start of a standard chunk.
static void report_read_error(const char *position)
Report an error when reading the posting list.
static string make_start_of_first_chunk(Xapian::doccount entries, Xapian::termcount collectionfreq, Xapian::docid new_did)
Make the data to go at the start of the very first chunk.
static bool get_tname_from_key(const char **src, const char *end, string &tname)
static bool check_tname_in_key(const char **keypos, const char *keyend, string_view tname)
static void read_did_increase(const char **posptr, const char *end, Xapian::docid *did_ptr)
static bool check_tname_in_key_lite(const char **keypos, const char *keyend, string_view tname)
static Xapian::docid read_start_of_chunk(const char **posptr, const char *end, Xapian::docid first_did_in_chunk, bool *is_last_chunk_ptr)
Read the start of a chunk.
static Xapian::docid read_start_of_first_chunk(const char **posptr, const char *end, Xapian::doccount *termfreq_ptr, Xapian::termcount *collection_freq_ptr)
Read the start of the first chunk in the posting list.
static void read_wdf(const char **posptr, const char *end, Xapian::termcount *wdf_ptr)
Read the wdf for an entry.
static void write_start_of_chunk(string &chunk, unsigned int start_of_chunk_header, unsigned int end_of_chunk_header, bool is_last_chunk, Xapian::docid first_did_in_chunk, Xapian::docid last_did_in_chunk)
const unsigned int CHUNKSIZE
Postlists in glass databases.
string str(int value)
Convert int to std::string.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Pack types into strings and unpack them again.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string.
void pack_bool(std::string &s, bool value)
Append an encoded bool to a string.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
std::string pack_glass_postlist_key(std::string_view term)
Convert types to std::string.
void description_append(std::string &desc, std::string_view s)