46 LOGLINE(DB,
"GlassPostList data ran out");
48 "reading posting list");
51 LOGLINE(DB,
"GlassPostList value too large");
66 if (keyend - *keypos >= 2 && (*keypos)[0] ==
'\0' && (*keypos)[1] ==
'\xe0') {
75 return tname_in_key == tname;
81 if (*keypos == keyend)
return false;
93 LOGCALL_STATIC(DB,
Xapian::docid,
"read_start_of_first_chunk", (
const void *)posptr | (
const void *)end | (
void *)number_of_entries_ptr | (
void *)collection_freq_ptr);
96 number_of_entries_ptr,
98 if (number_of_entries_ptr)
99 LOGVALUE(DB, *number_of_entries_ptr);
100 if (collection_freq_ptr)
118 *did_ptr += did_increase + 1;
133 bool * is_last_chunk_ptr)
135 LOGCALL_STATIC(DB,
Xapian::docid,
"read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
136 Assert(is_last_chunk_ptr);
147 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
149 RETURN(last_did_in_chunk);
168 const char * p = tag.data();
169 const char * e = p + tag.size();
178 if (cf == 0 || tf == 1) {
190 *wdfub_ptr = max(cf - first_wdf, first_wdf);
237 bool is_first_chunk_,
238 const string &tname_,
239 bool is_last_chunk_);
249 first_did = first_did_;
250 current_did = current_did_;
300 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.
empty()), did(first_did)
302 if (!at_end)
read_wdf(&pos, end, &wdf);
324 PostlistChunkReader::next()
334 PostlistChunkWriter::PostlistChunkWriter(
const string &orig_key_,
335 bool is_first_chunk_,
336 const string &tname_,
338 : orig_key(orig_key_),
339 tname(tname_), is_first_chunk(is_first_chunk_),
340 is_last_chunk(is_last_chunk_),
343 LOGCALL_CTOR(DB,
"PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
394 Assert(new_final_did >= new_first_did);
397 pack_uint(chunk, new_final_did - new_first_did);
403 unsigned int start_of_chunk_header,
404 unsigned int end_of_chunk_header,
409 Assert(
size_t(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
411 chunk.replace(start_of_chunk_header,
412 end_of_chunk_header - start_of_chunk_header,
439 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting chunk");
442 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting first chunk");
456 AutoPtr<GlassCursor> cursor(table->
cursor_get());
458 if (!cursor->find_entry(
orig_key)) {
475 const char *tagpos = cursor->current_tag.data();
476 const char *tagend = tagpos + cursor->current_tag.size();
479 &num_ent, &coll_freq);
484 if (cursor->after_end()) {
487 const char *kpos = cursor->current_key.data();
488 const char *kend = kpos + cursor->current_key.size();
500 const char *tagpos = cursor->current_tag.data();
501 const char *tagend = tagpos + cursor->current_tag.size();
504 bool new_is_last_chunk;
509 string chunk_data(tagpos, tagend);
512 table->
del(cursor->current_key);
519 new_last_did_in_chunk);
525 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting secondary chunk");
532 LOGLINE(DB,
"PostlistChunkWriter::flush(): deleting secondary last chunk");
534 AutoPtr<GlassCursor> cursor(table->
cursor_get());
542 const char * keypos = cursor->current_key.data();
543 const char * keyend = keypos + cursor->current_key.size();
548 bool is_prev_first_chunk = (keypos == keyend);
552 string tag = cursor->current_tag;
554 const char *tagpos = tag.data();
555 const char *tagend = tagpos + tag.size();
559 if (is_prev_first_chunk) {
566 bool wrong_is_last_chunk;
567 string::size_type start_of_chunk_header = tagpos - tag.data();
570 &wrong_is_last_chunk);
571 string::size_type end_of_chunk_header = tagpos - tag.data();
575 start_of_chunk_header,
580 table->
add(cursor->current_key, tag);
583 LOGLINE(DB,
"PostlistChunkWriter::flush(): updating chunk which still has items in it");
599 LOGLINE(DB,
"PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
609 const char * tagpos = tag.data();
610 const char * tagend = tagpos + tag.size();
612 &num_ent, &coll_freq);
619 table->
add(key, tag);
623 LOGLINE(DB,
"PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
635 const char *keypos =
orig_key.data();
636 const char *keyend = keypos +
orig_key.size();
660 table->
add(new_key, tag);
673 if (!
unpack_uint(posptr, end, number_of_entries_ptr))
675 if (!
unpack_uint(posptr, end, collection_freq_ptr))
699 const string & term_,
702 this_db(keep_reference ? this_db_ : NULL),
705 cursor(this_db_->postlist_table.cursor_get())
707 LOGCALL_CTOR(DB,
"GlassPostList", this_db_.
get() | term_ | keep_reference);
712 const string & term_,
728 int found =
cursor->find_entry(key);
730 LOGLINE(DB,
"postlist for term not found");
793 LOGCALL(DB,
bool,
"GlassPostList::next_in_chunk", NO_ARGS);
817 if (
cursor->after_end()) {
822 const char * keypos =
cursor->current_key.data();
823 const char * keyend = keypos +
cursor->current_key.size();
838 ") is not greater than final document ID in previous chunk (" +
894 LOGCALL(DB,
bool,
"GlassPostList::current_chunk_contains", desired_did);
905 LOGCALL_VOID(DB,
"GlassPostList::move_to_chunk_containing", desired_did);
909 const char * keypos =
cursor->current_key.data();
910 const char * keyend = keypos +
cursor->current_key.size();
924 if (keypos == keyend) {
926 #ifdef XAPIAN_ASSERTIONS 953 LOGCALL(DB,
bool,
"GlassPostList::move_forward_in_chunk_to_at_least", desired_did);
954 if (
did >= desired_did)
960 if (
did >= desired_did) {
979 LOGCALL(DB,
PostList *,
"GlassPostList::skip_to", desired_did | w_min);
1002 LOGLINE(DB,
"Skipped to end");
1004 LOGLINE(DB,
"Skipped to docid " <<
did <<
", wdf = " <<
wdf);
1014 LOGCALL(DB,
bool,
"GlassPostList::jump_to", desired_did);
1062 AutoPtr<GlassCursor>
cursor(cursor_get());
1064 (void)cursor->find_entry(key);
1065 Assert(!cursor->after_end());
1067 const char * keypos = cursor->current_key.data();
1068 const char * keyend = keypos + cursor->current_key.size();
1085 bool is_first_chunk = (keypos == keyend);
1089 const char *
pos = cursor->current_tag.data();
1090 const char *
end = pos + cursor->current_tag.size();
1092 if (is_first_chunk) {
1105 if (did > last_did_in_chunk) {
1110 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1119 if (cursor->after_end()) {
1122 const char *kpos = cursor->current_key.data();
1123 const char *kend = kpos + cursor->current_key.size();
1133 RETURN(first_did_of_next_chunk - 1);
1139 LOGCALL_VOID(DB,
"GlassPostListTable::merge_doclen_changes", doclens);
1145 if (doclens.empty())
return;
1148 string current_key =
make_key(
string());
1149 if (!key_exists(current_key)) {
1150 LOGLINE(DB,
"Adding dummy first chunk");
1153 add(current_key, newtag);
1156 map<Xapian::docid, Xapian::termcount>::const_iterator j;
1157 j = doclens.begin();
1158 Assert(j != doclens.end());
1163 max_did = get_chunk(
string(), j->first,
true, &from, &to);
1165 for ( ; j != doclens.end(); ++j) {
1169 LOGLINE(DB,
"Updating doclens, did=" << did);
1172 if (copy_did >= did) {
1173 if (copy_did == did) from->
next();
1179 if ((!from || from->
is_at_end()) && did > max_did) {
1183 max_did = get_chunk(
string(), did,
false, &from, &to);
1184 goto next_doclen_chunk;
1188 if (new_doclen != static_cast<Xapian::termcount>(-1)) {
1189 to->
append(
this, did, new_doclen);
1211 string current_key =
make_key(term);
1213 (void)get_exact_entry(current_key, tag);
1216 const char *
pos = tag.data();
1217 const char *
end = pos + tag.size();
1230 &termfreq, &collfreq);
1236 if (termfreq == 0) {
1248 while (cursor.
del()) {
1250 const char *kend = kpos + cursor.
current_key.size();
1261 add(current_key, newhdr);
1263 Assert(
size_t(pos - tag.data()) <= tag.size());
1264 tag.replace(0, pos - tag.data(), newhdr);
1265 add(current_key, tag);
1268 map<Xapian::docid, Xapian::termcount>::const_iterator j;
1275 max_did = get_chunk(term, j->first,
false, &from, &to);
1276 for ( ; j != changes.
pl_changes.end(); ++j) {
1280 LOGLINE(DB,
"Updating term=" << term <<
", did=" << did);
1283 if (copy_did >= did) {
1284 if (copy_did == did) {
1292 if ((!from || from->
is_at_end()) && did > max_did) {
1296 max_did = get_chunk(term, did,
false, &from, &to);
1302 to->
append(
this, did, new_wdf);
1322 AutoPtr<GlassCursor> cur(cursor_get());
1330 const char * p = cur->current_tag.data();
1331 const char * e = p + cur->current_tag.size();
1336 Assert(!cur->after_end());
1338 const char * keypos = cur->current_key.data();
1339 const char * keyend = keypos + cur->current_key.size();
1349 p = cur->current_tag.data();
1350 e = p + cur->current_tag.size();
1353 if (keypos == keyend) {
1354 start_of_last_chunk = first;
1359 &start_of_last_chunk)) {
void pack_bool(std::string &s, bool value)
Append an encoded bool to a string.
void append(GlassTable *table, Xapian::docid did, Xapian::termcount wdf)
Append an entry to this chunk.
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Xapian::termcount get_wdf() const
Define the XAPIAN_NORETURN macro.
static void read_number_of_entries(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the number of entries and the collection frequency.
Abstract base class for postlists.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Virtual methods of Database::Internal.
static bool check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
void next_chunk()
Move to the next chunk.
void merge_doclen_changes(const map< Xapian::docid, Xapian::termcount > &doclens)
Merge document length changes.
Xapian::docid get_docid() const
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens.
Xapian::termcount_diff get_tfdelta() const
Get the term frequency delta.
#define GLASS_MAX_DOCID
The largest docid value supported by glass.
const char * pos
Position of iteration through current chunk.
bool empty() const
Return true if there are no entries in the table.
GlassPostList(const GlassPostList &)
Copying is not allowed.
void raw_append(Xapian::docid first_did_, Xapian::docid current_did_, const string &s)
Append a block of raw entries to this chunk.
Class managing a Btree table in a Glass database.
AutoPtr< GlassCursor > cursor
Cursor pointing to current chunk of postlist.
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk.
#define LOGCALL_DTOR(CATEGORY, CLASS)
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID.
bool del()
Delete the current key/tag pair, leaving the cursor on the next entry.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Class for storing the changes in frequencies for a term.
Postlists in glass databases.
PositionList * open_position_list() const
Get the list of positions of the term in the current document.
Convert types to std::string.
void add(const std::string &key, const std::string &tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
static bool get_tname_from_key(const char **src, const char *end, string &tname)
Abstract base class for leaf postlists.
std::string term
The term name for this postlist (empty for an alldocs postlist).
static void report_read_error(const char *position)
Report an error when reading the posting list.
Xapian::doccount number_of_entries
The number of entries in the posting list.
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Returns the length of document did.
AutoPtr< GlassPostList > doclen_pl
PostList for looking up document lengths.
bool is_at_end
Whether we've run off the end of the list yet.
static Xapian::docid read_start_of_first_chunk(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the start of the first chunk in the posting list.
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk...
static string make_start_of_chunk(bool new_is_last_chunk, Xapian::docid new_first_did, Xapian::docid new_final_did)
Make the data to go at the start of a standard chunk.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
bool next(Glass::Cursor *C_, int j) const
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to the next document with docid >= desired_did.
virtual void read_position_list(GlassPositionList *pos_list, Xapian::docid did, const string &term) const
Virtual methods of Database::Internal.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
static void write_start_of_chunk(string &chunk, unsigned int start_of_chunk_header, unsigned int end_of_chunk_header, bool is_last_chunk, Xapian::docid first_did_in_chunk, Xapian::docid last_did_in_chunk)
bool is_last_chunk
True if this is the last chunk.
RangeError indicates an attempt to access outside the bounds of a container.
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Check if document did exists.
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid.
void description_append(std::string &desc, const std::string &s)
Xapian::termcount get_unique_terms() const
Returns the number of unique terms in the current document.
const char * end
Pointer to byte after end of current chunk.
string current_key
Current key pointed to by cursor.
bool next_in_chunk()
Move to the next item in the chunk, if possible.
GlassPostListTable postlist_table
Table storing posting lists.
Xapian::termcount_diff get_cfdelta() const
Get the collection frequency delta.
Xapian::termcount get_wdf_upper_bound() const
static string make_start_of_first_chunk(Xapian::doccount entries, Xapian::termcount collectionfreq, Xapian::docid new_did)
Make the data to go at the start of the very first chunk.
PositionList * open_position_list(Xapian::docid did, const string &term) const
Virtual methods of Database::Internal.
std::string get_description() const
Get a description of the document.
Internal * next()
Advance the current position to the next document in the postlist.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
~GlassPostList()
Destructor.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
void flush(GlassTable *table)
Flush the chunk to the buffered table.
string str(int value)
Convert int to std::string.
C++ class definition for glass database.
A postlist in a glass database.
Xapian::docid get_chunk(const string &tname, Xapian::docid did, bool adding, Glass::PostlistChunkReader **from, Glass::PostlistChunkWriter **to)
void merge_changes(const string &term, const Inverter::PostingChanges &changes)
Merge changes for a term.
std::map< Xapian::docid, Xapian::termcount > pl_changes
Changes to this term's postlist.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
const unsigned int CHUNKSIZE
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
Indicates an attempt to access a document not present in the database.
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
static bool check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
DatabaseCorruptError indicates database corruption was detected.
Append a string to an object description, escaping invalid UTF-8.
static void read_wdf(const char **posptr, const char *end, Xapian::termcount *wdf_ptr)
Read the wdf for an entry.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string.
Xapian::Internal::intrusive_ptr< const GlassDatabase > this_db
The database we are searching.
PostlistChunkReader(Xapian::docid first_did, const string &data_)
Initialise the postlist chunk reader.
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
PositionList * read_position_list()
Get the list of positions of the term in the current document.
Interface to Btree cursors.
bool have_started
Whether we've started reading the list yet.
Xapian::docid current_did
std::string pack_glass_postlist_key(const std::string &term)
Xapian::docid did
Document id we're currently at.
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
static Xapian::docid read_start_of_chunk(const char **posptr, const char *end, Xapian::docid first_did_in_chunk, bool *is_last_chunk_ptr)
Read the start of a chunk.
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Xapian::termcount get_doclength(Xapian::docid did) const
Virtual methods of Database::Internal.
LeafPostList * open_nearby_postlist(const std::string &term_) const
Open another postlist from the same database.
Xapian::termcount wdf
The wdf of the current document.
void get_freqs(const std::string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr, Xapian::termcount *wdfub_ptr=NULL) const
Returns frequencies for a term.
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Abstract base class for iterating term positions in a document.
A smart pointer that uses intrusive reference counting.
Xapian::termcount get_doclength() const
Returns the length of current document.
GlassPositionList positionlist
The position list object for this posting list.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
static void read_did_increase(const char **posptr, const char *end, Xapian::docid *did_ptr)
bool is_writable() const
Return true if this table is writable.
bool del(const std::string &key)
Delete an entry from the table.
string make_key(Xapian::docid did)
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
void next()
Advance to the next entry.