49         const char * p = tag.data();
    51                                               termfreq_ptr, collfreq_ptr);
   108             first_did = first_did_;
   109             current_did = current_did_;
   145         LOGLINE(DB, 
"ChertPostList data ran out");
   149     LOGLINE(DB, 
"ChertPostList value too large");
   164     if (keyend - *keypos >= 2 && (*keypos)[0] == 
'\0' && (*keypos)[1] == 
'\xe0') {
   173     return tname_in_key == tname;
   179     if (*keypos == keyend) 
return false;
   191     LOGCALL_STATIC(DB, 
Xapian::docid, 
"read_start_of_first_chunk", (
const void *)posptr | (
const void *)end | (
void *)number_of_entries_ptr | (
void *)collection_freq_ptr);
   194                            number_of_entries_ptr, collection_freq_ptr);
   195     if (number_of_entries_ptr)
   196         LOGVALUE(DB, *number_of_entries_ptr);
   197     if (collection_freq_ptr)
   215     *did_ptr += did_increase + 1;
   230                     bool * is_last_chunk_ptr)
   232     LOGCALL_STATIC(DB, 
Xapian::docid, 
"read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
   233     Assert(is_last_chunk_ptr);
   244     Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
   246     RETURN(last_did_in_chunk);
   271         : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.
empty()), did(first_did)
   273         if (!at_end) 
read_wdf(&pos, end, &wdf);
   295 PostlistChunkReader::next()
   305 PostlistChunkWriter::PostlistChunkWriter(
const string &orig_key_,
   306                                          bool is_first_chunk_,
   307                                          const string &tname_,
   309         : orig_key(orig_key_),
   310           tname(tname_), is_first_chunk(is_first_chunk_),
   311           is_last_chunk(is_last_chunk_),
   314     LOGCALL_CTOR(DB, 
"PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
   365     Assert(new_final_did >= new_first_did);
   368     pack_uint(chunk, new_final_did - new_first_did);
   374                      unsigned int start_of_chunk_header,
   375                      unsigned int end_of_chunk_header,
   380     Assert((
size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
   382     chunk.replace(start_of_chunk_header,
   383                   end_of_chunk_header - start_of_chunk_header,
   410         LOGLINE(DB, 
"PostlistChunkWriter::flush(): deleting chunk");
   413             LOGLINE(DB, 
"PostlistChunkWriter::flush(): deleting first chunk");
   427             AutoPtr<ChertCursor> cursor(table->
cursor_get());
   429             if (!cursor->find_entry(
orig_key)) {
   446                 const char *tagpos = cursor->current_tag.data();
   447                 const char *tagend = tagpos + cursor->current_tag.size();
   450                                                 &num_ent, &coll_freq);
   455             if (cursor->after_end()) {
   458             const char *kpos = cursor->current_key.data();
   459             const char *kend = kpos + cursor->current_key.size();
   471             const char *tagpos = cursor->current_tag.data();
   472             const char *tagend = tagpos + cursor->current_tag.size();
   475             bool new_is_last_chunk;
   480             string chunk_data(tagpos, tagend);
   483             table->
del(cursor->current_key);
   490                                               new_last_did_in_chunk);
   496         LOGLINE(DB, 
"PostlistChunkWriter::flush(): deleting secondary chunk");
   503             LOGLINE(DB, 
"PostlistChunkWriter::flush(): deleting secondary last chunk");
   505             AutoPtr<ChertCursor> cursor(table->
cursor_get());
   513             const char * keypos = cursor->current_key.data();
   514             const char * keyend = keypos + cursor->current_key.size();
   519             bool is_prev_first_chunk = (keypos == keyend);
   523             string tag = cursor->current_tag;
   525             const char *tagpos = tag.data();
   526             const char *tagend = tagpos + tag.size();
   530             if (is_prev_first_chunk) {
   537             bool wrong_is_last_chunk;
   538             string::size_type start_of_chunk_header = tagpos - tag.data();
   541                                     &wrong_is_last_chunk);
   542             string::size_type end_of_chunk_header = tagpos - tag.data();
   546                                  start_of_chunk_header,
   551             table->
add(cursor->current_key, tag);
   554         LOGLINE(DB, 
"PostlistChunkWriter::flush(): updating chunk which still has items in it");
   570             LOGLINE(DB, 
"PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
   580                 const char * tagpos = tag.data();
   581                 const char * tagend = tagpos + tag.size();
   583                                                 &num_ent, &coll_freq);
   590             table->
add(key, tag);
   594         LOGLINE(DB, 
"PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
   606         const char *keypos = 
orig_key.data();
   607         const char *keyend = keypos + 
orig_key.size();
   631         table->
add(new_key, tag);
   644     if (!
unpack_uint(posptr, end, number_of_entries_ptr))
   646     if (!
unpack_uint(posptr, end, collection_freq_ptr))
   670                              const string & term_,
   673           this_db(keep_reference ? this_db_ : NULL),
   676           cursor(this_db_->postlist_table.cursor_get())
   678     LOGCALL_CTOR(DB, 
"ChertPostList", this_db_.
get() | term_ | keep_reference);
   680     int found = 
cursor->find_entry(key);
   682         LOGLINE(DB, 
"postlist for term not found");
   736     LOGCALL(DB, 
bool, 
"ChertPostList::next_in_chunk", NO_ARGS);
   760     if (
cursor->after_end()) {
   765     const char * keypos = 
cursor->current_key.data();
   766     const char * keyend = keypos + 
cursor->current_key.size();
   781                 ") is not greater than final document ID in previous chunk (" +
   837     LOGCALL(DB, 
bool, 
"ChertPostList::current_chunk_contains", desired_did);
   848     LOGCALL_VOID(DB, 
"ChertPostList::move_to_chunk_containing", desired_did);
   852     const char * keypos = 
cursor->current_key.data();
   853     const char * keyend = keypos + 
cursor->current_key.size();
   867     if (keypos == keyend) {
   869 #ifdef XAPIAN_ASSERTIONS   896     LOGCALL(DB, 
bool, 
"ChertPostList::move_forward_in_chunk_to_at_least", desired_did);
   897     if (
did >= desired_did)
   903             if (
did >= desired_did) {
   922     LOGCALL(DB, 
PostList *, 
"ChertPostList::skip_to", desired_did | w_min);
   947         LOGLINE(DB, 
"Skipped to docid " << 
did << 
", wdf = " << 
wdf);
   957     LOGCALL(DB, 
bool, 
"ChertPostList::jump_to", desired_did);
  1000     AutoPtr<ChertCursor> 
cursor(cursor_get());
  1002     (void)cursor->find_entry(key);
  1003     Assert(!cursor->after_end());
  1005     const char * keypos = cursor->current_key.data();
  1006     const char * keyend = keypos + cursor->current_key.size();
  1020     bool is_first_chunk = (keypos == keyend);
  1024     const char * 
pos = cursor->current_tag.data();
  1025     const char * 
end = pos + cursor->current_tag.size();
  1027     if (is_first_chunk) {
  1040     if (did > last_did_in_chunk) {
  1045         (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
  1054     if (cursor->after_end()) {
  1057     const char *kpos = cursor->current_key.data();
  1058     const char *kend = kpos + cursor->current_key.size();
  1068     RETURN(first_did_of_next_chunk - 1);
  1073     const map<
string, map<
Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
  1074     const map<Xapian::docid, Xapian::termcount> & doclens,
  1075     const map<
string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
  1077     LOGCALL_VOID(DB, 
"ChertPostListTable::merge_changes", mod_plists | doclens | freq_deltas);
  1083     if (!doclens.empty()) {
  1085         string current_key = 
make_key(
string());
  1086         if (!key_exists(current_key)) {
  1087             LOGLINE(DB, 
"Adding dummy first chunk");
  1090             add(current_key, newtag);
  1093         map<Xapian::docid, Xapian::termcount>::const_iterator j;
  1094         j = doclens.begin();
  1095         Assert(j != doclens.end()); 
  1100         max_did = get_chunk(
string(), j->first, 
true, &from, &to);
  1102         for ( ; j != doclens.end(); ++j) {
  1106             LOGLINE(DB, 
"Updating doclens, did=" << did);
  1109                 if (copy_did >= did) {
  1110                     if (copy_did == did) from->
next();
  1116             if ((!from || from->
is_at_end()) && did > max_did) {
  1120                 max_did = get_chunk(
string(), did, 
false, &from, &to);
  1121                 goto next_doclen_chunk;
  1125             if (new_doclen != static_cast<Xapian::termcount>(-1)) {
  1126                 to->
append(
this, did, new_doclen);
  1141     map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
  1142     for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
  1143         if (i->second.empty()) 
continue;
  1144         string tname = i->first;
  1148             map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
  1149             Assert(deltas != freq_deltas.end());
  1151             string current_key = 
make_key(tname);
  1153             (void)get_exact_entry(current_key, tag);
  1156             const char *
pos = tag.data();
  1157             const char *
end = pos + tag.size();
  1170                                                      &termfreq, &collfreq);
  1175             termfreq += deltas->second.first;
  1176             if (termfreq == 0) {
  1187                 if (!found) 
continue; 
  1188                 while (cursor.
del()) {
  1190                     const char *kend = kpos + cursor.
current_key.size();
  1195             collfreq += deltas->second.second;
  1201                 add(current_key, newhdr);
  1203                 Assert((
size_t)(pos - tag.data()) <= tag.size());
  1204                 tag.replace(0, pos - tag.data(), newhdr);
  1205                 add(current_key, tag);
  1208         map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
  1209         j = i->second.begin();
  1210         Assert(j != i->second.end()); 
  1215         max_did = get_chunk(tname, j->first, j->second.first == 
'A',
  1217         for ( ; j != i->second.end(); ++j) {
  1221             LOGLINE(DB, 
"Updating tname=" << tname << 
", did=" << did);
  1224                 if (copy_did >= did) {
  1225                     if (copy_did == did) {
  1226                         Assert(j->second.first != 
'A');
  1234             if ((!from || from->
is_at_end()) && did > max_did) {
  1238                 max_did = get_chunk(tname, did, 
false, &from, &to);
  1242             if (j->second.first != 
'D') {
  1244                 to->
append(
this, did, new_wdf);
  1265     AutoPtr<ChertCursor> cur(cursor_get());
  1273     const char * p = cur->current_tag.data();
  1274     const char * e = p + cur->current_tag.size();
  1279     Assert(!cur->after_end());
  1281     const char * keypos = cur->current_key.data();
  1282     const char * keyend = keypos + cur->current_key.size();
  1292     p = cur->current_tag.data();
  1293     e = p + cur->current_tag.size();
  1296     if (keypos == keyend) {
  1297         start_of_last_chunk = first;
  1302                                          &start_of_last_chunk)) {
 void pack_bool(std::string &s, bool value)
Append an encoded bool to a string. 
 
static void read_wdf(const char **posptr, const char *end, Xapian::termcount *wdf_ptr)
Read the wdf for an entry. 
 
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
 
Xapian::docid get_chunk(const string &tname, Xapian::docid did, bool adding, Chert::PostlistChunkReader **from, Chert::PostlistChunkWriter **to)
 
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk. 
 
Define the XAPIAN_NORETURN macro. 
 
~ChertPostList()
Destructor. 
 
bool is_last_chunk
True if this is the last chunk. 
 
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk. 
 
Abstract base class for postlists. 
 
A position list in a chert database. 
 
Xapian::doccount number_of_entries
The number of entries in the posting list. 
 
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist. 
 
void flush(ChertTable *table)
Flush the chunk to the buffered table. 
 
Xapian::docid first_did_in_chunk
The first document id in this chunk. 
 
const char * end
Pointer to byte after end of current chunk. 
 
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const ChertDatabase > db) const
Returns the length of document did. 
 
Xapian::termcount get_doclength() const
Returns the length of current document. 
 
PostlistChunkReader(Xapian::docid first_did, const string &data_)
Initialise the postlist chunk reader. 
 
static void write_start_of_chunk(string &chunk, unsigned int start_of_chunk_header, unsigned int end_of_chunk_header, bool is_last_chunk, Xapian::docid first_did_in_chunk, Xapian::docid last_did_in_chunk)
 
static string make_start_of_chunk(bool new_is_last_chunk, Xapian::docid new_first_did, Xapian::docid new_final_did)
Make the data to go at the start of a standard chunk. 
 
Class managing a Btree table in a Chert database. 
 
static void report_read_error(const char *position)
Report an error when reading the posting list. 
 
#define LOGCALL_DTOR(CATEGORY, CLASS)
 
std::string get_description() const
Get a description of the document. 
 
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
 
Convert types to std::string. 
 
Xapian::docid get_docid() const
 
ChertPositionListTable position_table
Table storing position lists. 
 
Abstract base class for leaf postlists. 
 
void get_freqs(const std::string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term. 
 
std::string term
The term name for this postlist (empty for an alldocs postlist). 
 
Xapian::termcount wdf
The wdf of the current document. 
 
Xapian::termcount get_wdf_upper_bound() const
 
void next_chunk()
Move to the next chunk. 
 
PositionList * open_position_list() const
Get the list of positions of the term in the current document. 
 
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens. 
 
static Xapian::docid read_start_of_chunk(const char **posptr, const char *end, Xapian::docid first_did_in_chunk, bool *is_last_chunk_ptr)
Read the start of a chunk. 
 
ChertPostList(const ChertPostList &)
Copying is not allowed. 
 
Xapian::termcount get_unique_terms(Xapian::docid did) const
Virtual methods of Database::Internal. 
 
bool del(const std::string &key)
Delete an entry from the table. 
 
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms. 
 
RangeError indicates an attempt to access outside the bounds of a container. 
 
bool have_started
Whether we've started reading the list yet. 
 
void append(ChertTable *table, Xapian::docid did, Xapian::termcount wdf)
Append an entry to this chunk. 
 
bool C_unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string. 
 
Interface to Btree cursors. 
 
AutoPtr< ChertCursor > cursor
Cursor pointing to current chunk of postlist. 
 
bool next(Cursor *C_, int j) const
 
bool del()
Delete the current key/tag pair, leaving the cursor on the next entry. 
 
bool read_data(const ChertTable *table, Xapian::docid did, const string &tname)
Fill list with data, and move the position to the start. 
 
Internal * next()
Advance the current position to the next document in the postlist. 
 
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for. 
 
string str(int value)
Convert int to std::string. 
 
bool is_at_end
Whether we've run off the end of the list yet. 
 
bool empty() const
Return true if there are no entries in the table. 
 
#define CHERT_MAX_DOCID
The largest docid value supported by chert. 
 
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document. 
 
static string make_start_of_first_chunk(Xapian::doccount entries, Xapian::termcount collectionfreq, Xapian::docid new_did)
Make the data to go at the start of the very first chunk. 
 
void merge_changes(const map< string, map< Xapian::docid, pair< char, Xapian::termcount > > > &mod_plists, const map< Xapian::docid, Xapian::termcount > &doclens, const map< string, pair< Xapian::termcount_diff, Xapian::termcount_diff > > &freq_deltas)
Merge added, removed, and changed entries. 
 
PositionList * read_position_list()
Get the list of positions of the term in the current document. 
 
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid. 
 
Xapian::docid current_did
 
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string. 
 
ChertCursor * cursor_get() const
Get a cursor for reading from the table. 
 
Xapian::docid last_did_in_chunk
The last document id in this chunk. 
 
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const ChertDatabase > db) const
Check if document did exists. 
 
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
 
AutoPtr< ChertPostList > doclen_pl
PostList for looking up document lengths. 
 
C++ class definition for chert database. 
 
Indicates an attempt to access a document not present in the database. 
 
static bool check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
 
DatabaseCorruptError indicates database corruption was detected. 
 
std::string pack_chert_postlist_key(const std::string &term)
 
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key. 
 
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID. 
 
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string. 
 
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string. 
 
string current_key
Current key pointed to by cursor. 
 
void raw_append(Xapian::docid first_did_, Xapian::docid current_did_, const string &s)
Append a block of raw entries to this chunk. 
 
Xapian::Internal::intrusive_ptr< const ChertDatabase > this_db
The database we are searching. 
 
static void read_did_increase(const char **posptr, const char *end, Xapian::docid *did_ptr)
 
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents. 
 
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID. 
 
Xapian::termcount get_doclength(Xapian::docid did) const
Virtual methods of Database::Internal. 
 
Postlists in chert databases. 
 
std::string pack_glass_postlist_key(const std::string &term)
 
const unsigned int CHUNKSIZE
 
Xapian::docid did
Document id we're currently at. 
 
Pack types into strings and unpack them again. 
 
static bool get_tname_from_key(const char **src, const char *end, string &tname)
 
static void read_number_of_entries(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the number of entries and the collection frequency. 
 
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string. 
 
static Xapian::docid read_start_of_first_chunk(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the start of the first chunk in the posting list. 
 
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
 
ChertPositionList positionlist
The position list object for this posting list. 
 
A postlist in a chert database. 
 
Xapian::termcount get_wdf() const
 
static bool check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
 
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document. 
 
bool next_in_chunk()
Move to the next item in the chunk, if possible. 
 
Abstract base class for iterating term positions in a document. 
 
A smart pointer that uses intrusive reference counting. 
 
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk...
 
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to the next document with docid >= desired_did. 
 
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key. 
 
string make_key(Xapian::docid did)
 
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
 
const char * pos
Position of iteration through current chunk. 
 
void next()
Advance to the next entry.