49     return key.size() > 1 && key[0] == 
'\0' && key[1] == 
'\xc0';
    61                   vector<Xapian::termcount> & doclens,
    65     string filename = dir;
    67     filename += tablename;
    81     if (rev_ptr && *rev_ptr) {
    82         if (!table.
open(*rev_ptr)) {
    84                 *out << 
"Failed to reopen table after it checked OK" << endl;
    90     AutoPtr<ChertCursor> cursor(table.
cursor_get());
    94     cursor->find_entry(
string());
    97     if (strcmp(tablename, 
"postlist") == 0) {
    99         map<Xapian::valueno, VStats> valuestats;
   105         bool have_metainfo_key = 
false;
   109         if (!cursor->after_end()) {
   110             if (cursor->current_key == 
string(
"", 1)) {
   111                 have_metainfo_key = 
true;
   120                 const char * data = cursor->current_tag.data();
   121                 const char * end = data + cursor->current_tag.size();
   124                         *out << 
"Tag containing meta information is corrupt (couldn't read last_docid)." << endl;
   126                 } 
else if (!
unpack_uint(&data, end, &doclen_lbound)) {
   128                         *out << 
"Tag containing meta information is corrupt (couldn't read doclen_lbound)." << endl;
   130                 } 
else if (!
unpack_uint(&data, end, &wdf_ubound)) {
   132                         *out << 
"Tag containing meta information is corrupt (couldn't read wdf_ubound)." << endl;
   134                 } 
else if (!
unpack_uint(&data, end, &doclen_ubound)) {
   136                         *out << 
"Tag containing meta information is corrupt (couldn't read doclen_ubound)." << endl;
   140                         *out << 
"Tag containing meta information is corrupt (couldn't read total_doclen)." << endl;
   142                 } 
else if (data != end) {
   144                         *out << 
"Tag containing meta information is corrupt (junk at end)." << endl;
   151         bool seen_doclen_initial_chunk = 
false;
   152         for ( ; !cursor->after_end(); cursor->next()) {
   153             string & key = cursor->current_key;
   159                 if (cursor->current_tag.empty()) {
   161                         *out << 
"User metadata item is empty" << endl;
   167             if (!have_metainfo_key) {
   168                 have_metainfo_key = 
true;
   170                     *out << 
"METAINFO key missing from postlist table" << endl;
   174             if (key.size() >= 2 && key[0] == 
'\0' && key[1] == 
'\xe0') {
   176                 const char * pos, * end;
   178                 if (key.size() > 2) {
   180                     if (!seen_doclen_initial_chunk) {
   182                             *out << 
"Doclen initial chunk missing" << endl;
   186                     end = pos + key.size();
   190                             *out << 
"Error unpacking docid from doclen key" << endl;
   194                     if (did <= lastdid) {
   196                             *out << 
"First did in this chunk is <= last in "   197                                     "prev chunk" << endl;
   203                 pos = cursor->current_tag.data();
   204                 end = pos + cursor->current_tag.size();
   205                 if (key.size() == 2) {
   207                     seen_doclen_initial_chunk = 
true;
   208                     if (end - pos < 2 || pos[0] || pos[1]) {
   210                             *out << 
"Initial doclen chunk has nonzero dummy fields" << endl;
   217                             *out << 
"Failed to unpack firstdid for doclen" << endl;
   227                         *out << 
"Failed to unpack last chunk flag for doclen" << endl;
   234                         *out << 
"Failed to unpack increase to last" << endl;
   244                             *out << 
"Failed to unpack doclen" << endl;
   252                     if (did > db_last_docid) {
   254                             *out << 
"document id " << did << 
" in doclen "   255                                     "stream is larger than get_last_docid() "   256                                  << db_last_docid << endl;
   260                     if (!doclens.empty()) {
   264                         if (did < doclens.size())
   265                             termlist_doclen = doclens[did];
   267                         if (doclen != termlist_doclen) {
   269                                 *out << 
"document id " << did << 
": length "   270                                      << doclen << 
" doesn't match "   271                                      << termlist_doclen << 
" in the termlist "   277                     if (pos == end) 
break;
   282                             *out << 
"Failed to unpack docid increase" << endl;
   291                             *out << 
"docid " << did << 
" > last docid "   300                     if (did != lastdid) {
   302                             *out << 
"lastdid " << lastdid << 
" != last did "   311             if (key.size() >= 2 && key[0] == 
'\0' && key[1] == 
'\xd0') {
   313                 const char * p = key.data();
   314                 const char * end = p + key.length();
   319                         *out << 
"Bad valuestats key (no slot)" << endl;
   325                 p = cursor->current_tag.data();
   326                 end = p + cursor->current_tag.size();
   328                 VStats & v = valuestats[slot];
   332                             *out << 
"Incomplete stats item in value table";
   334                             *out << 
"Frequency statistic in value table is too large";
   344                             *out << 
"Incomplete stats item in value table";
   346                             *out << 
"Lower bound statistic in value table is too large";
   353                 size_t len = end - p;
   363             if (key.size() >= 2 && key[0] == 
'\0' && key[1] == 
'\xd8') {
   365                 const char * p = key.data();
   366                 const char * end = p + key.length();
   371                         *out << 
"Bad value chunk key (no slot)" << endl;
   378                         *out << 
"Bad value chunk key (no docid)" << endl;
   384                         *out << 
"Bad value chunk key (trailing junk)" << endl;
   389                 VStats & v = valuestats[slot];
   392                 p = cursor->current_tag.data();
   393                 end = p + cursor->current_tag.size();
   399                             *out << 
"Failed to unpack value from chunk" << endl;
   414                             *out << 
"Value slot " << slot << 
" has value "   415                                     "below lower bound: '" << value << 
"' < '"   420                             *out << 
"Value slot " << slot << 
" has value "   421                                     "above upper bound: '" << value << 
"' > '"   430                             *out << 
"Failed to unpack docid delta from chunk"   436                     if (new_did <= did) {
   438                             *out << 
"docid overflowed in value chunk" << endl;
   444                     if (did > db_last_docid) {
   446                             *out << 
"document id " << did << 
" in value chunk "   447                                     "is larger than get_last_docid() "   448                                  << db_last_docid << endl;
   455             const char * pos, * end;
   459             end = pos + key.size();
   465                     *out << 
"Error unpacking termname from key" << endl;
   469             if (!current_term.empty() && term != current_term) {
   473                         *out << 
"No last chunk for term '" << current_term
   475                     current_term.resize(0);
   478                         *out << 
"Mismatch in follow-on chunk in posting list "   479                                 "for term '" << current_term << 
"' (got '"   480                              << term << 
"')" << endl;
   489                 if (term == current_term) {
   492                         *out << 
"First posting list chunk for term '" << term
   493                              << 
"' follows previous chunk for the same term"   502                 pos = cursor->current_tag.data();
   503                 end = pos + cursor->current_tag.size();
   506                         *out << 
"Failed to unpack termfreq for term '" << term
   513                         *out << 
"Failed to unpack collfreq for term '" << term
   520                         *out << 
"Failed to unpack firstdid for term '" << term
   528                 if (current_term.empty()) {
   530                         *out << 
"First chunk for term '" << current_term
   531                              << 
"' is a continuation chunk" << endl;
   538                         *out << 
"Failed to unpack did from key" << endl;
   542                 if (did <= lastdid) {
   544                         *out << 
"First did in this chunk is <= last in "   545                                 "prev chunk" << endl;
   549                 pos = cursor->current_tag.data();
   550                 end = pos + cursor->current_tag.size();
   556                     *out << 
"Failed to unpack last chunk flag" << endl;
   563                     *out << 
"Failed to unpack increase to last" << endl;
   573                         *out << 
"Failed to unpack wdf" << endl;
   581                 if (pos == end) 
break;
   586                         *out << 
"Failed to unpack docid increase" << endl;
   595                         *out << 
"docid " << did << 
" > last docid " << lastdid
   604                 if (tf != termfreq) {
   606                         *out << 
"termfreq " << termfreq << 
" != # of entries "   610                 if (cf != collfreq) {
   612                         *out << 
"collfreq " << collfreq << 
" != sum wdf " << cf
   616                 if (did != lastdid) {
   618                         *out << 
"lastdid " << lastdid << 
" != last did " << did
   622                 current_term.resize(0);
   625         if (!current_term.empty()) {
   627                 *out << 
"Last term '" << current_term << 
"' has no last chunk"   634                 *out << 
"Document length list has " << num_doclens
   635                      << 
" entries, should be " << doccount << endl;
   639         map<Xapian::valueno, VStats>::const_iterator i;
   640         for (i = valuestats.begin(); i != valuestats.end(); ++i) {
   641             if (i->second.freq != i->second.freq_real) {
   643                     *out << 
"Value stats frequency for slot " << i->first
   644                          << 
" is " << i->second.freq << 
" but recounting "   645                             "gives " << i->second.freq_real << endl;
   649     } 
else if (strcmp(tablename, 
"record") == 0) {
   654                      << 
") != get_doccount() (" << doccount << 
")" << endl;
   660         for ( ; !cursor->after_end(); cursor->next()) {
   661             string & key = cursor->current_key;
   664             const char * pos = key.data();
   665             const char * end = pos + key.size();
   670                     *out << 
"Error unpacking docid from key" << endl;
   672             } 
else if (pos != end) {
   674                     *out << 
"Extra junk in key" << endl;
   677                 if (did > db_last_docid) {
   679                         *out << 
"document id " << did << 
" in docdata table "   680                                 "is larger than get_last_docid() "   681                              << db_last_docid << endl;
   686     } 
else if (strcmp(tablename, 
"termlist") == 0) {
   690         for ( ; !cursor->after_end(); cursor->next()) {
   691             string & key = cursor->current_key;
   694             const char * pos = key.data();
   695             const char * end = pos + key.size();
   700                     *out << 
"Error unpacking docid from key" << endl;
   705             if (did > db_last_docid) {
   707                     *out << 
"document id " << did << 
" in termlist table "   708                             "is larger than get_last_docid() "   709                          << db_last_docid << endl;
   713             if (end - pos == 1 && *pos == 
'\0') {
   715                 ++num_slotsused_entries;
   718                 pos = cursor->current_tag.data();
   719                 end = pos + cursor->current_tag.size();
   723                         *out << 
"Empty value slots used tag" << endl;
   731                         *out << 
"Value slot encoding corrupt" << endl;
   740                             *out << 
"Value slot encoding corrupt" << endl;
   744                     slot += prev_slot + 1;
   745                     if (slot <= prev_slot) {
   747                             *out << 
"Value slot number overflowed ("   748                                  << prev_slot << 
" -> " << slot << 
")" << endl;
   758                     *out << 
"Extra junk in key" << endl;
   766             pos = cursor->current_tag.data();
   767             end = pos + cursor->current_tag.size();
   780                         *out << 
"doclen out of range";
   782                         *out << 
"Unexpected end of data when reading doclen";
   794                         *out << 
"termlist_size out of range";
   796                         *out << 
"Unexpected end of data when reading "   806             string current_tname;
   811                 bool got_wdf = 
false;
   813                 if (!current_tname.empty()) {
   814                     string::size_type len = 
static_cast<unsigned char>(*pos++);
   815                     if (len > current_tname.length()) {
   817                         current_wdf = len / (current_tname.length() + 1) - 1;
   818                         len %= (current_tname.length() + 1);
   821                     current_tname.resize(len);
   825                 string::size_type len = 
static_cast<unsigned char>(*pos++);
   826                 current_tname.append(pos, len);
   834                                 *out << 
"Unexpected end of data when reading "   835                                         "termlist current_wdf";
   837                                 *out << 
"Size of wdf out of range in termlist";
   847                 ++actual_termlist_size;
   848                 actual_doclen += current_wdf;
   854             if (termlist_size != actual_termlist_size) {
   856                     *out << 
"termlist_size != # of entries in termlist" << endl;
   859             if (doclen != actual_doclen) {
   861                     *out << 
"doclen != sum(wdf)" << endl;
   866             if (doclens.size() <= did) doclens.resize(did + 1);
   867             doclens[did] = actual_doclen;
   872                 *out << 
"Number of termlists (" << num_termlists
   873                      << 
") != get_doccount() (" << doccount << 
")" << endl;
   879         if (num_slotsused_entries > doccount &&
   882                 *out << 
"More slots-used entries (" << num_slotsused_entries
   883                      << 
") then documents (" << doccount << 
")" << endl;
   886     } 
else if (strcmp(tablename, 
"position") == 0) {
   888         for ( ; !cursor->after_end(); cursor->next()) {
   889             string & key = cursor->current_key;
   892             const char * pos = key.data();
   893             const char * end = pos + key.size();
   898                     *out << 
"Error unpacking docid from key" << endl;
   903             if (did > db_last_docid) {
   905                     *out << 
"document id " << did << 
" in position table "   906                             "is larger than get_last_docid() "   907                          << db_last_docid << endl;
   909             } 
else if (!doclens.empty()) {
   913                 if (did >= doclens.size() || doclens[did] == 0) {
   915                         *out << 
"Position list entry for document " << did
   916                              << 
" which doesn't exist or has no terms" << endl;
   923                     *out << 
"No termname in key" << endl;
   930             const string & data = cursor->current_tag;
   932             end = pos + data.size();
   937                     *out << tablename << 
" table: Position list data corrupt"   949                 rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last);
   952                 while (p != pos_last) {
   954                     p = rd.decode_interpolative_next();
   957                             *out << tablename << 
" table: Positions not "   958                                     "strictly monotonically increasing" << endl;
   964                 if (ok && !rd.check_all_gone()) {
   966                         *out << tablename << 
" table: Junk after position data"   974             *out << tablename << 
" table: Don't know how to check structure\n"   981             *out << tablename << 
" table structure checked OK\n";
   983             *out << tablename << 
" table errors found: " << errors << 
"\n";
 Class to hold statistics for a given slot. 
 
chert_tablesize_t get_entry_count() const
Return a count of the number of entries in the table. 
 
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database. 
 
Class managing a Btree table in a Chert database. 
 
void open()
Open the btree at the latest revision. 
 
std::string upper_bound
An upper bound on the values stored in the given value slot. 
 
Types used by chert backend and the Btree manager. 
 
unsigned int chert_revision_number_t
A type used to store a revision number for a table. 
 
Xapian::doccount freq
The number of documents which have a (non-empty) value stored in the slot. 
 
std::string lower_bound
A lower bound on the values stored in the given value slot. 
 
Xapian::doccount freq_real
 
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms. 
 
bool C_unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string. 
 
Interface to Btree cursors. 
 
Public interfaces for the Xapian library. 
 
static bool is_user_metadata_key(const string &key)
 
Read a stream created by BitWriter. 
 
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string. 
 
ChertCursor * cursor_get() const
Get a cursor for reading from the table. 
 
Classes to encode/decode a bitstream. 
 
Xapian::termpos decode(Xapian::termpos outof, bool force=false)
 
std::string get_description() const
Return a string describing this object. 
 
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string. 
 
size_t check_chert_table(const char *tablename, const string &dir, chert_revision_number_t *rev_ptr, int opts, vector< Xapian::termcount > &doclens, Xapian::doccount doccount, Xapian::docid db_last_docid, ostream *out)
 
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents. 
 
Pack types into strings and unpack them again. 
 
unsigned valueno
The number for a value slot in a document. 
 
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query. 
 
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string. 
 
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string. 
 
static void check(const char *tablename, const std::string &path, chert_revision_number_t *rev_ptr, int opts, std::ostream *out)
 
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string. 
 
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document. 
 
DatabaseError indicates some sort of database related error. 
 
Wrapper around standard unique_ptr template.