sourcedoc/html/glass__dbcheck_8cc_source.html

 /* Copyright 1999,2000,2001 BrightStation PLC
  * Copyright 2002-2022 Olly Betts
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of the
  * License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
  * USA
  */

 #include <config.h>

 #include "glass_dbcheck.h"

 #include "bitstream.h"

 #include "internaltypes.h"

 #include "glass_check.h"
 #include "glass_cursor.h"
 #include "glass_defs.h"
 #include "glass_table.h"
 #include "glass_version.h"
 #include "pack.h"
 #include "backends/valuestats.h"

 #include <xapian.h>

 #include "filetests.h"
 #include "autoptr.h"
 #include <ostream>
 #include <vector>

 using namespace std;

 /** Test whether a table key carries the user metadata prefix.
  *
  *  A key is treated as user metadata when it is at least two bytes long and
  *  starts with the byte pair '\0', '\xc0'.
  *
  *  @param key	The raw key to classify.
  *
  *  @return true if @a key starts with the user metadata prefix.
  */
 static inline bool
 is_user_metadata_key(const string & key)
 {
     // Too short to hold the two-byte prefix.
     if (key.size() < 2)
         return false;
     // The leading zero byte marks the key as "special" rather than a term.
     if (key[0] != '\0')
         return false;
     return key[1] == '\xc0';
 }

 /** Value statistics for one slot, plus an independently recounted frequency.
  *
  *  The inherited ValueStats members hold what is read from the value stats
  *  entry, while freq_real is counted up from the value stream chunks so the
  *  two can be compared once the whole table has been scanned.
  */
 struct VStats : public ValueStats {
     /// Number of values actually seen in the value stream chunks.
     Xapian::doccount freq_real;

     /// Start with default base statistics and nothing recounted yet.
     VStats()
         : ValueStats(),
           freq_real(0)
     {
     }
 };

 size_t
 check_glass_table(const char * tablename, const string &db_dir, int fd,
                   off_t offset_,
                   const GlassVersion & version_file, int opts,
                   vector<Xapian::termcount> & doclens, ostream * out)
 {
     Xapian::docid db_last_docid = version_file.get_last_docid();
     if (out)
         *out << tablename << ":\n";
     if (fd < 0) {
         if (strcmp(tablename, "postlist") != 0) {
             // Other filenames are created lazily, so may not exist.
             string filename(db_dir);
             filename += '/';
             filename += tablename;
             filename += "." GLASS_TABLE_EXTENSION;
             if (!file_exists(filename)) {
                 if (out) {
                     if (strcmp(tablename, "termlist") == 0) {
                         *out << "Not present.\n";
                     } else {
                         *out << "Lazily created, and not yet used.\n";
                     }
                     *out << endl;
                 }
                 return 0;
             }
         }
     }

     // Check the btree structure.
     AutoPtr<GlassTable> table(
             GlassTableCheck::check(tablename, db_dir, fd, offset_,
                                    version_file, opts, out));

     // Now check the glass structures inside the btree.
     AutoPtr<GlassCursor> cursor(table->cursor_get());

     size_t errors = 0;

     cursor->find_entry(string());
     cursor->next(); // Skip the empty entry.

     if (strcmp(tablename, "postlist") == 0) {
         // Now check the structure of each postlist in the table.
         map<Xapian::valueno, VStats> valuestats;
         string current_term;
         Xapian::docid lastdid = 0;
         Xapian::termcount termfreq = 0, collfreq = 0;
         Xapian::termcount tf = 0, cf = 0;
         Xapian::doccount num_doclens = 0;

         for ( ; !cursor->after_end(); cursor->next()) {
             string & key = cursor->current_key;

             if (is_user_metadata_key(key)) {
                 // User metadata can be anything, so we can't do any particular
                 // checks on it other than to check that the tag isn't empty.
                 cursor->read_tag();
                 if (cursor->current_tag.empty()) {
                     if (out)
                         *out << "User metadata item is empty" << endl;
                     ++errors;
                 }
                 continue;
             }

             if (key.size() >= 2 && key[0] == '\0' && key[1] == '\xe0') {
                 // doclen chunk
                 const char * pos, * end;
                 Xapian::docid did = 1;
                 if (key.size() > 2) {
                     // Non-initial chunk.
                     pos = key.data();
                     end = pos + key.size();
                     pos += 2;
                     if (!unpack_uint_preserving_sort(&pos, end, &did)) {
                         if (out)
                             *out << "Error unpacking docid from doclen key" << endl;
                         ++errors;
                         continue;
                     }
                     if (did <= lastdid) {
                         if (out)
                             *out << "First did in this doclen chunk is <= last in "
                                     "prev chunk" << endl;
                         ++errors;
                     }
                 }

                 cursor->read_tag();
                 pos = cursor->current_tag.data();
                 end = pos + cursor->current_tag.size();
                 if (key.size() == 2) {
                     // Initial chunk.
                     if (end - pos < 2 || pos[0] || pos[1]) {
                         if (out)
                             *out << "Initial doclen chunk has nonzero dummy fields" << endl;
                         ++errors;
                         continue;
                     }
                     pos += 2;
                     if (!unpack_uint(&pos, end, &did)) {
                         if (out)
                             *out << "Failed to unpack firstdid for doclen" << endl;
                         ++errors;
                         continue;
                     }
                     ++did;
                 }

                 bool is_last_chunk;
                 if (!unpack_bool(&pos, end, &is_last_chunk)) {
                     if (out)
                         *out << "Failed to unpack last chunk flag for doclen" << endl;
                     ++errors;
                     continue;
                 }
                 // Read what the final document ID in this chunk is.
                 if (!unpack_uint(&pos, end, &lastdid)) {
                     if (out)
                         *out << "Failed to unpack increase to last" << endl;
                     ++errors;
                     continue;
                 }
                 lastdid += did;
                 bool bad = false;
                 while (true) {
                     Xapian::termcount doclen;
                     if (!unpack_uint(&pos, end, &doclen)) {
                         if (out)
                             *out << "Failed to unpack doclen" << endl;
                         ++errors;
                         bad = true;
                         break;
                     }

                     ++num_doclens;

                     if (did > db_last_docid) {
                         if (out)
                             *out << "document id " << did << " in doclen "
                                     "stream is larger than get_last_docid() "
                                  << db_last_docid << endl;
                         ++errors;
                     }

                     if (!doclens.empty()) {
                         // In glass, a document without terms doesn't get a
                         // termlist entry.
                         Xapian::termcount termlist_doclen = 0;
                         if (did < doclens.size())
                             termlist_doclen = doclens[did];

                         if (doclen != termlist_doclen) {
                             if (out)
                                 *out << "document id " << did << ": length "
                                      << doclen << " doesn't match "
                                      << termlist_doclen << " in the termlist "
                                         "table" << endl;
                             ++errors;
                         }
                     }

                     if (pos == end) break;

                     Xapian::docid inc;
                     if (!unpack_uint(&pos, end, &inc)) {
                         if (out)
                             *out << "Failed to unpack docid increase" << endl;
                         ++errors;
                         bad = true;
                         break;
                     }
                     ++inc;
                     did += inc;
                     if (did > lastdid) {
                         if (out)
                             *out << "docid " << did << " > last docid "
                                  << lastdid << endl;
                         ++errors;
                     }
                 }
                 if (bad) {
                     continue;
                 }
                 if (is_last_chunk) {
                     if (did != lastdid) {
                         if (out)
                             *out << "lastdid " << lastdid << " != last did "
                                  << did << endl;
                         ++errors;
                     }
                 }

                 continue;
             }

             if (key.size() >= 2 && key[0] == '\0' && key[1] == '\xd0') {
                 // Value stats.
                 const char * p = key.data();
                 const char * end = p + key.length();
                 p += 2;
                 Xapian::valueno slot;
                 if (!unpack_uint_last(&p, end, &slot)) {
                     if (out)
                         *out << "Bad valuestats key (no slot)" << endl;
                     ++errors;
                     continue;
                 }

                 cursor->read_tag();
                 p = cursor->current_tag.data();
                 end = p + cursor->current_tag.size();

                 VStats & v = valuestats[slot];
                 if (!unpack_uint(&p, end, &v.freq)) {
                     if (out) {
                         if (*p == 0) {
                             *out << "Incomplete stats item in value table";
                         } else {
                             *out << "Frequency statistic in value table is too large";
                         }
                         *out << endl;
                     }
                     ++errors;
                     continue;
                 }
                 if (!unpack_string(&p, end, v.lower_bound)) {
                     if (out) {
                         if (*p == 0) {
                             *out << "Incomplete stats item in value table";
                         } else {
                             *out << "Lower bound statistic in value table is too large";
                         }
                         *out << endl;
                     }
                     ++errors;
                     continue;
                 }
                 size_t len = end - p;
                 if (len == 0) {
                     v.upper_bound = v.lower_bound;
                 } else {
                     v.upper_bound.assign(p, len);
                 }

                 continue;
             }

             if (key.size() >= 2 && key[0] == '\0' && key[1] == '\xd8') {
                 // Value stream chunk.
                 const char * p = key.data();
                 const char * end = p + key.length();
                 p += 2;
                 Xapian::valueno slot;
                 if (!unpack_uint(&p, end, &slot)) {
                     if (out)
                         *out << "Bad value chunk key (no slot)" << endl;
                     ++errors;
                     continue;
                 }
                 Xapian::docid did;
                 if (!unpack_uint_preserving_sort(&p, end, &did)) {
                     if (out)
                         *out << "Bad value chunk key (no docid)" << endl;
                     ++errors;
                     continue;
                 }
                 if (p != end) {
                     if (out)
                         *out << "Bad value chunk key (trailing junk)" << endl;
                     ++errors;
                     continue;
                 }

                 VStats & v = valuestats[slot];

                 cursor->read_tag();
                 p = cursor->current_tag.data();
                 end = p + cursor->current_tag.size();

                 while (true) {
                     string value;
                     if (!unpack_string(&p, end, value)) {
                         if (out)
                             *out << "Failed to unpack value from chunk" << endl;
                         ++errors;
                         break;
                     }

                     ++v.freq_real;

                     // FIXME: Cross-check that docid did has value slot (and
                     // vice versa - that there's a value here if the slot entry
                     // says so).

                     // FIXME: Check if the bounds are tight?  Or is that better
                     // as a separate tool which can also update the bounds?
                     if (value < v.lower_bound) {
                         if (out)
                             *out << "Value slot " << slot << " has value "
                                     "below lower bound: '" << value << "' < '"
                                  << v.lower_bound << "'" << endl;
                         ++errors;
                     } else if (value > v.upper_bound) {
                         if (out)
                             *out << "Value slot " << slot << " has value "
                                     "above upper bound: '" << value << "' > '"
                                  << v.upper_bound << "'" << endl;
                         ++errors;
                     }

                     if (p == end) break;
                     Xapian::docid delta;
                     if (!unpack_uint(&p, end, &delta)) {
                         if (out)
                             *out << "Failed to unpack docid delta from chunk"
                                  << endl;
                         ++errors;
                         break;
                     }
                     Xapian::docid new_did = did + delta + 1;
                     if (new_did <= did) {
                         if (out)
                             *out << "docid overflowed in value chunk" << endl;
                         ++errors;
                         break;
                     }
                     did = new_did;

                     if (did > db_last_docid) {
                         if (out)
                             *out << "document id " << did << " in value chunk "
                                     "is larger than get_last_docid() "
                                  << db_last_docid << endl;
                         ++errors;
                     }
                 }
                 continue;
             }

             const char * pos, * end;

             // Get term from key.
             pos = key.data();
             end = pos + key.size();

             string term;
             Xapian::docid did;
             if (!unpack_string_preserving_sort(&pos, end, term)) {
                 if (out)
                     *out << "Error unpacking termname from key" << endl;
                 ++errors;
                 continue;
             }
             if (!current_term.empty() && term != current_term) {
                 // The term changed unexpectedly.
                 if (pos == end) {
                     if (out)
                         *out << "No last chunk for term '" << current_term
                              << "'" << endl;
                     current_term.resize(0);
                 } else {
                     if (out)
                         *out << "Mismatch in follow-on chunk in posting list "
                                 "for term '" << current_term << "' (got '"
                              << term << "')" << endl;
                     current_term = term;
                     tf = cf = 0;
                     lastdid = 0;
                 }
                 ++errors;
             }
             if (pos == end) {
                 // First chunk.
                 if (term == current_term) {
                     // This probably isn't possible.
                     if (out)
                         *out << "First posting list chunk for term '" << term
                              << "' follows previous chunk for the same term"
                              << endl;
                     ++errors;
                 }
                 current_term = term;
                 tf = cf = 0;

                 // Unpack extra header from first chunk.
                 cursor->read_tag();
                 pos = cursor->current_tag.data();
                 end = pos + cursor->current_tag.size();
                 if (!unpack_uint(&pos, end, &termfreq)) {
                     if (out)
                         *out << "Failed to unpack termfreq for term '" << term
                              << "'" << endl;
                     ++errors;
                     continue;
                 }
                 if (!unpack_uint(&pos, end, &collfreq)) {
                     if (out)
                         *out << "Failed to unpack collfreq for term '" << term
                              << "'" << endl;
                     ++errors;
                     continue;
                 }
                 if (!unpack_uint(&pos, end, &did)) {
                     if (out)
                         *out << "Failed to unpack firstdid for term '" << term
                              << "'" << endl;
                     ++errors;
                     continue;
                 }
                 ++did;
             } else {
                 // Continuation chunk.
                 if (current_term.empty()) {
                     if (out)
                         *out << "First chunk for term '" << term
                              << "' is a continuation chunk" << endl;
                     ++errors;
                     current_term = term;
                 }
                 AssertEq(current_term, term);
                 if (!unpack_uint_preserving_sort(&pos, end, &did)) {
                     if (out)
                         *out << "Failed to unpack did from key" << endl;
                     ++errors;
                     continue;
                 }
                 if (did <= lastdid) {
                     if (out)
                         *out << "First did in this chunk is <= last in "
                                 "prev chunk" << endl;
                     ++errors;
                 }
                 cursor->read_tag();
                 pos = cursor->current_tag.data();
                 end = pos + cursor->current_tag.size();
             }

             bool is_last_chunk;
             if (!unpack_bool(&pos, end, &is_last_chunk)) {
                 if (out)
                     *out << "Failed to unpack last chunk flag" << endl;
                 ++errors;
                 continue;
             }
             // Read what the final document ID in this chunk is.
             if (!unpack_uint(&pos, end, &lastdid)) {
                 if (out)
                     *out << "Failed to unpack increase to last" << endl;
                 ++errors;
                 continue;
             }
             lastdid += did;
             bool bad = false;
             while (true) {
                 Xapian::termcount wdf;
                 if (!unpack_uint(&pos, end, &wdf)) {
                     if (out)
                         *out << "Failed to unpack wdf" << endl;
                     ++errors;
                     bad = true;
                     break;
                 }
                 ++tf;
                 cf += wdf;

                 if (pos == end) break;

                 Xapian::docid inc;
                 if (!unpack_uint(&pos, end, &inc)) {
                     if (out)
                         *out << "Failed to unpack docid increase" << endl;
                     ++errors;
                     bad = true;
                     break;
                 }
                 ++inc;
                 did += inc;
                 if (did > lastdid) {
                     if (out)
                         *out << "docid " << did << " > last docid " << lastdid
                              << endl;
                     ++errors;
                 }
             }
             if (bad) {
                 continue;
             }
             if (is_last_chunk) {
                 if (tf != termfreq) {
                     if (out)
                         *out << "termfreq " << termfreq << " != # of entries "
                              << tf << endl;
                     ++errors;
                 }
                 if (cf != collfreq) {
                     if (out)
                         *out << "collfreq " << collfreq << " != sum wdf " << cf
                              << endl;
                     ++errors;
                 }
                 if (did != lastdid) {
                     if (out)
                         *out << "lastdid " << lastdid << " != last did " << did
                              << endl;
                     ++errors;
                 }
                 current_term.resize(0);
             }
         }
         if (!current_term.empty()) {
             if (out)
                 *out << "Last term '" << current_term << "' has no last chunk"
                      << endl;
             ++errors;
         }

         Xapian::doccount doccount = version_file.get_doccount();
         if (num_doclens != doccount) {
             if (out)
                 *out << "Document length list has " << num_doclens
                      << " entries, should be " << doccount << endl;
             ++errors;
         }

         map<Xapian::valueno, VStats>::const_iterator i;
         for (i = valuestats.begin(); i != valuestats.end(); ++i) {
             if (i->second.freq != i->second.freq_real) {
                 if (out)
                     *out << "Value stats frequency for slot " << i->first
                          << " is " << i->second.freq << " but recounting "
                             "gives " << i->second.freq_real << endl;
                 ++errors;
             }
         }
     } else if (strcmp(tablename, "docdata") == 0) {
         // glass doesn't store a docdata entry if the document data is empty,
         // so we can only check there aren't more docdata entries than
         // documents.
         Xapian::doccount doccount = version_file.get_doccount();
         if (table->get_entry_count() > doccount) {
             if (out)
                 *out << "More document data (" << table->get_entry_count()
                      << ") then documents (" << doccount << ")" << endl;
             ++errors;
         }

         // Now check the contents of the docdata table.
         for ( ; !cursor->after_end(); cursor->next()) {
             string & key = cursor->current_key;

             // Get docid from key.
             const char * pos = key.data();
             const char * end = pos + key.size();

             Xapian::docid did;
             if (!unpack_uint_preserving_sort(&pos, end, &did)) {
                 if (out)
                     *out << "Error unpacking docid from key" << endl;
                 ++errors;
                 continue;
             }
             if (pos != end) {
                 if (out)
                     *out << "Extra junk in key" << endl;
                 ++errors;
             } else {
                 if (did > db_last_docid) {
                     if (out)
                         *out << "document id " << did << " in docdata table "
                                 "is larger than get_last_docid() "
                              << db_last_docid << endl;
                     ++errors;
                 }
             }

             // Fetch and decompress the document data to catch problems with
             // the splitting into multiple items, corruption of the compressed
             // data, etc.
             cursor->read_tag();
             if (cursor->current_tag.empty()) {
                 // We shouldn't store empty document data.
                 if (out)
                     *out << "Empty document data explicitly stored for "
                             "document id " << did << endl;
                 ++errors;
             }
         }
     } else if (strcmp(tablename, "termlist") == 0) {
         // Now check the contents of the termlist table.
         Xapian::doccount num_termlists = 0;
         Xapian::doccount num_slotsused_entries = 0;
         for ( ; !cursor->after_end(); cursor->next()) {
             string & key = cursor->current_key;

             // Get docid from key.
             const char * pos = key.data();
             const char * end = pos + key.size();

             Xapian::docid did;
             if (!unpack_uint_preserving_sort(&pos, end, &did)) {
                 if (out)
                     *out << "Error unpacking docid from key" << endl;
                 ++errors;
                 continue;
             }

             if (did > db_last_docid) {
                 if (out)
                     *out << "document id " << did << " in termlist table "
                             "is larger than get_last_docid() "
                          << db_last_docid << endl;
                 ++errors;
             }

             if (end - pos == 1 && *pos == '\0') {
                 // Value slots used entry.
                 ++num_slotsused_entries;
                 cursor->read_tag();

                 pos = cursor->current_tag.data();
                 end = pos + cursor->current_tag.size();

                 if (pos == end) {
                     if (out) {
                         *out << "document id " << did
                              << ": Empty value slots used tag\n";
                     }
                     ++errors;
                     continue;
                 }

                 Xapian::valueno prev_slot;
                 if (!unpack_uint(&pos, end, &prev_slot)) {
                     if (out) {
                         *out << "document id " << did
                              << ": Value slot encoding corrupt\n";
                     }
                     ++errors;
                     continue;
                 }

                 while (pos != end) {
                     Xapian::valueno slot;
                     if (!unpack_uint(&pos, end, &slot)) {
                         if (out) {
                             *out << "document id " << did
                                  << ": Value slot encoding corrupt\n";
                         }
                         ++errors;
                         break;
                     }
                     slot += prev_slot + 1;
                     if (slot <= prev_slot) {
                         if (out) {
                             *out << "document id " << did
                                  << ": Value slot number overflowed ("
                                  << prev_slot << " -> " << slot << ")\n";
                         }
                         ++errors;
                     }
                     prev_slot = slot;
                 }
                 continue;
             }

             if (pos != end) {
                 if (out) {
                     *out << "document id " << did << ": Extra junk in key\n";
                 }
                 ++errors;
                 continue;
             }

             ++num_termlists;
             cursor->read_tag();

             pos = cursor->current_tag.data();
             end = pos + cursor->current_tag.size();

             if (pos == end) {
                 // Empty termlist.
                 continue;
             }

             Xapian::termcount doclen, termlist_size;

             // Read doclen
             if (!unpack_uint(&pos, end, &doclen)) {
                 if (out) {
                     *out << "document id " << did;
                     if (pos != 0) {
                         *out << ": doclen out of range\n";
                     } else {
                         *out << ": Unexpected end of data when reading "
                                 "doclen\n";
                     }
                 }
                 ++errors;
                 continue;
             }

             // Check doclen with doclen lower and upper bounds
             if (doclen > version_file.get_doclength_upper_bound()) {
                 if (out) {
                     *out << "document id " << did
                          << ": doclen " << doclen << " > upper bound "
                          << version_file.get_doclength_upper_bound() << '\n';
                 }
                 ++errors;
             } else if (doclen < version_file.get_doclength_lower_bound() &&
                        doclen != 0) {
                 if (out) {
                     *out << "document id " << did
                          << ": doclen " << doclen << " < lower bound "
                          << version_file.get_doclength_lower_bound() << '\n';
                 }
                 ++errors;
             }

             // Read termlist_size
             if (!unpack_uint(&pos, end, &termlist_size)) {
                 if (out) {
                     *out << "document id " << did;
                     if (pos != 0) {
                         *out << ": termlist_size out of range\n";
                     } else {
                         *out << ": Unexpected end of data when reading "
                                 "termlist_size\n";
                     }
                 }
                 ++errors;
                 continue;
             }

             Xapian::termcount actual_doclen = 0, actual_termlist_size = 0;
             string current_tname;

             bool bad = false;
             while (pos != end) {
                 Xapian::doccount current_wdf = 0;
                 bool got_wdf = false;
                 // If there was a previous term, how much to reuse.
                 if (!current_tname.empty()) {
                     string::size_type len = static_cast<unsigned char>(*pos++);
                     if (len > current_tname.length()) {
                         // The wdf was squeezed into the same byte.
                         current_wdf = len / (current_tname.length() + 1) - 1;
                         len %= (current_tname.length() + 1);
                         got_wdf = true;
                     }
                     current_tname.resize(len);
                 }
                 // What to append (note len must be positive, since just truncating
                 // always takes us backwards in the sort order)
                 string::size_type len = static_cast<unsigned char>(*pos++);
                 current_tname.append(pos, len);
                 pos += len;

                 if (!got_wdf) {
                     // Read wdf
                     if (!unpack_uint(&pos, end, &current_wdf)) {
                         if (out) {
                             *out << "document id " << did;
                             if (pos == 0) {
                                 *out << ": Unexpected end of data when reading "
                                         "termlist current_wdf\n";
                             } else {
                                 *out << ": Size of wdf out of range in "
                                         "termlist\n";
                             }
                         }
                         ++errors;
                         bad = true;
                         break;
                     }
                 }

                 ++actual_termlist_size;
                 actual_doclen += current_wdf;
             }
             if (bad) {
                 continue;
             }

             if (termlist_size != actual_termlist_size) {
                 if (out) {
                     *out << "document id " << did << ": termlist_size "
                          << termlist_size << " != # of entries in termlist "
                          << actual_termlist_size << '\n';
                 }
                 ++errors;
             }
             if (doclen != actual_doclen) {
                 if (out) {
                     *out << "document id " << did << ": length " << doclen
                          << " != sum(wdf) " << actual_doclen << '\n';
                 }
                 ++errors;
             }

             // + 1 so that did is a valid subscript.
             if (doclens.size() <= did) doclens.resize(did + 1);
             doclens[did] = actual_doclen;
         }

         Xapian::doccount doccount = version_file.get_doccount();

         // glass doesn't store a termlist entry if there are no terms, so we
         // can only check there aren't more termlists than documents.
         if (num_termlists > doccount) {
             if (out)
                 *out << "More termlists (" << num_termlists
                      << ") then documents (" << doccount << ")" << endl;
             ++errors;
         }

         // glass doesn't store a valueslots used entry if there are no terms,
         // so we can only check there aren't more such entries than documents.
         if (num_slotsused_entries > doccount) {
             if (out)
                 *out << "More slots-used entries (" << num_slotsused_entries
                      << ") then documents (" << doccount << ")" << endl;
             ++errors;
         }
     } else if (strcmp(tablename, "position") == 0) {
         // Now check the contents of the position table.
         for ( ; !cursor->after_end(); cursor->next()) {
             string & key = cursor->current_key;

             // Get docid from key.
             const char * pos = key.data();
             const char * end = pos + key.size();

             string term;
             if (!unpack_string_preserving_sort(&pos, end, term)) {
                 if (out)
                     *out << "Error unpacking term from key" << endl;
                 ++errors;
                 continue;
             }

             Xapian::docid did;
             if (!unpack_uint_preserving_sort(&pos, end, &did)) {
                 if (out)
                     *out << "Error unpacking docid from key" << endl;
                 ++errors;
                 continue;
             }

             if (pos != end) {
                 if (out)
                     *out << "Extra junk in key with docid " << did << endl;
                 ++errors;
                 continue;
             }

             if (did > db_last_docid) {
                 if (out)
                     *out << "document id " << did << " in position table "
                             "is larger than get_last_docid() "
                          << db_last_docid << endl;
                 ++errors;
             } else if (!doclens.empty()) {
                 // In glass, a document without terms doesn't get a
                 // termlist entry, so we can't tell the difference
                 // easily.
                 if (did >= doclens.size() || doclens[did] == 0) {
                     if (out)
                         *out << "Position list entry for document " << did
                              << " which doesn't exist or has no terms" << endl;
                     ++errors;
                 }
             }

             cursor->read_tag();

             const string & data = cursor->current_tag;
             pos = data.data();
             end = pos + data.size();

             Xapian::termpos pos_last;
             if (!unpack_uint(&pos, end, &pos_last)) {
                 if (out)
                     *out << tablename << " table: Position list data corrupt"
                          << endl;
                 ++errors;
                 continue;
             }
             if (pos == end) {
                 // Special case for single entry position list.
             } else {
                 // Skip the header we just read.
                 BitReader rd(data, pos - data.data());
                 Xapian::termpos pos_first = rd.decode(pos_last);
                 Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
                 rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last);
                 Xapian::termpos p = rd.decode_interpolative_next();
                 bool ok = true;
                 while (p != pos_last) {
                     Xapian::termpos pos_prev = p;
                     p = rd.decode_interpolative_next();
                     if (p <= pos_prev) {
                         if (out)
                             *out << tablename << " table: Positions not "
                                     "strictly monotonically increasing" << endl;
                         ++errors;
                         ok = false;
                         break;
                     }
                 }
                 if (ok && !rd.check_all_gone()) {
                     if (out)
                         *out << tablename << " table: Junk after position data"
                              << endl;
                     ++errors;
                 }
             }
         }
     } else {
         if (out)
             *out << tablename << " table: Full structure check not "
                 "implemented, checking readability\n";
         for ( ; !cursor->after_end(); cursor->next()) {
             cursor->read_tag();
         }
     }

     if (out) {
         if (!errors)
             *out << tablename << " table structure checked OK\n";
         else
             *out << tablename << " table errors found: " << errors << "\n";
         *out << endl;
     }

     return errors;
 }
GlassVersion::get_doclength_upper_bound
Xapian::termcount get_doclength_upper_bound() const
Definition: glass_version.h:217

glass_version.h
GlassVersion class.

ValueStats
Class to hold statistics for a given slot.
Definition: valuestats.h:29

valuestats.h
Statistics about values.

AssertEq
#define AssertEq(A, B)
Definition: omassert.h:124

is_user_metadata_key
static bool is_user_metadata_key(const string &key)
Definition: glass_dbcheck.cc:49

GlassVersion
The GlassVersion class manages the revision files.
Definition: glass_version.h:94

opts
static const char * opts
Definition: xapian-progsrv.cc:39

VStats::VStats
VStats()
Definition: glass_dbcheck.cc:57

VStats
Definition: chert_dbcheck.cc:52

config.h

std
STL namespace.

glass_defs.h
Definitions, types, etc for use inside glass.

ValueStats::upper_bound
std::string upper_bound
An upper bound on the values stored in the given value slot.
Definition: valuestats.h:41

filetests.h
Utility functions for testing files.

check_glass_table
size_t check_glass_table(const char *tablename, const string &db_dir, int fd, off_t offset_, const GlassVersion &version_file, int opts, vector< Xapian::termcount > &doclens, ostream *out)
Definition: glass_dbcheck.cc:61

GLASS_TABLE_EXTENSION
#define GLASS_TABLE_EXTENSION
Glass table extension.
Definition: glass_defs.h:27

GlassVersion::get_last_docid
Xapian::docid get_last_docid() const
Definition: glass_version.h:211

ValueStats::freq
Xapian::doccount freq
The number of documents which have a (non-empty) value stored in the slot.
Definition: valuestats.h:33

ValueStats::lower_bound
std::string lower_bound
A lower bound on the values stored in the given value slot.
Definition: valuestats.h:37

VStats::freq_real
Xapian::doccount freq_real
Definition: chert_dbcheck.cc:53

Xapian::termcount
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72

glass_check.h
Btree checking.

xapian.h
Public interfaces for the Xapian library.

GlassVersion::get_doclength_lower_bound
Xapian::termcount get_doclength_lower_bound() const
Definition: glass_version.h:213

Xapian::BitReader
Read a stream created by BitWriter.
Definition: bitstream.h:64

unpack_string_preserving_sort
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
Definition: pack.h:562

unpack_uint_preserving_sort
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
Definition: pack.h:318

glass_table.h
Btree implementation.

bitstream.h
Classes to encode/decode a bitstream.

GlassTableCheck::check
static GlassTableCheck * check(const char *tablename, const std::string &path, int fd, off_t offset_, const GlassVersion &version_file, int opts, std::ostream *out)
Definition: glass_check.cc:263

Xapian::BitReader::decode
Xapian::termpos decode(Xapian::termpos outof, bool force=false)
Definition: bitstream.cc:176

unpack_bool
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string.
Definition: pack.h:69

Xapian::doccount
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38

glass_cursor.h
Interface to Btree cursors.

pack.h
Pack types into strings and unpack them again.

Xapian::valueno
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108

Xapian::termpos
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83

unpack_uint_last
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
Definition: pack.h:111

unpack_uint
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413

unpack_string
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
Definition: pack.h:504

Xapian::docid
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52

glass_dbcheck.h
Check a glass table.

file_exists
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39

internaltypes.h
Types used internally.

autoptr.h
Wrapper around standard unique_ptr template.

GlassVersion::get_doccount
Xapian::doccount get_doccount() const
Definition: glass_version.h:207