xapian-core  1.4.26
chert_dbstats.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include "chert_dbstats.h"
24 
25 #include "chert_postlist.h"
26 
27 using namespace std;
28 
/// The key in the postlist table which we use to store our encoded statistics.
//
//  The key is a one-character string holding a single NUL character.
static const std::string DATABASE_STATS_KEY(1, '\0');
31 
32 void
34 {
35  string data;
36  if (!postlist_table.get_exact_entry(DATABASE_STATS_KEY, data)) {
37  // If there's no entry yet, then all the values are zero.
38  total_doclen = 0;
39  last_docid = 0;
40  doclen_lbound = 0;
41  doclen_ubound = 0;
42  wdf_ubound = 0;
43  return;
44  }
45 
46  const char * p = data.data();
47  const char * end = p + data.size();
48 
49  if (unpack_uint(&p, end, &last_docid) &&
50  unpack_uint(&p, end, &doclen_lbound) &&
51  unpack_uint(&p, end, &wdf_ubound) &&
52  unpack_uint(&p, end, &doclen_ubound) &&
53  unpack_uint_last(&p, end, &total_doclen)) {
54  // doclen_ubound should always be >= wdf_ubound, so we store the
55  // difference as it may encode smaller. wdf_ubound is likely to
56  // be larger than doclen_lbound.
57  doclen_ubound += wdf_ubound;
58  return;
59  }
60 
61  if (p)
62  throw Xapian::DatabaseCorruptError("Bad encoded DB stats (overflowed)");
63 
64  throw Xapian::DatabaseCorruptError("Bad encoded DB stats (out of data)");
65 }
66 
67 void
69 {
70  string data;
71  pack_uint(data, last_docid);
72  pack_uint(data, doclen_lbound);
73  pack_uint(data, wdf_ubound);
74  // doclen_ubound should always be >= wdf_ubound, so we store the
75  // difference as it may encode smaller. wdf_ubound is likely to
76  // be larger than doclen_lbound.
77  pack_uint(data, doclen_ubound - wdf_ubound);
78  // Micro-optimisation: total_doclen is likely to be the largest value, so
79  // store it last as pack_uint_last() uses a slightly more compact encoding
80  // - this could save us a few bytes!
81  pack_uint_last(data, total_doclen);
82  postlist_table.add(DATABASE_STATS_KEY, data);
83 }
STL namespace.
Chert class for database statistics.
static const string DATABASE_STATS_KEY(1, '\0')
The key in the postlist table which we use to store our encoded statistics.
void read(ChertPostListTable &postlist_table)
void pack_uint_last(std::string &s, U value)
Append an encoded unsigned integer to a string as the last item.
Definition: pack.h:93
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
Definition: chert_table.cc:978
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
Postlists in chert databases.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
Definition: pack.h:111
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
void write(ChertPostListTable &postlist_table) const