xapian-core  1.4.25
api_valuestats.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008 Lemur Consulting Ltd
5  * Copyright 2008,2009,2011,2017 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "api_valuestats.h"
26 
27 #include <xapian.h>
28 #include "testsuite.h"
29 #include "testutils.h"
30 
31 #include "apitest.h"
32 
33 using namespace std;
34 
35 // #######################################################################
36 // # Tests start here
37 
// valuestats1: exercises get_value_freq(), get_value_lower_bound() and
// get_value_upper_bound() on db_w for value slots 0 and 1 while documents
// are added, replaced and deleted.
//
// NOTE(review): db_w is used below but is not declared in the visible lines.
// The listing skips a line number right after the test header, so the
// declaration (presumably a get_writable_database() call) appears to have
// been dropped by extraction - confirm against the upstream file.
39 DEFINE_TESTCASE(valuestats1, writable && valuestats) {
41 
42  // Check that counts are initially zero.
43  TEST_EQUAL(db_w.get_value_freq(0), 0);
44  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
45  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
46  TEST_EQUAL(db_w.get_value_freq(1), 0);
47  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
48  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
49 
50  Xapian::Document doc;
51  doc.add_value(0, "hello");
52 
53  // Check that statistics for the correct value slot increase when document
54  // is added. (Check slot 1 first, so that cache invalidation of the last
55  // slot read also gets checked.)
56  db_w.add_document(doc);
57  TEST_EQUAL(db_w.get_value_freq(1), 0);
58  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
59  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
60  TEST_EQUAL(db_w.get_value_freq(0), 1);
61  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
62  TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
63 
64  // Check that statistics work correctly when second document is added.
    // The second document is stored under an explicit docid (2) via
    // replace_document(); the frequency check below expects it to count as
    // an additional document rather than overwrite the first one.
65  doc = Xapian::Document();
66  doc.add_value(0, "world");
67  doc.add_value(1, "cheese");
68  db_w.replace_document(2, doc);
69  TEST_EQUAL(db_w.get_value_freq(0), 2);
70  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
71  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
72  TEST_EQUAL(db_w.get_value_freq(1), 1);
73  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
74  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
75 
76  // Deleting a document affects the count, but probably not the bounds.
77  // It may with a multi-database, if the document which was deleted
78  // was the only one in that shard.
79  db_w.delete_document(1);
80  TEST_EQUAL(db_w.get_value_freq(1), 1);
81  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
82  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
83  TEST_EQUAL(db_w.get_value_freq(0), 1);
    // size() > 1 means db_w has more than one shard; in that case the test
    // expects the lower bound to have tightened to the remaining value.
84  if (db_w.size() > 1) {
85  TEST_EQUAL(db_w.get_value_lower_bound(0), "world");
86  } else {
87  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
88  }
89  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
90 
91  // Deleting all the documents returns the bounds to their original value.
92  db_w.delete_document(2);
93  TEST_EQUAL(db_w.get_value_freq(0), 0);
94  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
95  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
96  TEST_EQUAL(db_w.get_value_freq(1), 0);
97  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
98  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
99 
100  // Adding a document with a value in one of the old slots should still
101  // end up with tight bounds on it.
102  doc = Xapian::Document();
103  doc.add_value(1, "newval");
104  db_w.replace_document(2, doc);
105  TEST_EQUAL(db_w.get_value_freq(1), 1);
106  TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
107  TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
108  TEST_EQUAL(db_w.get_value_freq(0), 0);
109  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
110  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
111 }
112 
// valuestats2: runs the same add/replace/delete sequence as valuestats1 on
// db_w, and additionally checks a second handle, db, which is expected to
// keep reporting the old statistics until db_w.commit() has been called and
// db.reopen() has succeeded.
//
// NOTE(review): neither db_w nor db is declared in the visible lines. The
// listing skips two line numbers right after the test header, so both
// declarations appear to have been dropped by extraction (presumably calls
// to get_writable_database() and get_writable_database_as_database()) -
// confirm against the upstream file.
// NOTE(review): the listing also skips line numbers after several of the
// db.get_value_freq() checks, so further assertions on db may be missing
// from this view.
114 DEFINE_TESTCASE(valuestats2, transactions && valuestats) {
117 
118  // Check that counts are initially zero.
119  TEST_EQUAL(db_w.get_value_freq(0), 0);
120  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
121  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
122  TEST_EQUAL(db_w.get_value_freq(1), 0);
123  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
124  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
125  TEST_EQUAL(db.get_value_freq(0), 0);
128  TEST_EQUAL(db.get_value_freq(1), 0);
131 
132  Xapian::Document doc;
133  doc.add_value(0, "hello");
134 
135  // Check that statistics for the correct value slot increase when document
136  // is added. (Check slot 1 first, so that cache invalidation of the last
137  // slot read also gets checked.)
138  db_w.add_document(doc);
139  TEST_EQUAL(db_w.get_value_freq(1), 0);
140  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
141  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
142  TEST_EQUAL(db_w.get_value_freq(0), 1);
143  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
144  TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
145 
146  // The readonly database shouldn't change, though.
147  TEST_EQUAL(db.get_value_freq(1), 0);
150  TEST_EQUAL(db.get_value_freq(0), 0);
153 
154  // Check that statistics work correctly when second document is added.
155  doc = Xapian::Document();
156  doc.add_value(0, "world");
157  doc.add_value(1, "cheese");
158  db_w.replace_document(2, doc);
159  TEST_EQUAL(db_w.get_value_freq(0), 2);
160  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
161  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
162  TEST_EQUAL(db_w.get_value_freq(1), 1);
163  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
164  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
165 
166  // The readonly database shouldn't change, though.
167  TEST_EQUAL(db.get_value_freq(0), 0);
170  TEST_EQUAL(db.get_value_freq(1), 0);
173 
174  // Check that readonly database catches up when a commit is done.
     // reopen() is wrapped in TEST(), so the test requires it to return true
     // here.
175  db_w.commit();
176  TEST(db.reopen());
177  TEST_EQUAL(db.get_value_freq(1), 1);
178  TEST_EQUAL(db.get_value_lower_bound(1), "cheese");
179  TEST_EQUAL(db.get_value_upper_bound(1), "cheese");
180  TEST_EQUAL(db.get_value_freq(0), 2);
181  TEST_EQUAL(db.get_value_lower_bound(0), "hello");
182  TEST_EQUAL(db.get_value_upper_bound(0), "world");
183 
184  // Deleting a document affects the count, but not usually the bounds.
185  db_w.delete_document(1);
186  TEST_EQUAL(db_w.get_value_freq(1), 1);
187  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
188  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
189  TEST_EQUAL(db_w.get_value_freq(0), 1);
190  if (db_w.size() > 1) {
191  // With a sharded database, deleting document 1 leaves that shard empty
192  // and its value bounds should be reset so the lower bound comes only
193  // from the shard the other document is in, so it's actually exact.
194  TEST_EQUAL(db_w.get_value_lower_bound(0), "world");
195  } else {
196  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
197  }
198  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
199 
200  // Deleting all the documents returns the bounds to their original value.
201  db_w.delete_document(2);
202  TEST_EQUAL(db_w.get_value_freq(0), 0);
203  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
204  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
205  TEST_EQUAL(db_w.get_value_freq(1), 0);
206  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
207  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
208 
209  // Adding a document with a value in one of the old slots should still
210  // end up with tight bounds on it.
211  doc = Xapian::Document();
212  doc.add_value(1, "newval");
213  db_w.replace_document(2, doc);
214  TEST_EQUAL(db_w.get_value_freq(1), 1);
215  TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
216  TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
217  TEST_EQUAL(db_w.get_value_freq(0), 0);
218  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
219  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
220 
221  // Check that a readonly database gets the right statistics, too.
222  db_w.commit();
223  TEST(db.reopen());
224  TEST_EQUAL(db.get_value_freq(0), 0);
227  TEST_EQUAL(db.get_value_freq(1), 1);
228  TEST_EQUAL(db.get_value_lower_bound(1), "newval");
229  TEST_EQUAL(db.get_value_upper_bound(1), "newval");
230 }
231 
// valuestats3: opens the "apitest_simpledata" database via get_database()
// and checks the value statistics of slots 1 to 11. Every slot is expected
// to have a frequency of 6; the expected lower/upper bounds are hard-coded
// per slot below.
233 DEFINE_TESTCASE(valuestats3, valuestats) {
234  Xapian::Database db = get_database("apitest_simpledata");
235 
236  TEST_EQUAL(db.get_value_freq(1), 6);
237  TEST_EQUAL(db.get_value_lower_bound(1), "h");
238  TEST_EQUAL(db.get_value_upper_bound(1), "n");
239  TEST_EQUAL(db.get_value_freq(2), 6);
240  TEST_EQUAL(db.get_value_lower_bound(2), "d");
241  TEST_EQUAL(db.get_value_upper_bound(2), "i");
242  TEST_EQUAL(db.get_value_freq(3), 6);
243  TEST_EQUAL(db.get_value_lower_bound(3), " ");
244  TEST_EQUAL(db.get_value_upper_bound(3), "s");
245  TEST_EQUAL(db.get_value_freq(4), 6);
246  TEST_EQUAL(db.get_value_lower_bound(4), " ");
247  TEST_EQUAL(db.get_value_upper_bound(4), "y");
248  TEST_EQUAL(db.get_value_freq(5), 6);
249  TEST_EQUAL(db.get_value_lower_bound(5), "e");
250  TEST_EQUAL(db.get_value_upper_bound(5), "p");
251  TEST_EQUAL(db.get_value_freq(6), 6);
252  TEST_EQUAL(db.get_value_lower_bound(6), "a");
253  TEST_EQUAL(db.get_value_upper_bound(6), "t");
254  TEST_EQUAL(db.get_value_freq(7), 6);
255  TEST_EQUAL(db.get_value_lower_bound(7), " ");
256  TEST_EQUAL(db.get_value_upper_bound(7), "r");
257  TEST_EQUAL(db.get_value_freq(8), 6);
258  TEST_EQUAL(db.get_value_lower_bound(8), "a");
259  TEST_EQUAL(db.get_value_upper_bound(8), "t");
260  TEST_EQUAL(db.get_value_freq(9), 6);
261  TEST_EQUAL(db.get_value_lower_bound(9), " ");
262  TEST_EQUAL(db.get_value_upper_bound(9), "n");
263  TEST_EQUAL(db.get_value_freq(10), 6);
264  TEST_EQUAL(db.get_value_lower_bound(10), "e");
265  TEST_EQUAL(db.get_value_upper_bound(10), "w");
     // Slot 11's expected bounds are two-byte strings written with hex
     // escapes rather than printable text.
     // NOTE(review): presumably a binary-encoded value - confirm against
     // how the test data populates this slot.
266  TEST_EQUAL(db.get_value_freq(11), 6);
267  TEST_EQUAL(db.get_value_lower_bound(11), "\xb9P");
268  TEST_EQUAL(db.get_value_upper_bound(11), "\xc7\x04");
269 }
270 
// valuestats4: adds FLUSH_THRESHOLD copies of a document carrying value
// "test" in slot 1 (with no explicit commit() call in this test), then checks
// through db that the document count and slot 1 statistics are visible. A
// transaction is then begun and cancelled, and the same checks are repeated
// in a second scope.
//
// NOTE(review): db_w and db are used but not declared in the visible lines.
// The listing skips a line number at the start of each scope and before the
// first set of checks, so the declarations appear to have been dropped by
// extraction - confirm against the upstream file.
271 DEFINE_TESTCASE(valuestats4, transactions && valuestats) {
272  size_t FLUSH_THRESHOLD = 10000;
273  {
275  // The flush threshold applies per shard in a sharded database.
276  FLUSH_THRESHOLD *= db_w.size();
277  Xapian::Document doc;
278  doc.add_value(1, "test");
279  for (size_t i = 0; i < FLUSH_THRESHOLD; ++i) {
280  db_w.add_document(doc);
281  }
282 
284  // Check that we had an automatic-commit.
285  TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
286  // Check that the value stats are there.
287  TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
288  TEST_EQUAL(db.get_value_lower_bound(1), "test");
289  TEST_EQUAL(db.get_value_upper_bound(1), "test");
290 
     // NOTE(review): between begin_transaction() and cancel_transaction()
     // only the local Document object is changed; no add/replace/delete is
     // issued on db_w, so the cancelled transaction contains no database
     // modification - confirm whether that is intended.
291  db_w.begin_transaction();
292  doc.add_value(1, "umbrella");
293  db_w.cancel_transaction();
294  }
295 
296  {
298  // Check that we had an automatic-commit.
299  TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
300  // Check that the value stats are there.
301  TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
302  TEST_EQUAL(db.get_value_lower_bound(1), "test");
303  TEST_EQUAL(db.get_value_upper_bound(1), "test");
304  }
305 }
306 
// valuestats5: backend-independent check (condition is !backend) that
// iterating over a Document's values never yields an empty value. Slots 0-5
// are set, then slot 3 is removed and slot 1 is overwritten with ""; slot 4
// was set to "" from the start. The loop expects exactly three non-empty
// values to remain.
//
// NOTE(review): the iterator v is used but not declared in the visible
// lines. The listing skips a line number just before the loop, so the
// declaration (presumably initialised from doc.values_begin()) appears to
// have been dropped by extraction - confirm against the upstream file.
308 DEFINE_TESTCASE(valuestats5, !backend) {
309  Xapian::Document doc;
310  doc.add_value(0, "zero");
311  doc.add_value(1, "one");
312  doc.add_value(2, "two");
313  doc.add_value(3, "three");
314  doc.add_value(4, "");
315  doc.add_value(5, "five");
316  doc.remove_value(3);
317  doc.add_value(1, "");
318 
319  // Check that we don't have any empty values reported.
320  size_t c = 0;
322  while (v != doc.values_end()) {
323  TEST(!(*v).empty());
324  ++c;
325  ++v;
326  }
327  TEST_EQUAL(c, 3); // 0, 2, 5
328 }
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the databas...
Definition: omdatabase.cc:890
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
This class is used to access a database, or a group of databases.
Definition: database.h:68
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
void begin_transaction(bool flushed=true)
Begin a transaction.
Definition: omdatabase.cc:868
a generic test suite engine
bool reopen()
Re-open the database.
Definition: omdatabase.cc:125
Class for iterating over document values.
Definition: valueiterator.h:40
STL namespace.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
Definition: omdatabase.cc:386
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
void remove_value(Xapian::valueno slot)
Remove any value with the given number.
Definition: omdocument.cc:114
test functionality of the Xapian API
This class provides read/write access to a database.
Definition: database.h:789
Public interfaces for the Xapian library.
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
Xapian::Database get_writable_database_as_database()
Definition: apitest.cc:119
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:281
size_t size() const
Return number of shards in this Database object.
Definition: database.h:93
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
DEFINE_TESTCASE(valuestats1, writable &&valuestats)
Test of value statistics methods.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
Definition: omdatabase.cc:368
Xapian-specific test helper functions and macros.
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
Definition: omdatabase.cc:355
A handle representing a document in a Xapian database.
Definition: document.h:61