xapian-core  2.0.0
api_valuestats.cc
Go to the documentation of this file.
1 
/* Copyright 2008 Lemur Consulting Ltd
 * Copyright 2008,2009,2011,2017,2023 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see
 * <https://www.gnu.org/licenses/>.
 */
21 
22 #include <config.h>
23 
24 #include "api_valuestats.h"
25 
26 #include <xapian.h>
27 #include "str.h"
28 #include "testsuite.h"
29 #include "testutils.h"
30 
31 #include "apitest.h"
32 
33 using namespace std;
34 
35 // #######################################################################
36 // # Tests start here
37 
39 DEFINE_TESTCASE(valuestats1, writable && valuestats) {
41 
42  // Check that counts are initially zero.
43  TEST_EQUAL(db_w.get_value_freq(0), 0);
44  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
45  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
46  TEST_EQUAL(db_w.get_value_freq(1), 0);
47  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
48  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
49 
50  Xapian::Document doc;
51  doc.add_value(0, "hello");
52 
53  // Check that statistics for the correct value slot increase when document
54  // is added. (Check slot 1 first, so that cache invalidation of the last
55  // slot read also gets checked.)
56  db_w.add_document(doc);
57  TEST_EQUAL(db_w.get_value_freq(1), 0);
58  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
59  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
60  TEST_EQUAL(db_w.get_value_freq(0), 1);
61  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
62  TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
63 
64  // Check that statistics work correctly when second document is added.
65  doc = Xapian::Document();
66  doc.add_value(0, "world");
67  doc.add_value(1, "cheese");
68  db_w.replace_document(2, doc);
69  TEST_EQUAL(db_w.get_value_freq(0), 2);
70  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
71  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
72  TEST_EQUAL(db_w.get_value_freq(1), 1);
73  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
74  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
75 
76  // Deleting a document affects the count, but probably not the bounds.
77  // It may with a multi-database, if the document which was deleted
78  // was the only one in that shard.
79  db_w.delete_document(1);
80  TEST_EQUAL(db_w.get_value_freq(1), 1);
81  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
82  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
83  TEST_EQUAL(db_w.get_value_freq(0), 1);
84  if (db_w.size() > 1) {
85  TEST_EQUAL(db_w.get_value_lower_bound(0), "world");
86  } else {
87  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
88  }
89  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
90 
91  // Deleting all the documents returns the bounds to their original value.
92  db_w.delete_document(2);
93  TEST_EQUAL(db_w.get_value_freq(0), 0);
94  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
95  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
96  TEST_EQUAL(db_w.get_value_freq(1), 0);
97  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
98  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
99 
100  // Adding a document with a value in one of the old slots should still
101  // end up with tight bounds on it.
102  doc = Xapian::Document();
103  doc.add_value(1, "newval");
104  db_w.replace_document(2, doc);
105  TEST_EQUAL(db_w.get_value_freq(1), 1);
106  TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
107  TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
108  TEST_EQUAL(db_w.get_value_freq(0), 0);
109  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
110  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
111 }
112 
114 DEFINE_TESTCASE(valuestats2, transactions && valuestats) {
117 
118  // Check that counts are initially zero.
119  TEST_EQUAL(db_w.get_value_freq(0), 0);
120  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
121  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
122  TEST_EQUAL(db_w.get_value_freq(1), 0);
123  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
124  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
125  TEST_EQUAL(db.get_value_freq(0), 0);
128  TEST_EQUAL(db.get_value_freq(1), 0);
131 
132  Xapian::Document doc;
133  doc.add_value(0, "hello");
134 
135  // Check that statistics for the correct value slot increase when document
136  // is added. (Check slot 1 first, so that cache invalidation of the last
137  // slot read also gets checked.)
138  db_w.add_document(doc);
139  TEST_EQUAL(db_w.get_value_freq(1), 0);
140  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
141  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
142  TEST_EQUAL(db_w.get_value_freq(0), 1);
143  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
144  TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
145 
146  // The readonly database shouldn't change, though.
147  TEST_EQUAL(db.get_value_freq(1), 0);
150  TEST_EQUAL(db.get_value_freq(0), 0);
153 
154  // Check that statistics work correctly when second document is added.
155  doc = Xapian::Document();
156  doc.add_value(0, "world");
157  doc.add_value(1, "cheese");
158  db_w.replace_document(2, doc);
159  TEST_EQUAL(db_w.get_value_freq(0), 2);
160  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
161  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
162  TEST_EQUAL(db_w.get_value_freq(1), 1);
163  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
164  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
165 
166  // The readonly database shouldn't change, though.
167  TEST_EQUAL(db.get_value_freq(0), 0);
170  TEST_EQUAL(db.get_value_freq(1), 0);
173 
174  // Check that readonly database catches up when a commit is done.
175  db_w.commit();
176  TEST(db.reopen());
177  TEST_EQUAL(db.get_value_freq(1), 1);
178  TEST_EQUAL(db.get_value_lower_bound(1), "cheese");
179  TEST_EQUAL(db.get_value_upper_bound(1), "cheese");
180  TEST_EQUAL(db.get_value_freq(0), 2);
181  TEST_EQUAL(db.get_value_lower_bound(0), "hello");
182  TEST_EQUAL(db.get_value_upper_bound(0), "world");
183 
184  // Deleting a document affects the count, but not usually the bounds.
185  db_w.delete_document(1);
186  TEST_EQUAL(db_w.get_value_freq(1), 1);
187  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
188  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
189  TEST_EQUAL(db_w.get_value_freq(0), 1);
190  if (db_w.size() > 1) {
191  // With a sharded database, deleting document 1 leaves that shard empty
192  // and its value bounds should be reset so the lower bound comes only
193  // from the shard the other document is in, so it's actually exact.
194  TEST_EQUAL(db_w.get_value_lower_bound(0), "world");
195  } else {
196  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
197  }
198  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
199 
200  // Deleting all the documents returns the bounds to their original value.
201  db_w.delete_document(2);
202  TEST_EQUAL(db_w.get_value_freq(0), 0);
203  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
204  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
205  TEST_EQUAL(db_w.get_value_freq(1), 0);
206  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
207  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
208 
209  // Adding a document with a value in one of the old slots should still
210  // end up with tight bounds on it.
211  doc = Xapian::Document();
212  doc.add_value(1, "newval");
213  db_w.replace_document(2, doc);
214  TEST_EQUAL(db_w.get_value_freq(1), 1);
215  TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
216  TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
217  TEST_EQUAL(db_w.get_value_freq(0), 0);
218  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
219  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
220 
221  // Check that a readonly database gets the right statistics, too.
222  db_w.commit();
223  TEST(db.reopen());
224  TEST_EQUAL(db.get_value_freq(0), 0);
227  TEST_EQUAL(db.get_value_freq(1), 1);
228  TEST_EQUAL(db.get_value_lower_bound(1), "newval");
229  TEST_EQUAL(db.get_value_upper_bound(1), "newval");
230 }
231 
233 DEFINE_TESTCASE(valuestats3, valuestats) {
234  Xapian::Database db = get_database("apitest_simpledata");
235 
236  TEST_EQUAL(db.get_value_freq(1), 6);
237  TEST_EQUAL(db.get_value_lower_bound(1), "h");
238  TEST_EQUAL(db.get_value_upper_bound(1), "n");
239  TEST_EQUAL(db.get_value_freq(2), 6);
240  TEST_EQUAL(db.get_value_lower_bound(2), "d");
241  TEST_EQUAL(db.get_value_upper_bound(2), "i");
242  TEST_EQUAL(db.get_value_freq(3), 6);
243  TEST_EQUAL(db.get_value_lower_bound(3), " ");
244  TEST_EQUAL(db.get_value_upper_bound(3), "s");
245  TEST_EQUAL(db.get_value_freq(4), 6);
246  TEST_EQUAL(db.get_value_lower_bound(4), " ");
247  TEST_EQUAL(db.get_value_upper_bound(4), "y");
248  TEST_EQUAL(db.get_value_freq(5), 6);
249  TEST_EQUAL(db.get_value_lower_bound(5), "e");
250  TEST_EQUAL(db.get_value_upper_bound(5), "p");
251  TEST_EQUAL(db.get_value_freq(6), 6);
252  TEST_EQUAL(db.get_value_lower_bound(6), "a");
253  TEST_EQUAL(db.get_value_upper_bound(6), "t");
254  TEST_EQUAL(db.get_value_freq(7), 6);
255  TEST_EQUAL(db.get_value_lower_bound(7), " ");
256  TEST_EQUAL(db.get_value_upper_bound(7), "r");
257  TEST_EQUAL(db.get_value_freq(8), 6);
258  TEST_EQUAL(db.get_value_lower_bound(8), "a");
259  TEST_EQUAL(db.get_value_upper_bound(8), "t");
260  TEST_EQUAL(db.get_value_freq(9), 6);
261  TEST_EQUAL(db.get_value_lower_bound(9), " ");
262  TEST_EQUAL(db.get_value_upper_bound(9), "n");
263  TEST_EQUAL(db.get_value_freq(10), 6);
264  TEST_EQUAL(db.get_value_lower_bound(10), "e");
265  TEST_EQUAL(db.get_value_upper_bound(10), "w");
266  TEST_EQUAL(db.get_value_freq(11), 6);
267  TEST_EQUAL(db.get_value_lower_bound(11), "\xb9P");
268  TEST_EQUAL(db.get_value_upper_bound(11), "\xc7\x04");
269 }
270 
271 DEFINE_TESTCASE(valuestats4, transactions && valuestats) {
272  size_t FLUSH_THRESHOLD = 10000;
273  {
275  // The flush threshold applies per shard in a sharded database.
276  FLUSH_THRESHOLD *= db_w.size();
277  Xapian::Document doc;
278  doc.add_value(1, "test");
279  for (size_t i = 0; i < FLUSH_THRESHOLD; ++i) {
280  db_w.add_document(doc);
281  }
282 
284  // Check that we had an automatic-commit.
285  TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
286  // Check that the value stats are there.
287  TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
288  TEST_EQUAL(db.get_value_lower_bound(1), "test");
289  TEST_EQUAL(db.get_value_upper_bound(1), "test");
290 
291  db_w.begin_transaction();
292  doc.add_value(1, "umbrella");
293  db_w.cancel_transaction();
294  }
295 
296  {
298  // Check that we had an automatic-commit.
299  TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
300  // Check that the value stats are there.
301  TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
302  TEST_EQUAL(db.get_value_lower_bound(1), "test");
303  TEST_EQUAL(db.get_value_upper_bound(1), "test");
304  }
305 }
306 
308 DEFINE_TESTCASE(valuestats5, !backend) {
309  Xapian::Document doc;
310  doc.add_value(0, "zero");
311  doc.add_value(1, "one");
312  doc.add_value(2, "two");
313  doc.add_value(3, "three");
314  doc.add_value(4, "");
315  doc.add_value(5, "five");
316  doc.remove_value(3);
317  doc.add_value(1, "");
318 
319  // Check that we don't have any empty values reported.
320  size_t c = 0;
322  while (v != doc.values_end()) {
323  TEST(!(*v).empty());
324  ++c;
325  ++v;
326  }
327  TEST_EQUAL(c, 3); // 0, 2, 5
328 }
329 
330 static void
332 {
333  Xapian::Document doc;
334  // It'd be nice to test up to 32, but the glass to honey conversion
335  // currently loops over each number from 0 to the highest used slot
336  // number.
337  for (int i = 0; i < 24; ++i) {
338  Xapian::valueno slot = Xapian::valueno{1} << i;
339  doc.add_value(slot, str(slot));
340  }
341  wdb.add_document(doc);
342 }
343 
345 DEFINE_TESTCASE(valuestats6, backend) {
346  Xapian::Database db = get_database("valuestats6", gen_valuestats6_db);
347  Xapian::Document doc = db.get_document(1);
349  for (int i = 0; i < 24; ++i) {
350  TEST(v != doc.values_end());
351  Xapian::valueno slot = Xapian::valueno{1} << i;
352  TEST_EQUAL(v.get_valueno(), slot);
353  slot *= 2;
354  ++v;
355  }
356  TEST(v == doc.values_end());
357 }
DEFINE_TESTCASE(valuestats1, writable &&valuestats)
Test of value statistics methods.
static void gen_valuestats6_db(Xapian::WritableDatabase &wdb, const string &)
Xapian::Database get_writable_database_as_database()
Definition: apitest.cc:126
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:86
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
An indexed database of documents.
Definition: database.h:75
size_t size() const
Return number of shards in this Database object.
Definition: database.cc:105
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
Definition: database.cc:296
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
Definition: database.cc:290
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: database.cc:233
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
Definition: database.cc:284
bool reopen()
Reopen the database at the latest available revision.
Definition: database.cc:93
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
Definition: database.cc:368
Class representing a document.
Definition: document.h:64
ValueIterator values_begin() const
Start iterating the values in this document.
Definition: document.cc:208
void remove_value(Xapian::valueno slot)
Remove any value from the specified slot.
Definition: document.h:242
ValueIterator values_end() const noexcept
End iterator corresponding to values_begin().
Definition: document.h:259
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Definition: document.cc:191
Class for iterating over document values.
Definition: valueiterator.h:39
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
This class provides read/write access to a database.
Definition: database.h:964
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: database.cc:567
void begin_transaction(bool flushed=true)
Begin a transaction.
Definition: database.cc:549
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a document in the database.
Definition: database.cc:582
void cancel_transaction()
Abort the transaction currently in progress.
Definition: database.h:1206
void commit()
Commit pending modifications.
Definition: database.cc:543
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
string str(int value)
Convert int to std::string.
Definition: str.cc:91
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
Convert types to std::string.
a generic test suite engine
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
Xapian-specific test helper functions and macros.
Public interfaces for the Xapian library.