xapian-core  1.4.21
api_valuestats.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008 Lemur Consulting Ltd
5  * Copyright 2008,2009,2011,2017 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "api_valuestats.h"
26 
27 #include <xapian.h>
28 #include "testsuite.h"
29 #include "testutils.h"
30 
31 #include "apitest.h"
32 
33 using namespace std;
34 
35 // #######################################################################
36 // # Tests start here
37 
39 DEFINE_TESTCASE(valuestats1, writable && valuestats) {
41 
42  // Check that counts are initially zero.
43  TEST_EQUAL(db_w.get_value_freq(0), 0);
44  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
45  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
46  TEST_EQUAL(db_w.get_value_freq(1), 0);
47  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
48  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
49 
50  Xapian::Document doc;
51  doc.add_value(0, "hello");
52 
53  // Check that statistics for the correct value slot increase when document
54  // is added. (Check slot 1 first, so that cache invalidation of the last
55  // slot read also gets checked.)
56  db_w.add_document(doc);
57  TEST_EQUAL(db_w.get_value_freq(1), 0);
58  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
59  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
60  TEST_EQUAL(db_w.get_value_freq(0), 1);
61  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
62  TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
63 
64  // Check that statistics work correctly when second document is added.
65  doc = Xapian::Document();
66  doc.add_value(0, "world");
67  doc.add_value(1, "cheese");
68  db_w.replace_document(2, doc);
69  TEST_EQUAL(db_w.get_value_freq(0), 2);
70  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
71  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
72  TEST_EQUAL(db_w.get_value_freq(1), 1);
73  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
74  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
75 
76  // Deleting a document affects the count, but probably not the bounds.
77  // It may with a multi-database, if the document which was deleted
78  // was the only one in that shard.
79  db_w.delete_document(1);
80  TEST_EQUAL(db_w.get_value_freq(1), 1);
81  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
82  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
83  TEST_EQUAL(db_w.get_value_freq(0), 1);
84  if (!startswith(get_dbtype(), "multi")) {
85  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
86  } else {
87  TEST_EQUAL(db_w.get_value_lower_bound(0), "world");
88  }
89  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
90 
91  // Deleting all the documents returns the bounds to their original value.
92  db_w.delete_document(2);
93  TEST_EQUAL(db_w.get_value_freq(0), 0);
94  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
95  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
96  TEST_EQUAL(db_w.get_value_freq(1), 0);
97  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
98  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
99 
100  // Adding a document with a value in one of the old slots should still
101  // end up with tight bounds on it.
102  doc = Xapian::Document();
103  doc.add_value(1, "newval");
104  db_w.replace_document(2, doc);
105  TEST_EQUAL(db_w.get_value_freq(1), 1);
106  TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
107  TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
108  TEST_EQUAL(db_w.get_value_freq(0), 0);
109  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
110  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
111 }
112 
114 DEFINE_TESTCASE(valuestats2, transactions && valuestats) {
117 
118  // Check that counts are initially zero.
119  TEST_EQUAL(db_w.get_value_freq(0), 0);
120  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
121  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
122  TEST_EQUAL(db_w.get_value_freq(1), 0);
123  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
124  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
125  TEST_EQUAL(db.get_value_freq(0), 0);
128  TEST_EQUAL(db.get_value_freq(1), 0);
131 
132  Xapian::Document doc;
133  doc.add_value(0, "hello");
134 
135  // Check that statistics for the correct value slot increase when document
136  // is added. (Check slot 1 first, so that cache invalidation of the last
137  // slot read also gets checked.)
138  db_w.add_document(doc);
139  TEST_EQUAL(db_w.get_value_freq(1), 0);
140  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
141  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
142  TEST_EQUAL(db_w.get_value_freq(0), 1);
143  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
144  TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
145 
146  // The readonly database shouldn't change, though.
147  TEST_EQUAL(db.get_value_freq(1), 0);
150  TEST_EQUAL(db.get_value_freq(0), 0);
153 
154  // Check that statistics work correctly when second document is added.
155  doc = Xapian::Document();
156  doc.add_value(0, "world");
157  doc.add_value(1, "cheese");
158  db_w.replace_document(2, doc);
159  TEST_EQUAL(db_w.get_value_freq(0), 2);
160  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
161  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
162  TEST_EQUAL(db_w.get_value_freq(1), 1);
163  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
164  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
165 
166  // The readonly database shouldn't change, though.
167  TEST_EQUAL(db.get_value_freq(0), 0);
170  TEST_EQUAL(db.get_value_freq(1), 0);
173 
174  // Check that readonly database catches up when a commit is done.
175  db_w.commit();
176  TEST(db.reopen());
177  TEST_EQUAL(db.get_value_freq(1), 1);
178  TEST_EQUAL(db.get_value_lower_bound(1), "cheese");
179  TEST_EQUAL(db.get_value_upper_bound(1), "cheese");
180  TEST_EQUAL(db.get_value_freq(0), 2);
181  TEST_EQUAL(db.get_value_lower_bound(0), "hello");
182  TEST_EQUAL(db.get_value_upper_bound(0), "world");
183 
184  // Deleting a document affects the count, but not the bounds.
185  db_w.delete_document(1);
186  TEST_EQUAL(db_w.get_value_freq(1), 1);
187  TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
188  TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
189  TEST_EQUAL(db_w.get_value_freq(0), 1);
190  TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
191  TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
192 
193  // Deleting all the documents returns the bounds to their original value.
194  db_w.delete_document(2);
195  TEST_EQUAL(db_w.get_value_freq(0), 0);
196  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
197  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
198  TEST_EQUAL(db_w.get_value_freq(1), 0);
199  TEST_EQUAL(db_w.get_value_lower_bound(1), "");
200  TEST_EQUAL(db_w.get_value_upper_bound(1), "");
201 
202  // Adding a document with a value in one of the old slots should still
203  // end up with tight bounds on it.
204  doc = Xapian::Document();
205  doc.add_value(1, "newval");
206  db_w.replace_document(2, doc);
207  TEST_EQUAL(db_w.get_value_freq(1), 1);
208  TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
209  TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
210  TEST_EQUAL(db_w.get_value_freq(0), 0);
211  TEST_EQUAL(db_w.get_value_lower_bound(0), "");
212  TEST_EQUAL(db_w.get_value_upper_bound(0), "");
213 
214  // Check that a readonly database gets the right statistics, too.
215  db_w.commit();
216  TEST(db.reopen());
217  TEST_EQUAL(db.get_value_freq(0), 0);
220  TEST_EQUAL(db.get_value_freq(1), 1);
221  TEST_EQUAL(db.get_value_lower_bound(1), "newval");
222  TEST_EQUAL(db.get_value_upper_bound(1), "newval");
223 }
224 
226 DEFINE_TESTCASE(valuestats3, valuestats) {
227  Xapian::Database db = get_database("apitest_simpledata");
228 
229  TEST_EQUAL(db.get_value_freq(1), 6);
230  TEST_EQUAL(db.get_value_lower_bound(1), "h");
231  TEST_EQUAL(db.get_value_upper_bound(1), "n");
232  TEST_EQUAL(db.get_value_freq(2), 6);
233  TEST_EQUAL(db.get_value_lower_bound(2), "d");
234  TEST_EQUAL(db.get_value_upper_bound(2), "i");
235  TEST_EQUAL(db.get_value_freq(3), 6);
236  TEST_EQUAL(db.get_value_lower_bound(3), " ");
237  TEST_EQUAL(db.get_value_upper_bound(3), "s");
238  TEST_EQUAL(db.get_value_freq(4), 6);
239  TEST_EQUAL(db.get_value_lower_bound(4), " ");
240  TEST_EQUAL(db.get_value_upper_bound(4), "y");
241  TEST_EQUAL(db.get_value_freq(5), 6);
242  TEST_EQUAL(db.get_value_lower_bound(5), "e");
243  TEST_EQUAL(db.get_value_upper_bound(5), "p");
244  TEST_EQUAL(db.get_value_freq(6), 6);
245  TEST_EQUAL(db.get_value_lower_bound(6), "a");
246  TEST_EQUAL(db.get_value_upper_bound(6), "t");
247  TEST_EQUAL(db.get_value_freq(7), 6);
248  TEST_EQUAL(db.get_value_lower_bound(7), " ");
249  TEST_EQUAL(db.get_value_upper_bound(7), "r");
250  TEST_EQUAL(db.get_value_freq(8), 6);
251  TEST_EQUAL(db.get_value_lower_bound(8), "a");
252  TEST_EQUAL(db.get_value_upper_bound(8), "t");
253  TEST_EQUAL(db.get_value_freq(9), 6);
254  TEST_EQUAL(db.get_value_lower_bound(9), " ");
255  TEST_EQUAL(db.get_value_upper_bound(9), "n");
256  TEST_EQUAL(db.get_value_freq(10), 6);
257  TEST_EQUAL(db.get_value_lower_bound(10), "e");
258  TEST_EQUAL(db.get_value_upper_bound(10), "w");
259  TEST_EQUAL(db.get_value_freq(11), 6);
260  TEST_EQUAL(db.get_value_lower_bound(11), "\xb9P");
261  TEST_EQUAL(db.get_value_upper_bound(11), "\xc7\x04");
262 }
263 
264 DEFINE_TESTCASE(valuestats4, transactions && valuestats) {
265  const size_t FLUSH_THRESHOLD = 10000;
266  {
268  Xapian::Document doc;
269  doc.add_value(1, "test");
270  for (size_t i = 0; i < FLUSH_THRESHOLD; ++i) {
271  db_w.add_document(doc);
272  }
273 
275  // Check that we had an automatic-commit.
276  TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
277  // Check that the value stats are there.
278  TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
279  TEST_EQUAL(db.get_value_lower_bound(1), "test");
280  TEST_EQUAL(db.get_value_upper_bound(1), "test");
281 
282  db_w.begin_transaction();
283  doc.add_value(1, "umbrella");
284  db_w.cancel_transaction();
285  }
286 
287  {
289  // Check that we had an automatic-commit.
290  TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
291  // Check that the value stats are there.
292  TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
293  TEST_EQUAL(db.get_value_lower_bound(1), "test");
294  TEST_EQUAL(db.get_value_upper_bound(1), "test");
295  }
296 }
297 
299 DEFINE_TESTCASE(valuestats5, !backend) {
300  Xapian::Document doc;
301  doc.add_value(0, "zero");
302  doc.add_value(1, "one");
303  doc.add_value(2, "two");
304  doc.add_value(3, "three");
305  doc.add_value(4, "");
306  doc.add_value(5, "five");
307  doc.remove_value(3);
308  doc.add_value(1, "");
309 
310  // Check that we don't have any empty values reported.
311  size_t c = 0;
313  while (v != doc.values_end()) {
314  TEST(!(*v).empty());
315  ++c;
316  ++v;
317  }
318  TEST_EQUAL(c, 3); // 0, 2, 5
319 }
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the database.
Definition: omdatabase.cc:890
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
This class is used to access a database, or a group of databases.
Definition: database.h:68
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
void begin_transaction(bool flushed=true)
Begin a transaction.
Definition: omdatabase.cc:868
a generic test suite engine
bool reopen()
Re-open the database.
Definition: omdatabase.cc:125
Class for iterating over document values.
Definition: valueiterator.h:40
STL namespace.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
Definition: omdatabase.cc:386
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
void remove_value(Xapian::valueno slot)
Remove any value with the given number.
Definition: omdocument.cc:114
test functionality of the Xapian API
This class provides read/write access to a database.
Definition: database.h:785
Public interfaces for the Xapian library.
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
std::string get_dbtype()
Definition: apitest.cc:42
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
Xapian::Database get_writable_database_as_database()
Definition: apitest.cc:119
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:46
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:271
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
DEFINE_TESTCASE(valuestats1, writable &&valuestats)
Test of value statistics methods.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
Definition: omdatabase.cc:368
Xapian-specific test helper functions and macros.
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
Definition: omdatabase.cc:355
A handle representing a document in a Xapian database.
Definition: document.h:61