xapian-core  2.0.0
api_valuestream.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008,2009,2010 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "api_valuestream.h"
25 
26 #include <xapian.h>
27 #include "testsuite.h"
28 #include "testutils.h"
29 
30 #include "apitest.h"
31 
32 using namespace std;
33 
35 DEFINE_TESTCASE(valuestream1, backend) {
36  Xapian::Database db = get_database("apitest_simpledata");
37 
38  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
39  tout << "testing valuestream iteration for slot " << slot << '\n';
41  while (it != db.valuestream_end(slot)) {
42  TEST_EQUAL(it.get_valueno(), slot);
43  string value = *it;
44  Xapian::docid did = it.get_docid();
45 
46  Xapian::Document doc = db.get_document(did);
47  TEST_EQUAL(doc.get_value(slot), value);
48 
49  ++it;
50  }
51  }
52 }
53 
55 DEFINE_TESTCASE(valuestream2, backend) {
56  Xapian::Database db = get_database("etext");
57 
58  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
59  unsigned interval = 1;
60  while (interval < 1999) {
61  tout.str(string());
62  tout << "testing valuestream skip_to for slot " << slot
63  << " with interval " << interval << '\n';
64  Xapian::docid did = 1;
66  if (it == db.valuestream_end(slot)) break;
67  while (it.skip_to(did), it != db.valuestream_end(slot)) {
68  TEST_EQUAL(it.get_valueno(), slot);
69  string value = *it;
70 
71  // Check that the skipped documents had no values.
72  Xapian::docid actual_did = it.get_docid();
73  TEST_REL(actual_did,>=,did);
74  while (did < actual_did) {
75  Xapian::Document doc = db.get_document(did);
76  TEST(doc.get_value(slot).empty());
77  ++did;
78  }
79 
80  Xapian::Document doc = db.get_document(actual_did);
81  TEST_EQUAL(doc.get_value(slot), value);
82  did += interval;
83  }
84  interval = interval * 3 - 1;
85  }
86  }
87 }
88 
90 DEFINE_TESTCASE(valuestream3, backend) {
91  Xapian::Database db = get_database("etext");
92 
93  // Check combinations of check with other operations.
94  typedef enum {
95  CHECK, CHECK_AND_NEXT, CHECK2, SKIP_TO, CHECK_AND_LOOP
96  } test_op;
97  test_op operation = CHECK;
98 
99  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
100  unsigned interval = 1;
101  while (interval < 1999) {
102  tout << "testing valuestream check for slot " << slot
103  << " with interval " << interval << '\n';
104  Xapian::docid did = 1;
106  if (it == db.valuestream_end(slot)) break;
107  while (true) {
108  bool positioned = true;
109  switch (operation) {
110  case CHECK_AND_LOOP:
111  operation = CHECK;
112  // FALLTHRU.
113  case CHECK: case CHECK2:
114  positioned = it.check(did);
115  break;
116  case CHECK_AND_NEXT: {
117  bool was_skip_to = it.check(did);
118  if (!was_skip_to) ++it;
119  break;
120  }
121  case SKIP_TO:
122  it.skip_to(did);
123  break;
124  }
125  operation = test_op(operation + 1);
126  if (positioned) {
127  if (it == db.valuestream_end(slot)) break;
128  TEST_EQUAL(it.get_valueno(), slot);
129  string value = *it;
130 
131  // Check that the skipped documents had no values.
132  Xapian::docid actual_did = it.get_docid();
133  while (did < actual_did) {
134  Xapian::Document doc = db.get_document(did);
135  TEST(doc.get_value(slot).empty());
136  ++did;
137  }
138 
139  Xapian::Document doc = db.get_document(actual_did);
140  TEST_EQUAL(doc.get_value(slot), value);
141  }
142  did += interval;
143  }
144  interval = interval * 3 - 1;
145  }
146  }
147 }
148 
// Database generator helper: stores one shared document at docid 1 and again
// at docid 0xffffffff, i.e. at a very low and a very high docid.
// NOTE(review): listing line 150 (the function name and parameter list) and
// listing line 153 are absent from this extract, so `db` has no visible
// declaration and anything done to `doc` before it is stored cannot be seen.
// Presumably this is gen_valueweightsource5_db - confirm against upstream.
149 static void
151 {
152  Xapian::Document doc;
154  db.replace_document(1, doc);
155  db.replace_document(0xffffffff, doc);
156 }
157 
// Checks that the posting source `src` yields docid 1, then docid 0xffffffff,
// and is then at the end, when run over the "valueweightsource5" database.
// NOTE(review): listing lines 172-173 are absent from this extract, so the
// remainder of the get_database() call and the declaration of `src` are not
// visible here.
162 DEFINE_TESTCASE(valueweightsource5, valuestats) {
163  // inmemory's memory use is currently O(last_docid)!
164  SKIP_TEST_FOR_BACKEND("inmemory");
165  // remote's value slot iteration is very slow for this case currently
166  // because it throws and catches DocNotFoundError across the link 2^32-3
167  // times.
168  if (contains(get_dbtype(), "remote"))
169  SKIP_TEST("Testcase is too slow with remote shards");
170 
171  Xapian::Database db = get_database("valueweightsource5",
// Start iterating on shard 0 with no minimum weight requirement (0.0).
174  src.reset(db, 0);
175  src.next(0.0);
176  TEST(!src.at_end());
177  TEST_EQUAL(src.get_docid(), 1);
// The next entry is expected to be the document at the very high docid.
178  src.next(0.0);
179  TEST(!src.at_end());
180  TEST_EQUAL(src.get_docid(), 0xffffffff);
181  src.next(0.0);
182  TEST(src.at_end());
183 }
184 
185 // Check that ValueMapPostingSource works correctly.
186 // the test db has value 13 set to:
187 // 1 Thi
188 // 2 The
189 // 3 You
190 // 4 War
191 // 5 Fri
192 // 6 Ins
193 // 7 Whi
194 // 8 Com
195 // 9 A p
196 // 10 Tel
197 // 11 Tel
198 // 12 Enc
199 // 13 Get
200 // 14 Doe
201 // 15 fir
202 // 16 Pad
203 // 17 Pad
204 //
205 DEFINE_TESTCASE(valuemapsource1, backend) {
206  Xapian::Database db(get_database("apitest_phrase"));
207  Xapian::Enquire enq(db);
208 
210  src.add_mapping("Thi", 2.0);
211  src.add_mapping("The", 1.0);
212  src.add_mapping("You", 3.0);
213  src.add_mapping("War", 4.0);
214  src.add_mapping("Fri", 5.0);
215 
216  // check mset size and order
217  enq.set_query(Xapian::Query(&src));
218  Xapian::MSet mset = enq.get_mset(0, 5);
219 
220  TEST(mset.size() == 5);
221  mset_expect_order(mset, 5, 4, 3, 1, 2);
222 
223  // and with default weight
224  src.clear_mappings();
225  src.set_default_weight(3.5);
226  src.add_mapping("Thi", 2.0);
227  src.add_mapping("The", 1.0);
228  src.add_mapping("You", 3.0);
229  src.add_mapping("War", 4.0);
230  src.add_mapping("Fri", 5.0);
231 
232  enq.set_query(Xapian::Query(&src));
233  mset = enq.get_mset(0, 5);
234 
235  TEST(mset.size() == 5);
236  mset_expect_order(mset, 5, 4, 6, 7, 8);
237 }
238 
239 // Regression test for valuepostingsource subclasses: used to segfault if skip_to()
240 // called on an empty list.
// Each scope below resets a posting source `src` on shard 0 of the database,
// expects it not to be at the end yet, and then expects it to be at the end
// after a single next(), skip_to() or check() respectively.
// NOTE(review): listing lines 245, 253 and 261 (the declaration of `src` in
// each scope) are absent from this extract, so the posting source classes and
// their constructor arguments are not visible here.
241 DEFINE_TESTCASE(valuemapsource2, backend && !multi) {
242  Xapian::Database db(get_database("apitest_phrase"));
243 
244  {
// First source: advance with next().
246  src.reset(db, 0);
247  TEST(src.at_end() == false);
248  src.next(0.0);
249  TEST(src.at_end() == true);
250  }
251 
252  {
// Second source: advance with skip_to().
254  src.reset(db, 0);
255  TEST(src.at_end() == false);
256  src.skip_to(1, 0.0);
257  TEST(src.at_end() == true);
258  }
259 
260  {
// Third source: advance with check(); its return value is not inspected.
262  src.reset(db, 0);
263  TEST(src.at_end() == false);
264  src.check(1, 0.0);
265  TEST(src.at_end() == true);
266  }
267 }
268 
269 // Regression test for fixedweightpostingsource: used to segfault if skip_to()
270 // called on an empty list.
// Runs without a backend: `db` is a default-constructed Xapian::Database.
// NOTE(review): listing lines 275 and 283 (the declaration of `src` in each
// scope) are absent from this extract, so the constructor arguments are not
// visible here.
271 DEFINE_TESTCASE(fixedweightsource2, !backend) {
272  Xapian::Database db;
273 
274  {
// After reset() the source is not yet at the end; one next() must reach it.
276  src.reset(db, 0);
277  TEST(src.at_end() == false);
278  src.next(0.0);
279  TEST(src.at_end() == true);
280  }
281 
282  {
// Same expectation when advancing with skip_to() instead of next().
284  src.reset(db, 0);
285  TEST(src.at_end() == false);
286  src.skip_to(1, 0.0);
287  TEST(src.at_end() == true);
288  }
289 
290  // No need to test behaviour of check() - check is only allowed to be
291  // called with document IDs which exist, so can never be called for a
292  // FixedWeightPostingSource with an empty database.
293 }
294 
295 // Test DecreasingValueWeightPostingSource.
// Adds three documents, commits, and then checks the docids a posting source
// `src` yields for different minimum weights; a fourth document is added
// later for the "restricted range" scopes.
// NOTE(review): listing lines 297, 300, 302, 304, 310, 331, 347, 351, 371 and
// 387 are absent from this extract. The declaration of `db`, whatever is set
// on `doc` before each add_document(), and every declaration of `src` are
// therefore not visible here.
296 DEFINE_TESTCASE(decvalwtsource1, writable) {
298 
299  Xapian::Document doc;
301  db.add_document(doc);
303  db.add_document(doc);
305  db.add_document(doc);
306  db.commit();
307 
308  // Check basic function
// With a minimum weight of 0.0 all three documents are expected, in order.
309  {
311  src.reset(db, 0);
312 
313  src.next(0.0);
314  TEST(!src.at_end());
315  TEST_EQUAL(src.get_docid(), 1);
316 
317  src.next(0.0);
318  TEST(!src.at_end());
319  TEST_EQUAL(src.get_docid(), 2);
320 
321  src.next(0.0);
322  TEST(!src.at_end());
323  TEST_EQUAL(src.get_docid(), 3);
324 
325  src.next(0.0);
326  TEST(src.at_end());
327  }
328 
329  // Check skipping to end of list due to weight
// With a minimum weight of 1.5 the source is expected to end after docid 2.
330  {
332  src.reset(db, 0);
333 
334  src.next(1.5);
335  TEST(!src.at_end());
336  TEST_EQUAL(src.get_docid(), 1);
337 
338  src.next(1.5);
339  TEST(!src.at_end());
340  TEST_EQUAL(src.get_docid(), 2);
341 
342  src.next(1.5);
343  TEST(src.at_end());
344  }
345 
346  // Check behaviour with a restricted range
// A fourth document is added here; the scopes below expect docid 3 to be
// passed over and docid 4 to be returned.
348  db.add_document(doc);
349 
// Advancing with next() only: expects docids 1, 2, 4 and then the end.
350  {
352  src.reset(db, 0);
353 
354  src.next(1.5);
355  TEST(!src.at_end());
356  TEST_EQUAL(src.get_docid(), 1);
357 
358  src.next(1.5);
359  TEST(!src.at_end());
360  TEST_EQUAL(src.get_docid(), 2);
361 
362  src.next(1.5);
363  TEST(!src.at_end());
364  TEST_EQUAL(src.get_docid(), 4);
365 
366  src.next(1.5);
367  TEST(src.at_end());
368  }
369 
// Using skip_to(3, 1.5) after the first entry: expected to land on docid 4.
370  {
372  src.reset(db, 0);
373 
374  src.next(1.5);
375  TEST(!src.at_end());
376  TEST_EQUAL(src.get_docid(), 1);
377 
378  src.skip_to(3, 1.5);
379  TEST(!src.at_end());
380  TEST_EQUAL(src.get_docid(), 4);
381 
382  src.next(1.5);
383  TEST(src.at_end());
384  }
385 
// Using check(3, 1.5) after the first entry: expected to return true and to
// leave the source positioned on docid 4.
386  {
388  src.reset(db, 0);
389 
390  src.next(1.5);
391  TEST(!src.at_end());
392  TEST_EQUAL(src.get_docid(), 1);
393 
394  TEST(src.check(3, 1.5));
395  TEST(!src.at_end());
396  TEST_EQUAL(src.get_docid(), 4);
397 
398  src.next(1.5);
399  TEST(src.at_end());
400  }
401 }
402 
403 // Test DecreasingValueWeightPostingSource with out-of-order sections at
404 // start, and with repeated weights.
// Adds four documents, commits, and then checks the docids a posting source
// `src` yields for different minimum weights; a fifth document is added later
// for the "restricted range" scopes.
// NOTE(review): listing lines 406, 409, 411, 413, 415, 421, 446, 466, 470, 494
// and 514 are absent from this extract. The declaration of `db`, whatever is
// set on `doc` before each add_document(), and every declaration of `src` are
// therefore not visible here.
405 DEFINE_TESTCASE(decvalwtsource2, writable) {
407 
408  Xapian::Document doc;
410  db.add_document(doc);
412  db.add_document(doc);
414  db.add_document(doc);
416  db.add_document(doc);
417  db.commit();
418 
419  // Check basic function
// With a minimum weight of 0.0 all four documents are expected, in order.
420  {
422  src.reset(db, 0);
423 
424  src.next(0.0);
425  TEST(!src.at_end());
426  TEST_EQUAL(src.get_docid(), 1);
427 
428  src.next(0.0);
429  TEST(!src.at_end());
430  TEST_EQUAL(src.get_docid(), 2);
431 
432  src.next(0.0);
433  TEST(!src.at_end());
434  TEST_EQUAL(src.get_docid(), 3);
435 
436  src.next(0.0);
437  TEST(!src.at_end());
438  TEST_EQUAL(src.get_docid(), 4);
439 
440  src.next(0.0);
441  TEST(src.at_end());
442  }
443 
444  // Check skipping to end of list due to weight
// With a minimum weight of 1.5 the source is expected to end after docid 3.
445  {
447  src.reset(db, 0);
448 
449  src.next(1.5);
450  TEST(!src.at_end());
451  TEST_EQUAL(src.get_docid(), 1);
452 
453  src.next(1.5);
454  TEST(!src.at_end());
455  TEST_EQUAL(src.get_docid(), 2);
456 
457  src.next(1.5);
458  TEST(!src.at_end());
459  TEST_EQUAL(src.get_docid(), 3);
460 
461  src.next(1.5);
462  TEST(src.at_end());
463  }
464 
465  // Check behaviour with a restricted range
// A fifth document is added here; the scopes below expect docid 4 to be
// passed over and docid 5 to be returned.
467  db.add_document(doc);
468 
// Advancing with next() only: expects docids 1, 2, 3, 5 and then the end.
469  {
471  src.reset(db, 0);
472 
473  src.next(1.5);
474  TEST(!src.at_end());
475  TEST_EQUAL(src.get_docid(), 1);
476 
477  src.next(1.5);
478  TEST(!src.at_end());
479  TEST_EQUAL(src.get_docid(), 2);
480 
481  src.next(1.5);
482  TEST(!src.at_end());
483  TEST_EQUAL(src.get_docid(), 3);
484 
485  src.next(1.5);
486  TEST(!src.at_end());
487  TEST_EQUAL(src.get_docid(), 5);
488 
489  src.next(1.5);
490  TEST(src.at_end());
491  }
492 
// Starting with check(1, 1.5), then next(), then skip_to(4, 1.5): the skip is
// expected to land on docid 5.
493  {
495  src.reset(db, 0);
496 
497  TEST(src.check(1, 1.5));
498  TEST(!src.at_end());
499  TEST_EQUAL(src.get_docid(), 1);
500 
501  src.next(1.5);
502  TEST(!src.at_end());
503  TEST_EQUAL(src.get_docid(), 2);
504 
505  src.skip_to(4, 1.5);
506  TEST(!src.at_end());
507  TEST_EQUAL(src.get_docid(), 5);
508 
509  src.next(1.5);
510  TEST(src.at_end());
511  }
512 
// Same sequence but using check(4, 1.5) in place of skip_to(): expected to
// return true and to leave the source positioned on docid 5.
513  {
515  src.reset(db, 0);
516 
517  TEST(src.check(1, 1.5));
518  TEST(!src.at_end());
519  TEST_EQUAL(src.get_docid(), 1);
520 
521  src.next(1.5);
522  TEST(!src.at_end());
523  TEST_EQUAL(src.get_docid(), 2);
524 
525  TEST(src.check(4, 1.5));
526  TEST(!src.at_end());
527  TEST_EQUAL(src.get_docid(), 5);
528 
529  src.next(1.5);
530  TEST(src.at_end());
531  }
532 }
533 
// Database generator helper: adds four documents built from one reused `doc`.
// The first two carry the term "foo"; "bar" is added before the last two, so
// those carry both terms.
// NOTE(review): listing lines 535 (the function name and parameter list), 539,
// 541, 544 and 546 are absent from this extract, so `db` has no visible
// declaration and whatever else is set on `doc` before each add_document()
// cannot be seen. Presumably this is gen_decvalwtsource3_db - confirm against
// upstream.
534 static void
536 {
537  Xapian::Document doc;
538  doc.add_term("foo");
540  db.add_document(doc);
542  db.add_document(doc);
543  doc.add_term("bar");
545  db.add_document(doc);
547  db.add_document(doc);
548 }
549 
550 // Test DecreasingValueWeightPostingSource with an actual query.
// Runs the same posting-source query with result limits of 1 to 4 and checks
// that each smaller result set is a prefix of the next larger one.
// NOTE(review): listing lines 553 and 555 are absent from this extract, so the
// remainder of the get_database() call and the declaration of `ps` are not
// visible here.
551 DEFINE_TESTCASE(decvalwtsource3, backend) {
552  Xapian::Database db = get_database("decvalwtsource3",
554 
556  Xapian::Query q(&ps);
557  Xapian::Enquire enq(db);
558  enq.set_query(q);
559 
560  Xapian::MSet mset1(enq.get_mset(0, 1));
561  Xapian::MSet mset2(enq.get_mset(0, 2));
562  Xapian::MSet mset3(enq.get_mset(0, 3));
563  Xapian::MSet mset4(enq.get_mset(0, 4));
564 
// Each request must be filled completely.
565  TEST_EQUAL(mset1.size(), 1);
566  TEST_EQUAL(mset2.size(), 2);
567  TEST_EQUAL(mset3.size(), 3);
568  TEST_EQUAL(mset4.size(), 4);
569 
// The leading entries must not change as the limit grows.
570  TEST(mset_range_is_same(mset1, 0, mset2, 0, 1));
571  TEST(mset_range_is_same(mset2, 0, mset3, 0, 2));
572  TEST(mset_range_is_same(mset3, 0, mset4, 0, 3));
573 }
574 
575 // Test DecreasingValueWeightPostingSource with an actual query on a fixed
576 // dataset (this was to cover the remote backend before we supported generated
577 // databases for remote databases).
// Runs the same posting-source query with result limits of 1 to 4 and checks
// that each smaller result set is a prefix of the next larger one.
// NOTE(review): listing line 581 (the declaration of `ps`) is absent from this
// extract, so its constructor arguments are not visible here.
578 DEFINE_TESTCASE(decvalwtsource4, backend && !multi) {
579  Xapian::Database db = get_database("apitest_declen");
580 
582  Xapian::Query q(&ps);
583  Xapian::Enquire enq(db);
584  enq.set_query(q);
585 
586  Xapian::MSet mset1(enq.get_mset(0, 1));
587  Xapian::MSet mset2(enq.get_mset(0, 2));
588  Xapian::MSet mset3(enq.get_mset(0, 3));
589  Xapian::MSet mset4(enq.get_mset(0, 4));
590 
// Each request must be filled completely.
591  TEST_EQUAL(mset1.size(), 1);
592  TEST_EQUAL(mset2.size(), 2);
593  TEST_EQUAL(mset3.size(), 3);
594  TEST_EQUAL(mset4.size(), 4);
595 
// The leading entries must not change as the limit grows.
596  TEST(mset_range_is_same(mset1, 0, mset2, 0, 1));
597  TEST(mset_range_is_same(mset2, 0, mset3, 0, 2));
598  TEST(mset_range_is_same(mset3, 0, mset4, 0, 3));
599 }
600 
// Database generator helper: adds two documents built from one reused `doc`.
// NOTE(review): listing lines 602 (the function name and parameter list), 605
// and 607 are absent from this extract, so `db` has no visible declaration and
// whatever is set on `doc` before each add_document() cannot be seen.
// Presumably this is gen_decvalwtsource5_db - confirm against upstream.
601 static void
603 {
604  Xapian::Document doc;
606  db.add_document(doc);
608  db.add_document(doc);
609 }
610 
611 // Regression test - used to get segfaults if
612 // DecreasingValueWeightPostingSource was pointed at an empty slot.
// Runs three queries, each through its own posting source `ps`, asking for up
// to 3 results and expecting 2, 1 and 0 matches respectively.
// NOTE(review): listing lines 615, 618, 626 and 634 are absent from this
// extract, so the remainder of the get_database() call and the declaration of
// `ps` in each scope (including which slot it reads) are not visible here.
613 DEFINE_TESTCASE(decvalwtsource5, writable) {
614  Xapian::Database db = get_database("decvalwtsource5",
616 
// First source: two matches expected.
617  {
619  Xapian::Query q(&ps);
620  Xapian::Enquire enq(db);
621  enq.set_query(q);
622  Xapian::MSet mset1(enq.get_mset(0, 3));
623  TEST_EQUAL(mset1.size(), 2);
624  }
// Second source: one match expected.
625  {
627  Xapian::Query q(&ps);
628  Xapian::Enquire enq(db);
629  enq.set_query(q);
630  Xapian::MSet mset1(enq.get_mset(0, 3));
631  TEST_EQUAL(mset1.size(), 1);
632  }
// Third source: no matches expected.
633  {
635  Xapian::Query q(&ps);
636  Xapian::Enquire enq(db);
637  enq.set_query(q);
638  Xapian::MSet mset1(enq.get_mset(0, 3));
639  TEST_EQUAL(mset1.size(), 0);
640  }
641 }
static void gen_decvalwtsource5_db(Xapian::WritableDatabase &db, const string &)
static void gen_valueweightsource5_db(Xapian::WritableDatabase &db, const string &)
DEFINE_TESTCASE(valuestream1, backend)
Feature test simple valuestream iteration.
static void gen_decvalwtsource3_db(Xapian::WritableDatabase &db, const string &)
std::string get_dbtype()
Definition: apitest.cc:41
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:86
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:84
An indexed database of documents.
Definition: database.h:75
ValueIterator valuestream_begin(Xapian::valueno slot) const
Return an iterator over the value in slot slot for each document.
Definition: database.cc:335
ValueIterator valuestream_end(Xapian::valueno) const noexcept
Return end iterator corresponding to valuestream_begin().
Definition: database.h:421
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
Definition: database.cc:368
Read weights from a value which is known to decrease as docid increases.
bool check(Xapian::docid min_docid, double min_wt) override
Check if the specified docid occurs.
void reset(const Database &db_, Xapian::doccount shard_index) override
Set this PostingSource to the start of the list of postings.
void skip_to(Xapian::docid min_docid, double min_wt) override
Advance to the specified docid.
void next(double min_wt) override
Advance the current position to the next matching document.
Class representing a document.
Definition: document.h:64
void add_term(std::string_view term, Xapian::termcount wdf_inc=1)
Add a term to this document.
Definition: document.cc:87
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
Definition: document.cc:185
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Definition: document.cc:191
Querying session.
Definition: enquire.h:57
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
A posting source which returns a fixed weight for all documents.
void next(double min_wt) override
Advance the current position to the next matching document.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void reset(const Database &db_, Xapian::doccount shard_index) override
Set this PostingSource to the start of the list of postings.
void skip_to(Xapian::docid min_docid, double min_wt) override
Advance to the specified docid.
Class representing a list of search results.
Definition: mset.h:46
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
Class representing a query.
Definition: query.h:45
Class for iterating over document values.
Definition: valueiterator.h:39
Xapian::docid get_docid() const
Return the docid at the current position.
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
bool check(Xapian::docid docid)
Check if the specified docid occurs.
void skip_to(Xapian::docid docid_or_slot)
Advance the iterator to document id or value slot docid_or_slot.
A posting source which looks up weights in a map using values as the key.
void add_mapping(const std::string &key, double wt)
Add a mapping.
void reset(const Database &db_, Xapian::doccount shard_index) override
Set this PostingSource to the start of the list of postings.
void clear_mappings()
Clear all mappings.
void set_default_weight(double wt)
Set a default weight for document values not in the map.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
void next(double min_wt)
Advance the current position to the next matching document.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
Xapian::docid get_docid() const
Return the current docid.
A posting source which reads weights from a value slot.
void reset(const Database &db_, Xapian::doccount shard_index)
Set this PostingSource to the start of the list of postings.
This class provides read/write access to a database.
Definition: database.h:964
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a document in the database.
Definition: database.cc:582
void commit()
Commit pending modifications.
Definition: database.cc:543
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1229
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
bool contains(std::string_view s, char substring)
Definition: stringutils.h:104
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b).
Definition: testmacros.h:35
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
a generic test suite engine
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:224
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:45
Xapian-specific test helper functions and macros.
Public interfaces for the Xapian library.