xapian-core  1.4.27
api_valuestream.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008,2009,2010 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "api_valuestream.h"
26 
27 #include <xapian.h>
28 #include "testsuite.h"
29 #include "testutils.h"
30 
31 #include "apitest.h"
32 
33 using namespace std;
34 
// Feature test for plain value stream iteration: for each slot 0..14, walk the
// stream to valuestream_end(slot) and check that every entry reports the
// expected slot and matches the value stored in the document it refers to.
36 DEFINE_TESTCASE(valuestream1, backend) {
37  Xapian::Database db = get_database("apitest_simpledata");
38 
39  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
40  tout << "testing valuestream iteration for slot " << slot << '\n';
 // NOTE(review): `it` is used below but its declaration is not visible in
 // this listing (embedded line 41 is absent) - presumably it is obtained
 // from db.valuestream_begin(slot); confirm against the original file.
42  while (it != db.valuestream_end(slot)) {
43  TEST_EQUAL(it.get_valueno(), slot);
44  string value = *it;
45  Xapian::docid did = it.get_docid();
46 
 // Cross-check the streamed value against the document's own copy.
47  Xapian::Document doc = db.get_document(did);
48  TEST_EQUAL(doc.get_value(slot), value);
49 
50  ++it;
51  }
52  }
53 }
54 
// Feature test for skip_to() on a value stream: for each slot 0..14 and for a
// series of step sizes (1, 2, 5, 14, ... while below 1999), repeatedly skip
// forward and verify both the entry landed on and the documents skipped over.
56 DEFINE_TESTCASE(valuestream2, backend) {
57  Xapian::Database db = get_database("etext");
58 
59  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
60  unsigned interval = 1;
61  while (interval < 1999) {
 // Reset the debug stream so it only holds output for this combination.
62  tout.str(string());
63  tout << "testing valuestream skip_to for slot " << slot
64  << " with interval " << interval << '\n';
65  Xapian::docid did = 1;
 // NOTE(review): `it` is used below but its declaration is not visible in
 // this listing (embedded line 66 is absent) - presumably it is obtained
 // from db.valuestream_begin(slot); confirm against the original file.
 // An empty stream ends the interval loop for this slot.
67  if (it == db.valuestream_end(slot)) break;
68  while (it.skip_to(did), it != db.valuestream_end(slot)) {
69  TEST_EQUAL(it.get_valueno(), slot);
70  string value = *it;
71 
72  // Check that the skipped documents had no values.
73  Xapian::docid actual_did = it.get_docid();
74  TEST_REL(actual_did,>=,did);
75  while (did < actual_did) {
76  Xapian::Document doc = db.get_document(did);
77  TEST(doc.get_value(slot).empty());
78  ++did;
79  }
80 
 // Here did == actual_did; the entry itself must match the document.
81  Xapian::Document doc = db.get_document(actual_did);
82  TEST_EQUAL(doc.get_value(slot), value);
83  did += interval;
84  }
 // Step sizes grow as 1, 2, 5, 14, 41, ...
85  interval = interval * 3 - 1;
86  }
87  }
88 }
89 
// Feature test mixing check() with other positioning operations on a value
// stream. The operation used rotates on every step through
// CHECK, CHECK_AND_NEXT, CHECK2, SKIP_TO, CHECK_AND_LOOP; `operation` is
// declared outside all loops, so the rotation carries on across slots and
// intervals rather than restarting.
91 DEFINE_TESTCASE(valuestream3, backend) {
92  Xapian::Database db = get_database("etext");
93 
94  // Check combinations of check with other operations.
95  typedef enum {
96  CHECK, CHECK_AND_NEXT, CHECK2, SKIP_TO, CHECK_AND_LOOP
97  } test_op;
98  test_op operation = CHECK;
99 
100  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
101  unsigned interval = 1;
102  while (interval < 1999) {
103  tout << "testing valuestream check for slot " << slot
104  << " with interval " << interval << '\n';
105  Xapian::docid did = 1;
 // NOTE(review): `it` is used below but its declaration is not visible in
 // this listing (embedded line 106 is absent) - presumably it is obtained
 // from db.valuestream_begin(slot); confirm against the original file.
107  if (it == db.valuestream_end(slot)) break;
 // This loop only exits via the break taken once the iterator is
 // positioned at the end of the stream.
108  while (true) {
 // Only a plain check() can clear this; when it does, the
 // verification below is skipped for this step.
109  bool positioned = true;
110  switch (operation) {
111  case CHECK_AND_LOOP:
 // Wrap the rotation: behave as CHECK so that the increment
 // after the switch moves on to CHECK_AND_NEXT.
112  operation = CHECK;
113  // FALLTHRU.
114  case CHECK: case CHECK2:
115  positioned = it.check(did);
116  break;
117  case CHECK_AND_NEXT: {
 // If check() returned false, advance with ++it instead;
 // `positioned` stays true either way.
118  bool was_skip_to = it.check(did);
119  if (!was_skip_to) ++it;
120  break;
121  }
122  case SKIP_TO:
123  it.skip_to(did);
124  break;
125  }
 // Move on to the next operation in the rotation.
126  operation = test_op(operation + 1);
127  if (positioned) {
128  if (it == db.valuestream_end(slot)) break;
129  TEST_EQUAL(it.get_valueno(), slot);
130  string value = *it;
131 
132  // Check that the skipped documents had no values.
133  Xapian::docid actual_did = it.get_docid();
134  while (did < actual_did) {
135  Xapian::Document doc = db.get_document(did);
136  TEST(doc.get_value(slot).empty());
137  ++did;
138  }
139 
140  Xapian::Document doc = db.get_document(actual_did);
141  TEST_EQUAL(doc.get_value(slot), value);
142  }
143  did += interval;
144  }
 // Step sizes grow as 1, 2, 5, 14, 41, ...
145  interval = interval * 3 - 1;
146  }
147  }
148 }
149 
// Database generator helper: stores the same document at docid 1 and at
// docid 0xffffffff.
// NOTE(review): the signature line (embedded line 151) is absent from this
// listing; the reference index at the end of the listing mentions
// gen_valueweightsource5_db(Xapian::WritableDatabase &db, const string &),
// which presumably is this function - confirm against the original file.
150 static void
152 {
153  Xapian::Document doc;
 // NOTE(review): embedded line 154 is absent here - presumably it sets a
 // value on `doc` before it is stored; confirm.
155  db.replace_document(1, doc);
156  db.replace_document(0xffffffff, doc);
157 }
158 
// Checks that a posting source over the "valueweightsource5" database visits
// docid 1, then docid 0xffffffff, then reports at_end().
163 DEFINE_TESTCASE(valueweightsource5, valuestats) {
164  // inmemory's memory use is currently O(last_docid)!
165  SKIP_TEST_FOR_BACKEND("inmemory");
166  // remote's value slot iteration is very slow for this case currently
167  // because it throws and catches DocNotFoundError across the link 2^32-3
168  // times.
169  if (contains(get_dbtype(), "remote"))
170  SKIP_TEST("Testcase is too slow with remote shards");
171  XFAIL_FOR_BACKEND("honey", "compaction needs to split sparse document length chunks");
172 
 // NOTE(review): the rest of this get_database(...) call and the
 // declaration of `src` (embedded lines 174-175) are absent from this
 // listing - confirm the generator argument and the type/slot of `src`
 // against the original file.
173  Xapian::Database db = get_database("valueweightsource5",
176  src.init(db);
177  src.next(0.0);
178  TEST(!src.at_end());
179  TEST_EQUAL(src.get_docid(), 1);
180  src.next(0.0);
181  TEST(!src.at_end());
182  TEST_EQUAL(src.get_docid(), 0xffffffff);
183  src.next(0.0);
184  TEST(src.at_end());
185 }
186 
187 // Check that ValueMapPostingSource works correctly.
188 // the test db has value 13 set to:
189 // 1 Thi
190 // 2 The
191 // 3 You
192 // 4 War
193 // 5 Fri
194 // 6 Ins
195 // 7 Whi
196 // 8 Com
197 // 9 A p
198 // 10 Tel
199 // 11 Tel
200 // 12 Enc
201 // 13 Get
202 // 14 Doe
203 // 15 fir
204 // 16 Pad
205 // 17 Pad
206 //
// Runs a query built from a mapping posting source twice: first with explicit
// key -> weight mappings only, then with the same mappings plus a default
// weight, and checks the top five results and their order each time.
207 DEFINE_TESTCASE(valuemapsource1, backend) {
208  Xapian::Database db(get_database("apitest_phrase"));
209  Xapian::Enquire enq(db);
210 
 // NOTE(review): the declaration of `src` (embedded line 211) is absent
 // from this listing - presumably a Xapian::ValueMapPostingSource on the
 // slot described in the comment above this test; confirm.
212  src.add_mapping("Thi", 2.0);
213  src.add_mapping("The", 1.0);
214  src.add_mapping("You", 3.0);
215  src.add_mapping("War", 4.0);
216  src.add_mapping("Fri", 5.0);
217 
218  // check mset size and order
219  enq.set_query(Xapian::Query(&src));
220  Xapian::MSet mset = enq.get_mset(0, 5);
221 
222  TEST(mset.size() == 5);
 // Expected order follows the mapped weights, highest first:
 // Fri (5.0), War (4.0), You (3.0), Thi (2.0), The (1.0).
223  mset_expect_order(mset, 5, 4, 3, 1, 2);
224 
225  // and with default weight
226  src.clear_mappings();
227  src.set_default_weight(3.5);
228  src.add_mapping("Thi", 2.0);
229  src.add_mapping("The", 1.0);
230  src.add_mapping("You", 3.0);
231  src.add_mapping("War", 4.0);
232  src.add_mapping("Fri", 5.0);
233 
234  enq.set_query(Xapian::Query(&src));
235  mset = enq.get_mset(0, 5);
236 
237  TEST(mset.size() == 5);
 // Documents 6, 7 and 8 have no mapping, so they take the default weight
 // 3.5, which outranks the 3.0, 2.0 and 1.0 mappings.
238  mset_expect_order(mset, 5, 4, 6, 7, 8);
239 }
240 
241 // Regression test for valuepostingsource subclasses: used to segfault if skip_to()
242 // called on an empty list.
// Three independent scopes, each initialising a posting source on the
// database, checking it is not yet at_end(), performing one positioning call
// (next / skip_to / check) and checking that it is then at_end().
// NOTE(review): the declaration of `src` at the top of each scope (embedded
// lines 247, 255 and 263) is absent from this listing, so the concrete source
// types and slot numbers cannot be seen here - confirm against the original.
243 DEFINE_TESTCASE(valuemapsource2, backend && !multi) {
244  Xapian::Database db(get_database("apitest_phrase"));
245 
 // Case 1: next().
246  {
248  src.init(db);
249  TEST(src.at_end() == false);
250  src.next(0.0);
251  TEST(src.at_end() == true);
252  }
253 
 // Case 2: skip_to().
254  {
256  src.init(db);
257  TEST(src.at_end() == false);
258  src.skip_to(1, 0.0);
259  TEST(src.at_end() == true);
260  }
261 
 // Case 3: check().
262  {
264  src.init(db);
265  TEST(src.at_end() == false);
266  src.check(1, 0.0);
267  TEST(src.at_end() == true);
268  }
269 }
270 
271 // Regression test for fixedweightpostingsource: used to segfault if skip_to()
272 // called on an empty list.
// Uses a default-constructed Xapian::Database and checks, in two independent
// scopes, that a posting source is not at_end() right after init() but is
// at_end() after a single next() or skip_to() call.
// NOTE(review): the declaration of `src` at the top of each scope (embedded
// lines 277 and 285) is absent from this listing - presumably a
// Xapian::FixedWeightPostingSource, per the comment above; confirm.
273 DEFINE_TESTCASE(fixedweightsource2, !backend) {
274  Xapian::Database db;
275 
 // Case 1: next().
276  {
278  src.init(db);
279  TEST(src.at_end() == false);
280  src.next(0.0);
281  TEST(src.at_end() == true);
282  }
283 
 // Case 2: skip_to().
284  {
286  src.init(db);
287  TEST(src.at_end() == false);
288  src.skip_to(1, 0.0);
289  TEST(src.at_end() == true);
290  }
291 
292  // No need to test behaviour of check() - check is only allowed to be
293  // called with document IDs which exist, so can never be called for a
294  // FixedWeightPostingSource with an empty database.
295 }
296 
297 // Test DecreasingValueWeightPostingSource.
// Adds three documents, commits, and then exercises a posting source `src`
// with next(), skip_to() and check() under different minimum weights; a
// further document is added before the "restricted range" cases.
// NOTE(review): this listing is missing the line that obtains `db` (embedded
// line 299), the lines between the add_document() calls (302, 304, 306, 349 -
// presumably they set the value on `doc`), and every declaration of `src`
// (312, 333, 353, 373, 389). The values stored and the arguments passed to
// each `src` cannot be seen here; confirm against the original file.
298 DEFINE_TESTCASE(decvalwtsource1, writable) {
300 
301  Xapian::Document doc;
303  db.add_document(doc);
305  db.add_document(doc);
307  db.add_document(doc);
308  db.commit();
309 
310  // Check basic function
311  {
313  src.init(db);
314 
315  src.next(0.0);
316  TEST(!src.at_end());
317  TEST_EQUAL(src.get_docid(), 1);
318 
319  src.next(0.0);
320  TEST(!src.at_end());
321  TEST_EQUAL(src.get_docid(), 2);
322 
323  src.next(0.0);
324  TEST(!src.at_end());
325  TEST_EQUAL(src.get_docid(), 3);
326 
327  src.next(0.0);
328  TEST(src.at_end());
329  }
330 
331  // Check skipping to end of list due to weight
 // With a minimum weight of 1.5, iteration is expected to stop after
 // docid 2 rather than going on to docid 3.
332  {
334  src.init(db);
335 
336  src.next(1.5);
337  TEST(!src.at_end());
338  TEST_EQUAL(src.get_docid(), 1);
339 
340  src.next(1.5);
341  TEST(!src.at_end());
342  TEST_EQUAL(src.get_docid(), 2);
343 
344  src.next(1.5);
345  TEST(src.at_end());
346  }
347 
348  // Check behaviour with a restricted range
350  db.add_document(doc);
351 
 // Expected sequence with next(): 1, 2, 4 (docid 3 is passed over).
352  {
354  src.init(db);
355 
356  src.next(1.5);
357  TEST(!src.at_end());
358  TEST_EQUAL(src.get_docid(), 1);
359 
360  src.next(1.5);
361  TEST(!src.at_end());
362  TEST_EQUAL(src.get_docid(), 2);
363 
364  src.next(1.5);
365  TEST(!src.at_end());
366  TEST_EQUAL(src.get_docid(), 4);
367 
368  src.next(1.5);
369  TEST(src.at_end());
370  }
371 
 // skip_to(3) is expected to land on docid 4.
372  {
374  src.init(db);
375 
376  src.next(1.5);
377  TEST(!src.at_end());
378  TEST_EQUAL(src.get_docid(), 1);
379 
380  src.skip_to(3, 1.5);
381  TEST(!src.at_end());
382  TEST_EQUAL(src.get_docid(), 4);
383 
384  src.next(1.5);
385  TEST(src.at_end());
386  }
387 
 // check(3) is expected to return true and leave the source on docid 4.
388  {
390  src.init(db);
391 
392  src.next(1.5);
393  TEST(!src.at_end());
394  TEST_EQUAL(src.get_docid(), 1);
395 
396  TEST(src.check(3, 1.5));
397  TEST(!src.at_end());
398  TEST_EQUAL(src.get_docid(), 4);
399 
400  src.next(1.5);
401  TEST(src.at_end());
402  }
403 }
404 
405 // Test DecreasingValueWeightPostingSource with out-of-order sections at
406 // start, and with repeated weights.
// Adds four documents, commits, and then exercises a posting source `src`
// with next(), skip_to() and check() under different minimum weights; a
// further document is added before the "restricted range" cases.
// NOTE(review): this listing is missing the line that obtains `db` (embedded
// line 408), the lines between the add_document() calls (411, 413, 415, 417,
// 468 - presumably they set the value on `doc`), and every declaration of
// `src` (423, 448, 472, 496, 516). The values stored and the arguments passed
// to each `src` cannot be seen here; confirm against the original file.
407 DEFINE_TESTCASE(decvalwtsource2, writable) {
409 
410  Xapian::Document doc;
412  db.add_document(doc);
414  db.add_document(doc);
416  db.add_document(doc);
418  db.add_document(doc);
419  db.commit();
420 
421  // Check basic function
422  {
424  src.init(db);
425 
426  src.next(0.0);
427  TEST(!src.at_end());
428  TEST_EQUAL(src.get_docid(), 1);
429 
430  src.next(0.0);
431  TEST(!src.at_end());
432  TEST_EQUAL(src.get_docid(), 2);
433 
434  src.next(0.0);
435  TEST(!src.at_end());
436  TEST_EQUAL(src.get_docid(), 3);
437 
438  src.next(0.0);
439  TEST(!src.at_end());
440  TEST_EQUAL(src.get_docid(), 4);
441 
442  src.next(0.0);
443  TEST(src.at_end());
444  }
445 
446  // Check skipping to end of list due to weight
 // With a minimum weight of 1.5, iteration is expected to stop after
 // docid 3 rather than going on to docid 4.
447  {
449  src.init(db);
450 
451  src.next(1.5);
452  TEST(!src.at_end());
453  TEST_EQUAL(src.get_docid(), 1);
454 
455  src.next(1.5);
456  TEST(!src.at_end());
457  TEST_EQUAL(src.get_docid(), 2);
458 
459  src.next(1.5);
460  TEST(!src.at_end());
461  TEST_EQUAL(src.get_docid(), 3);
462 
463  src.next(1.5);
464  TEST(src.at_end());
465  }
466 
467  // Check behaviour with a restricted range
469  db.add_document(doc);
470 
 // Expected sequence with next(): 1, 2, 3, 5 (docid 4 is passed over).
471  {
473  src.init(db);
474 
475  src.next(1.5);
476  TEST(!src.at_end());
477  TEST_EQUAL(src.get_docid(), 1);
478 
479  src.next(1.5);
480  TEST(!src.at_end());
481  TEST_EQUAL(src.get_docid(), 2);
482 
483  src.next(1.5);
484  TEST(!src.at_end());
485  TEST_EQUAL(src.get_docid(), 3);
486 
487  src.next(1.5);
488  TEST(!src.at_end());
489  TEST_EQUAL(src.get_docid(), 5);
490 
491  src.next(1.5);
492  TEST(src.at_end());
493  }
494 
 // check(1), next(), then skip_to(4), which is expected to land on docid 5.
495  {
497  src.init(db);
498 
499  TEST(src.check(1, 1.5));
500  TEST(!src.at_end());
501  TEST_EQUAL(src.get_docid(), 1);
502 
503  src.next(1.5);
504  TEST(!src.at_end());
505  TEST_EQUAL(src.get_docid(), 2);
506 
507  src.skip_to(4, 1.5);
508  TEST(!src.at_end());
509  TEST_EQUAL(src.get_docid(), 5);
510 
511  src.next(1.5);
512  TEST(src.at_end());
513  }
514 
 // Same as the previous case, but using check(4) instead of skip_to(4).
515  {
517  src.init(db);
518 
519  TEST(src.check(1, 1.5));
520  TEST(!src.at_end());
521  TEST_EQUAL(src.get_docid(), 1);
522 
523  src.next(1.5);
524  TEST(!src.at_end());
525  TEST_EQUAL(src.get_docid(), 2);
526 
527  TEST(src.check(4, 1.5));
528  TEST(!src.at_end());
529  TEST_EQUAL(src.get_docid(), 5);
530 
531  src.next(1.5);
532  TEST(src.at_end());
533  }
534 }
535 
// Database generator helper: adds four documents. All of them carry the term
// "foo"; the last two additionally carry the term "bar".
// NOTE(review): the signature line (embedded line 537) is absent from this
// listing; the reference index at the end of the listing mentions
// gen_decvalwtsource3_db(Xapian::WritableDatabase &db, const string &), which
// presumably is this function. The lines before each add_document() call
// (541, 543, 546, 548) are also absent - presumably they set a value on `doc`.
// Confirm both against the original file.
536 static void
538 {
539  Xapian::Document doc;
540  doc.add_term("foo");
542  db.add_document(doc);
544  db.add_document(doc);
545  doc.add_term("bar");
547  db.add_document(doc);
549  db.add_document(doc);
550 }
551 
552 // Test DecreasingValueWeightPostingSource with an actual query.
// Runs the same posting-source query with maxitems 1, 2, 3 and 4 and checks
// that each MSet has exactly the requested size and that each smaller MSet
// matches the leading entries of the next larger one.
553 DEFINE_TESTCASE(decvalwtsource3, backend) {
 // NOTE(review): the rest of this get_database(...) call (embedded line
 // 555) and the declaration of `ps` (embedded line 557) are absent from
 // this listing - confirm the generator argument and the construction of
 // `ps` against the original file.
554  Xapian::Database db = get_database("decvalwtsource3",
556 
558  Xapian::Query q(&ps);
559  Xapian::Enquire enq(db);
560  enq.set_query(q);
561 
562  Xapian::MSet mset1(enq.get_mset(0, 1));
563  Xapian::MSet mset2(enq.get_mset(0, 2));
564  Xapian::MSet mset3(enq.get_mset(0, 3));
565  Xapian::MSet mset4(enq.get_mset(0, 4));
566 
567  TEST_EQUAL(mset1.size(), 1);
568  TEST_EQUAL(mset2.size(), 2);
569  TEST_EQUAL(mset3.size(), 3);
570  TEST_EQUAL(mset4.size(), 4);
571 
 // Each smaller result set must be a prefix of the next larger one.
572  TEST(mset_range_is_same(mset1, 0, mset2, 0, 1));
573  TEST(mset_range_is_same(mset2, 0, mset3, 0, 2));
574  TEST(mset_range_is_same(mset3, 0, mset4, 0, 3));
575 }
576 
577 // Test DecreasingValueWeightPostingSource with an actual query on a fixed
578 // dataset (this was to cover the remote backend before we supported generated
579 // databases for remote databases).
// Same checks as decvalwtsource3, but against the fixed "apitest_declen"
// database: MSets requested with maxitems 1..4 must have exactly that size
// and each must match the leading entries of the next larger one.
580 DEFINE_TESTCASE(decvalwtsource4, backend && !multi) {
581  Xapian::Database db = get_database("apitest_declen");
582 
 // NOTE(review): the declaration of `ps` (embedded line 583) is absent
 // from this listing - confirm its type and arguments against the
 // original file.
584  Xapian::Query q(&ps);
585  Xapian::Enquire enq(db);
586  enq.set_query(q);
587 
588  Xapian::MSet mset1(enq.get_mset(0, 1));
589  Xapian::MSet mset2(enq.get_mset(0, 2));
590  Xapian::MSet mset3(enq.get_mset(0, 3));
591  Xapian::MSet mset4(enq.get_mset(0, 4));
592 
593  TEST_EQUAL(mset1.size(), 1);
594  TEST_EQUAL(mset2.size(), 2);
595  TEST_EQUAL(mset3.size(), 3);
596  TEST_EQUAL(mset4.size(), 4);
597 
 // Each smaller result set must be a prefix of the next larger one.
598  TEST(mset_range_is_same(mset1, 0, mset2, 0, 1));
599  TEST(mset_range_is_same(mset2, 0, mset3, 0, 2));
600  TEST(mset_range_is_same(mset3, 0, mset4, 0, 3));
601 }
602 
// Database generator helper: adds two documents built from the same `doc`
// object.
// NOTE(review): the signature line (embedded line 604) is absent from this
// listing; the reference index at the end of the listing mentions
// gen_decvalwtsource5_db(Xapian::WritableDatabase &db, const string &), which
// presumably is this function. The lines before each add_document() call
// (607, 609) are also absent - presumably they set values on `doc`. Confirm
// both against the original file.
603 static void
605 {
606  Xapian::Document doc;
608  db.add_document(doc);
610  db.add_document(doc);
611 }
612 
613 // Regression test - used to get segfaults if
614 // DecreasingValueWeightPostingSource was pointed at an empty slot.
// Runs three posting-source queries against the "decvalwtsource5" database,
// each asking for up to 3 results, and expects 2, 1 and 0 matches in turn.
// NOTE(review): the rest of the get_database(...) call (embedded line 617)
// and the declaration of `ps` in each scope (620, 628, 636) are absent from
// this listing, so the slot each `ps` reads cannot be seen here - presumably
// the three scopes point at different slots, the last one empty (per the
// regression note above); confirm against the original file.
615 DEFINE_TESTCASE(decvalwtsource5, writable) {
616  Xapian::Database db = get_database("decvalwtsource5",
618 
619  {
621  Xapian::Query q(&ps);
622  Xapian::Enquire enq(db);
623  enq.set_query(q);
624  Xapian::MSet mset1(enq.get_mset(0, 3));
625  TEST_EQUAL(mset1.size(), 2);
626  }
627  {
629  Xapian::Query q(&ps);
630  Xapian::Enquire enq(db);
631  enq.set_query(q);
632  Xapian::MSet mset1(enq.get_mset(0, 3));
633  TEST_EQUAL(mset1.size(), 1);
634  }
635  {
637  Xapian::Query q(&ps);
638  Xapian::Enquire enq(db);
639  enq.set_query(q);
640  Xapian::MSet mset1(enq.get_mset(0, 3));
641  TEST_EQUAL(mset1.size(), 0);
642  }
643 }
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
This class is used to access a database, or a group of databases.
Definition: database.h:68
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:46
Xapian::docid get_docid() const
Return the current docid.
A posting source which looks up weights in a map using values as the key.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
bool at_end() const
Return true if the current position is past the last entry in this list.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
a generic test suite engine
Class for iterating over document values.
Definition: valueiterator.h:40
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:938
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1382
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
void clear_mappings()
Clear all mappings.
Read weights from a value which is known to decrease as docid increases.
bool contains(const std::string &s, char substring)
Definition: stringutils.h:99
test functionality of the Xapian API
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
ValueIterator valuestream_end(Xapian::valueno) const
Return end iterator corresponding to valuestream_begin().
Definition: database.h:363
This class provides read/write access to a database.
Definition: database.h:789
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
Public interfaces for the Xapian library.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
A posting source which returns a fixed weight for all documents.
DEFINE_TESTCASE(valuestream1, backend)
Feature test simple valuestream iteration.
bool at_end() const
Return true if the current position is past the last entry in this list.
std::string get_dbtype()
Definition: apitest.cc:42
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:75
ValueIterator valuestream_begin(Xapian::valueno slot) const
Return an iterator over the value in slot slot for each document.
Definition: omdatabase.cc:450
static void gen_valueweightsource5_db(Xapian::WritableDatabase &db, const string &)
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
bool check(Xapian::docid docid)
Check if the specified docid occurs.
A posting source which reads weights from a value slot.
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
void set_default_weight(double wt)
Set a default weight for document values not in the map.
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:74
void XFAIL_FOR_BACKEND(const std::string &backend_prefix, const char *msg)
Definition: apitest.cc:147
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
void init(const Xapian::Database &db_)
Set this PostingSource to the start of the list of postings.
void add_mapping(const std::string &key, double wt)
Add a mapping.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
static void gen_decvalwtsource3_db(Xapian::WritableDatabase &db, const string &)
Xapian::docid get_docid() const
Return the docid at the current position.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
void next(double min_wt)
Advance the current position to the next matching document.
void next(double min_wt)
Advance the current position to the next matching document.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
static void gen_decvalwtsource5_db(Xapian::WritableDatabase &db, const string &)
std::string get_value(Xapian::valueno slot) const
Get value by number.
Definition: omdocument.cc:64
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
void skip_to(Xapian::docid docid_or_slot)
Advance the iterator to document id or value slot docid_or_slot.
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
void next(double min_wt)
Advance the current position to the next matching document.