xapian-core  1.4.21
api_valuestream.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008,2009,2010 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "api_valuestream.h"
26 
27 #include <xapian.h>
28 #include "testsuite.h"
29 #include "testutils.h"
30 
31 #include "apitest.h"
32 
33 using namespace std;
34 
// Feature test: simple iteration over a value stream.
//
// For each of slots 0..14 of the "apitest_simpledata" database, walks the
// slot's value stream from valuestream_begin() to valuestream_end() and
// checks, for every entry, that:
//   - the iterator reports the slot being iterated (get_valueno()), and
//   - the value it yields equals what the document with that docid returns
//     from get_value() for the same slot.
// Restricted to non-multi backends (see the FIXME below).
36 DEFINE_TESTCASE(valuestream1, backend && !multi) {
37  // FIXME: enable for multi once support is in place.
38  Xapian::Database db = get_database("apitest_simpledata");
39 
40  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
41  tout << "testing valuestream iteration for slot " << slot << endl;
    // Start at the first entry of this slot's value stream.
42  Xapian::ValueIterator it = db.valuestream_begin(slot);
43  while (it != db.valuestream_end(slot)) {
44  TEST_EQUAL(it.get_valueno(), slot);
45  string value = *it;
46  Xapian::docid did = it.get_docid();
47 
    // Cross-check the streamed value against the stored document.
48  Xapian::Document doc = db.get_document(did);
49  TEST_EQUAL(doc.get_value(slot), value);
50 
51  ++it;
52  }
53  }
54 }
55 
// Test skip_to() on a value stream iterator.
//
// For each of slots 0..14 of the "etext" database, and for each skip
// interval in the sequence 1, 2, 5, 14, ... (interval = interval * 3 - 1,
// while below 1999), repeatedly calls skip_to(did) and advances `did` by the
// interval.  After every skip it checks that:
//   - the iterator reports the slot being iterated,
//   - the docid landed on is >= the requested docid,
//   - every docid that was skipped over has an empty value in this slot, and
//   - the value yielded equals the landed-on document's stored value.
57 DEFINE_TESTCASE(valuestream2, backend) {
58  Xapian::Database db = get_database("etext");
59 
60  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
61  unsigned interval = 1;
62  while (interval < 1999) {
    // Reset the debug stream so it only holds output for this pass.
63  tout.str(string());
64  tout << "testing valuestream skip_to for slot " << slot
65  << " with interval " << interval << endl;
66  Xapian::docid did = 1;
    // Fresh iterator for every interval so each pass starts at the beginning.
67  Xapian::ValueIterator it = db.valuestream_begin(slot);
    // Nothing stored in this slot: move on to the next slot.
68  if (it == db.valuestream_end(slot)) break;
69  while (it.skip_to(did), it != db.valuestream_end(slot)) {
70  TEST_EQUAL(it.get_valueno(), slot);
71  string value = *it;
72 
73  // Check that the skipped documents had no values.
74  Xapian::docid actual_did = it.get_docid();
75  TEST_REL(actual_did,>=,did);
76  while (did < actual_did) {
77  Xapian::Document doc = db.get_document(did);
78  TEST(doc.get_value(slot).empty());
79  ++did;
80  }
81 
82  Xapian::Document doc = db.get_document(actual_did);
83  TEST_EQUAL(doc.get_value(slot), value);
84  did += interval;
85  }
86  interval = interval * 3 - 1;
87  }
88  }
89 }
90 
// Test check() on a value stream iterator, mixed with other operations.
//
// For each of slots 0..14 of the "etext" database, and for each interval in
// the sequence 1, 2, 5, 14, ... (interval = interval * 3 - 1, while below
// 1999), steps `did` forward by the interval and, at each step, applies the
// next operation from a rotating list:
//   CHECK / CHECK2   - call check(did); only verify the position if it
//                      returned true.
//   CHECK_AND_NEXT   - call check(did); if it returned false, advance with
//                      ++it; then verify the position.
//   SKIP_TO          - call skip_to(did); then verify the position.
//   CHECK_AND_LOOP   - wrap the rotation back to CHECK and behave as CHECK.
// `operation` is declared outside the loops, so the rotation carries over
// from one interval/slot to the next.
//
// Verifying the position means: the iterator reports the right slot, every
// docid between the requested and actual position has an empty value in the
// slot, and the yielded value equals the document's stored value.  The inner
// loop ends when a positioned iterator has reached valuestream_end().
92 DEFINE_TESTCASE(valuestream3, backend) {
93  Xapian::Database db = get_database("etext");
94 
95  // Check combinations of check with other operations.
96  typedef enum {
97  CHECK, CHECK_AND_NEXT, CHECK2, SKIP_TO, CHECK_AND_LOOP
98  } test_op;
99  test_op operation = CHECK;
100 
101  for (Xapian::valueno slot = 0; slot < 15; ++slot) {
102  unsigned interval = 1;
103  while (interval < 1999) {
104  tout << "testing valuestream check for slot " << slot
105  << " with interval " << interval << endl;
106  Xapian::docid did = 1;
    // Fresh iterator for every interval so each pass starts at the beginning.
107  Xapian::ValueIterator it = db.valuestream_begin(slot);
    // Nothing stored in this slot: move on to the next slot.
108  if (it == db.valuestream_end(slot)) break;
109  while (true) {
    // Stays true unless a plain check() reports it did not position.
110  bool positioned = true;
111  switch (operation) {
112  case CHECK_AND_LOOP:
113  operation = CHECK;
114  // FALLTHRU.
115  case CHECK: case CHECK2:
116  positioned = it.check(did);
117  break;
118  case CHECK_AND_NEXT: {
119  bool was_skip_to = it.check(did);
120  if (!was_skip_to) ++it;
121  break;
122  }
123  case SKIP_TO:
124  it.skip_to(did);
125  break;
126  }
    // Rotate to the next operation for the following step.
127  operation = test_op(operation + 1);
128  if (positioned) {
129  if (it == db.valuestream_end(slot)) break;
130  TEST_EQUAL(it.get_valueno(), slot);
131  string value = *it;
132 
133  // Check that the skipped documents had no values.
134  Xapian::docid actual_did = it.get_docid();
135  while (did < actual_did) {
136  Xapian::Document doc = db.get_document(did);
137  TEST(doc.get_value(slot).empty());
138  ++did;
139  }
140 
141  Xapian::Document doc = db.get_document(actual_did);
142  TEST_EQUAL(doc.get_value(slot), value);
143  }
144  did += interval;
145  }
146  interval = interval * 3 - 1;
147  }
148  }
149 }
150 
// Checks that a posting source iterates correctly over a database whose only
// documents are docid 1 and docid 0xffffffff: next() must visit docid 1, then
// docid 0xffffffff, then report at_end().
//
// NOTE(review): listing numbers 162, 164 and 169 are absent from this
// listing, and `db` and `src` are used without a visible declaration.
// Presumably those lines declared the writable database, set a value on
// `doc`, and constructed the posting source - confirm against the upstream
// file before relying on this block.
155 DEFINE_TESTCASE(valueweightsource5, writable && valuestats) {
156  // inmemory's memory use is currently O(last_docid)!
157  SKIP_TEST_FOR_BACKEND("inmemory");
158  // remote's value slot iteration is very slow for this case currently
159  // because it throws and catches DocNotFoundError across the link 2^32-3
160  // times.
161  SKIP_TEST_FOR_BACKEND("remote");
163  Xapian::Document doc;
    // The same document object is stored at both ends of the docid range.
165  db.replace_document(1, doc);
166  db.replace_document(0xffffffff, doc);
167  db.commit();
168 
170  src.init(db);
171  src.next(0.0);
172  TEST(!src.at_end());
173  TEST_EQUAL(src.get_docid(), 1);
174  src.next(0.0);
175  TEST(!src.at_end());
176  TEST_EQUAL(src.get_docid(), 0xffffffff);
177  src.next(0.0);
178  TEST(src.at_end());
179 }
180 
181 // Check that ValueMapPostingSource works correctly.
182 // the test db has value 13 set to:
183 // 1 Thi
184 // 2 The
185 // 3 You
186 // 4 War
187 // 5 Fri
188 // 6 Ins
189 // 7 Whi
190 // 8 Com
191 // 9 A p
192 // 10 Tel
193 // 11 Tel
194 // 12 Enc
195 // 13 Get
196 // 14 Doe
197 // 15 fir
198 // 16 Pad
199 // 17 Pad
200 //
// Runs a ValueMapPostingSource over value slot 13 of "apitest_phrase" as a
// query and checks the size and ordering of the top five matches:
//   1. With explicit weights for "Thi", "The", "You", "War" and "Fri" only,
//      the expected order is docids 5, 4, 3, 1, 2 (descending mapped weight).
//   2. After clearing the mappings, setting a default weight of 3.5 and
//      re-adding the same mappings, the expected order is 5, 4, 6, 7, 8:
//      only "Fri" (5.0) and "War" (4.0) outrank the default weight.
201 DEFINE_TESTCASE(valuemapsource1, backend) {
202  Xapian::Database db(get_database("apitest_phrase"));
203  Xapian::Enquire enq(db);
204 
    // Slot 13 holds the three-character keys mapped below.
205  Xapian::ValueMapPostingSource src(13);
206  src.add_mapping("Thi", 2.0);
207  src.add_mapping("The", 1.0);
208  src.add_mapping("You", 3.0);
209  src.add_mapping("War", 4.0);
210  src.add_mapping("Fri", 5.0);
211 
212  // check mset size and order
213  enq.set_query(Xapian::Query(&src));
214  Xapian::MSet mset = enq.get_mset(0, 5);
215 
    // TEST_EQUAL (as used by the other MSet size checks in this file) reports
    // both values on failure.
216  TEST_EQUAL(mset.size(), 5);
217  mset_expect_order(mset, 5, 4, 3, 1, 2);
218 
219  // and with default weight
220  src.clear_mappings();
221  src.set_default_weight(3.5);
222  src.add_mapping("Thi", 2.0);
223  src.add_mapping("The", 1.0);
224  src.add_mapping("You", 3.0);
225  src.add_mapping("War", 4.0);
226  src.add_mapping("Fri", 5.0);
227 
228  enq.set_query(Xapian::Query(&src));
229  mset = enq.get_mset(0, 5);
230 
231  TEST_EQUAL(mset.size(), 5);
232  mset_expect_order(mset, 5, 4, 6, 7, 8);
233 }
234 
235 // Regression test for valuepostingsource subclasses: used to segfault if skip_to()
236 // called on an empty list.
// Exercises next(), skip_to() and check() on a posting source that is
// expected to produce no entries for "apitest_phrase".  In each of the three
// scopes the source must report at_end() == false immediately after init()
// and at_end() == true after the first positioning call.
//
// NOTE(review): listing numbers 241, 249 and 257 are absent from this
// listing and `src` is used without a visible declaration in each scope.
// Presumably each scope constructed its own posting source over a slot with
// no values - confirm the type and slot against the upstream file.
237 DEFINE_TESTCASE(valuemapsource2, backend && !multi) {
238  Xapian::Database db(get_database("apitest_phrase"));
239 
    // Case 1: next() from the initial position.
240  {
242  src.init(db);
243  TEST(src.at_end() == false);
244  src.next(0.0);
245  TEST(src.at_end() == true);
246  }
247 
    // Case 2: skip_to() from the initial position.
248  {
250  src.init(db);
251  TEST(src.at_end() == false);
252  src.skip_to(1, 0.0);
253  TEST(src.at_end() == true);
254  }
255 
    // Case 3: check() from the initial position.
256  {
258  src.init(db);
259  TEST(src.at_end() == false);
260  src.check(1, 0.0);
261  TEST(src.at_end() == true);
262  }
263 }
264 
265 // Regression test for fixedweightpostingsource: used to segfault if skip_to()
266 // called on an empty list.
// Exercises next() and skip_to() on a posting source initialised with a
// default-constructed Xapian::Database.  In both scopes the source must
// report at_end() == false immediately after init() and at_end() == true
// after the first positioning call.  Runs without any backend (!backend).
//
// NOTE(review): listing numbers 271 and 279 are absent from this listing and
// `src` is used without a visible declaration in each scope.  Presumably
// each scope constructed a FixedWeightPostingSource (see the comment at the
// end of the test) - confirm the constructor argument against the upstream
// file.
267 DEFINE_TESTCASE(fixedweightsource2, !backend) {
268  Xapian::Database db;
269 
    // Case 1: next() from the initial position.
270  {
272  src.init(db);
273  TEST(src.at_end() == false);
274  src.next(0.0);
275  TEST(src.at_end() == true);
276  }
277 
    // Case 2: skip_to() from the initial position.
278  {
280  src.init(db);
281  TEST(src.at_end() == false);
282  src.skip_to(1, 0.0);
283  TEST(src.at_end() == true);
284  }
285 
286  // No need to test behaviour of check() - check is only allowed to be
287  // called with document IDs which exist, so can never be called for a
288  // FixedWeightPostingSource with an empty database.
289 }
290 
291 // Test DecreasingValueWeightPostingSource.
// Adds three documents, commits, and then drives a posting source `src`
// through several scenarios, checking at_end() and get_docid() after every
// step:
//   - minimum weight 0.0: docids 1, 2, 3 are visited, then at_end();
//   - minimum weight 1.5: docids 1, 2 are visited, then at_end();
//   - after a fourth document is added: next() visits 1, 2, 4 then at_end();
//     skip_to(3) lands on 4; check(3) returns true and lands on 4.
//
// NOTE(review): listing numbers 293, 296, 298, 300, 306, 327, 343, 347, 367
// and 383 are absent from this listing; `db` and `src` are used without a
// visible declaration.  Presumably the missing lines declared the writable
// database, set a value on `doc` before each add_document(), and constructed
// the posting source in each scope - confirm the values and constructor
// arguments against the upstream file.
292 DEFINE_TESTCASE(decvalwtsource1, writable) {
294 
295  Xapian::Document doc;
297  db.add_document(doc);
299  db.add_document(doc);
301  db.add_document(doc);
302  db.commit();
303 
304  // Check basic function
305  {
307  src.init(db);
308 
309  src.next(0.0);
310  TEST(!src.at_end());
311  TEST_EQUAL(src.get_docid(), 1);
312 
313  src.next(0.0);
314  TEST(!src.at_end());
315  TEST_EQUAL(src.get_docid(), 2);
316 
317  src.next(0.0);
318  TEST(!src.at_end());
319  TEST_EQUAL(src.get_docid(), 3);
320 
321  src.next(0.0);
322  TEST(src.at_end());
323  }
324 
325  // Check skipping to end of list due to weight
326  {
328  src.init(db);
329 
330  src.next(1.5);
331  TEST(!src.at_end());
332  TEST_EQUAL(src.get_docid(), 1);
333 
334  src.next(1.5);
335  TEST(!src.at_end());
336  TEST_EQUAL(src.get_docid(), 2);
337 
    // Docid 3 is never returned at this minimum weight.
338  src.next(1.5);
339  TEST(src.at_end());
340  }
341 
342  // Check behaviour with a restricted range
    // A fourth document is added here; no commit() is visible after it.
344  db.add_document(doc);
345 
    // Plain next() iteration: docid 3 is passed over, docid 4 is returned.
346  {
348  src.init(db);
349 
350  src.next(1.5);
351  TEST(!src.at_end());
352  TEST_EQUAL(src.get_docid(), 1);
353 
354  src.next(1.5);
355  TEST(!src.at_end());
356  TEST_EQUAL(src.get_docid(), 2);
357 
358  src.next(1.5);
359  TEST(!src.at_end());
360  TEST_EQUAL(src.get_docid(), 4);
361 
362  src.next(1.5);
363  TEST(src.at_end());
364  }
365 
    // skip_to(3) must land on docid 4 rather than 3.
366  {
368  src.init(db);
369 
370  src.next(1.5);
371  TEST(!src.at_end());
372  TEST_EQUAL(src.get_docid(), 1);
373 
374  src.skip_to(3, 1.5);
375  TEST(!src.at_end());
376  TEST_EQUAL(src.get_docid(), 4);
377 
378  src.next(1.5);
379  TEST(src.at_end());
380  }
381 
    // check(3) must return true and leave the source on docid 4.
382  {
384  src.init(db);
385 
386  src.next(1.5);
387  TEST(!src.at_end());
388  TEST_EQUAL(src.get_docid(), 1);
389 
390  TEST(src.check(3, 1.5));
391  TEST(!src.at_end());
392  TEST_EQUAL(src.get_docid(), 4);
393 
394  src.next(1.5);
395  TEST(src.at_end());
396  }
397 }
398 
399 // Test DecreasingValueWeightPostingSource with out-of-order sections at
400 // start, and with repeated weights.
// Adds four documents, commits, and then drives a posting source `src`
// through several scenarios, checking at_end() and get_docid() after every
// step:
//   - minimum weight 0.0: docids 1, 2, 3, 4 are visited, then at_end();
//   - minimum weight 1.5: docids 1, 2, 3 are visited, then at_end();
//   - after a fifth document is added: next() visits 1, 2, 3, 5 then
//     at_end(); check(1) + next() + skip_to(4) visits 1, 2, 5; and
//     check(1) + next() + check(4) visits 1, 2, 5.
//
// NOTE(review): listing numbers 402, 405, 407, 409, 411, 417, 442, 462, 466,
// 490 and 510 are absent from this listing; `db` and `src` are used without
// a visible declaration.  Presumably the missing lines declared the writable
// database, set a value on `doc` before each add_document(), and constructed
// the posting source in each scope - confirm the values and constructor
// arguments against the upstream file.
401 DEFINE_TESTCASE(decvalwtsource2, writable) {
403 
404  Xapian::Document doc;
406  db.add_document(doc);
408  db.add_document(doc);
410  db.add_document(doc);
412  db.add_document(doc);
413  db.commit();
414 
415  // Check basic function
416  {
418  src.init(db);
419 
420  src.next(0.0);
421  TEST(!src.at_end());
422  TEST_EQUAL(src.get_docid(), 1);
423 
424  src.next(0.0);
425  TEST(!src.at_end());
426  TEST_EQUAL(src.get_docid(), 2);
427 
428  src.next(0.0);
429  TEST(!src.at_end());
430  TEST_EQUAL(src.get_docid(), 3);
431 
432  src.next(0.0);
433  TEST(!src.at_end());
434  TEST_EQUAL(src.get_docid(), 4);
435 
436  src.next(0.0);
437  TEST(src.at_end());
438  }
439 
440  // Check skipping to end of list due to weight
441  {
443  src.init(db);
444 
445  src.next(1.5);
446  TEST(!src.at_end());
447  TEST_EQUAL(src.get_docid(), 1);
448 
449  src.next(1.5);
450  TEST(!src.at_end());
451  TEST_EQUAL(src.get_docid(), 2);
452 
453  src.next(1.5);
454  TEST(!src.at_end());
455  TEST_EQUAL(src.get_docid(), 3);
456 
    // Docid 4 is never returned at this minimum weight.
457  src.next(1.5);
458  TEST(src.at_end());
459  }
460 
461  // Check behaviour with a restricted range
    // A fifth document is added here; no commit() is visible after it.
463  db.add_document(doc);
464 
    // Plain next() iteration: docid 4 is passed over, docid 5 is returned.
465  {
467  src.init(db);
468 
469  src.next(1.5);
470  TEST(!src.at_end());
471  TEST_EQUAL(src.get_docid(), 1);
472 
473  src.next(1.5);
474  TEST(!src.at_end());
475  TEST_EQUAL(src.get_docid(), 2);
476 
477  src.next(1.5);
478  TEST(!src.at_end());
479  TEST_EQUAL(src.get_docid(), 3);
480 
481  src.next(1.5);
482  TEST(!src.at_end());
483  TEST_EQUAL(src.get_docid(), 5);
484 
485  src.next(1.5);
486  TEST(src.at_end());
487  }
488 
    // check() as the first call, then next(), then skip_to(4) landing on 5.
489  {
491  src.init(db);
492 
493  TEST(src.check(1, 1.5));
494  TEST(!src.at_end());
495  TEST_EQUAL(src.get_docid(), 1);
496 
497  src.next(1.5);
498  TEST(!src.at_end());
499  TEST_EQUAL(src.get_docid(), 2);
500 
501  src.skip_to(4, 1.5);
502  TEST(!src.at_end());
503  TEST_EQUAL(src.get_docid(), 5);
504 
505  src.next(1.5);
506  TEST(src.at_end());
507  }
508 
    // Same sequence, but using check(4) instead of skip_to(4).
509  {
511  src.init(db);
512 
513  TEST(src.check(1, 1.5));
514  TEST(!src.at_end());
515  TEST_EQUAL(src.get_docid(), 1);
516 
517  src.next(1.5);
518  TEST(!src.at_end());
519  TEST_EQUAL(src.get_docid(), 2);
520 
521  TEST(src.check(4, 1.5));
522  TEST(!src.at_end());
523  TEST_EQUAL(src.get_docid(), 5);
524 
525  src.next(1.5);
526  TEST(src.at_end());
527  }
528 }
529 
530 // Test DecreasingValueWeightPostingSource with an actual query.
// Builds a four-document database (the first two documents indexed by term
// "foo", the last two by "foo" and "bar"), runs a posting-source query with
// maxitems 1..4, and checks that each MSet has the requested size and that
// every smaller MSet is a prefix of the next larger one.
//
// NOTE(review): listing numbers 532, 536, 538, 541, 543 and 547 are absent
// from this listing; `db` and `ps` are used without a visible declaration.
// Presumably the missing lines declared the writable database, set a value
// on `doc` before each add_document(), and constructed the posting source -
// confirm against the upstream file.
531 DEFINE_TESTCASE(decvalwtsource3, writable) {
533 
534  Xapian::Document doc;
535  doc.add_term("foo");
537  db.add_document(doc);
539  db.add_document(doc);
540  doc.add_term("bar");
542  db.add_document(doc);
544  db.add_document(doc);
545  db.commit();
546 
548  Xapian::Query q(&ps);
549  Xapian::Enquire enq(db);
550  enq.set_query(q);
551 
    // Same query, increasing result-set sizes.
552  Xapian::MSet mset1(enq.get_mset(0, 1));
553  Xapian::MSet mset2(enq.get_mset(0, 2));
554  Xapian::MSet mset3(enq.get_mset(0, 3));
555  Xapian::MSet mset4(enq.get_mset(0, 4));
556 
557  TEST_EQUAL(mset1.size(), 1);
558  TEST_EQUAL(mset2.size(), 2);
559  TEST_EQUAL(mset3.size(), 3);
560  TEST_EQUAL(mset4.size(), 4);
561 
    // Each MSet must agree with the next larger one on their common prefix.
562  TEST(mset_range_is_same(mset1, 0, mset2, 0, 1));
563  TEST(mset_range_is_same(mset2, 0, mset3, 0, 2));
564  TEST(mset_range_is_same(mset3, 0, mset4, 0, 3));
565 }
566 
567 // Test DecreasingValueWeightPostingSource with an actual query on a fixed
568 // dataset (so we can cover the remote backend too).
// Runs a posting-source query against the fixed "apitest_declen" database
// with maxitems 1..4 and checks that each MSet has the requested size and
// that every smaller MSet is a prefix of the next larger one.
//
// NOTE(review): listing number 572 is absent from this listing and `ps` is
// used without a visible declaration.  Presumably that line constructed the
// posting source - confirm its type and arguments against the upstream file.
569 DEFINE_TESTCASE(decvalwtsource4, backend && !multi) {
570  Xapian::Database db = get_database("apitest_declen");
571 
573  Xapian::Query q(&ps);
574  Xapian::Enquire enq(db);
575  enq.set_query(q);
576 
    // Same query, increasing result-set sizes.
577  Xapian::MSet mset1(enq.get_mset(0, 1));
578  Xapian::MSet mset2(enq.get_mset(0, 2));
579  Xapian::MSet mset3(enq.get_mset(0, 3));
580  Xapian::MSet mset4(enq.get_mset(0, 4));
581 
582  TEST_EQUAL(mset1.size(), 1);
583  TEST_EQUAL(mset2.size(), 2);
584  TEST_EQUAL(mset3.size(), 3);
585  TEST_EQUAL(mset4.size(), 4);
586 
    // Each MSet must agree with the next larger one on their common prefix.
587  TEST(mset_range_is_same(mset1, 0, mset2, 0, 1));
588  TEST(mset_range_is_same(mset2, 0, mset3, 0, 2));
589  TEST(mset_range_is_same(mset3, 0, mset4, 0, 3));
590 }
591 
592 // Regression test - used to get segfaults if
593 // DecreasingValueWeightPostingSource was pointed at an empty slot.
// Adds two documents, commits, and runs three posting-source queries asking
// for up to three matches each; the expected MSet sizes are 2, 1 and 0
// respectively.
//
// NOTE(review): listing numbers 595, 598, 600, 605, 613 and 621 are absent
// from this listing; `db` and `ps` are used without a visible declaration.
// Presumably the missing lines declared the writable database, set values on
// `doc`, and constructed a posting source over a different slot in each
// scope (which would explain the differing expected sizes) - confirm against
// the upstream file.
594 DEFINE_TESTCASE(decvalwtsource5, writable) {
596 
597  Xapian::Document doc;
599  db.add_document(doc);
601  db.add_document(doc);
602  db.commit();
603 
    // First source: both documents are expected to match.
604  {
606  Xapian::Query q(&ps);
607  Xapian::Enquire enq(db);
608  enq.set_query(q);
609  Xapian::MSet mset1(enq.get_mset(0, 3));
610  TEST_EQUAL(mset1.size(), 2);
611  }
    // Second source: exactly one document is expected to match.
612  {
614  Xapian::Query q(&ps);
615  Xapian::Enquire enq(db);
616  enq.set_query(q);
617  Xapian::MSet mset1(enq.get_mset(0, 3));
618  TEST_EQUAL(mset1.size(), 1);
619  }
    // Third source: no documents are expected to match.
620  {
622  Xapian::Query q(&ps);
623  Xapian::Enquire enq(db);
624  enq.set_query(q);
625  Xapian::MSet mset1(enq.get_mset(0, 3));
626  TEST_EQUAL(mset1.size(), 0);
627  }
628 }
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
This class is used to access a database, or a group of databases.
Definition: database.h:68
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:46
Xapian::docid get_docid() const
Return the current docid.
A posting source which looks up weights in a map using values as the key.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
bool at_end() const
Return true if the current position is past the last entry in this list.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
a generic test suite engine
Class for iterating over document values.
Definition: valueiterator.h:40
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1347
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
void clear_mappings()
Clear all mappings.
Read weights from a value which is known to decrease as docid increases.
test functionality of the Xapian API
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
ValueIterator valuestream_end(Xapian::valueno) const
Return end iterator corresponding to valuestream_begin().
Definition: database.h:359
This class provides read/write access to a database.
Definition: database.h:785
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Public interfaces for the Xapian library.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
A posting source which returns a fixed weight for all documents.
bool at_end() const
Return true if the current position is past the last entry in this list.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:75
ValueIterator valuestream_begin(Xapian::valueno slot) const
Return an iterator over the value in slot slot for each document.
Definition: omdatabase.cc:450
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
bool check(Xapian::docid docid)
Check if the specified docid occurs.
A posting source which reads weights from a value slot.
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
void set_default_weight(double wt)
Set a default weight for document values not in the map.
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
void init(const Xapian::Database &db_)
Set this PostingSource to the start of the list of postings.
void add_mapping(const std::string &key, double wt)
Add a mapping.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
Xapian::docid get_docid() const
Return the docid at the current position.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
void next(double min_wt)
Advance the current position to the next matching document.
void next(double min_wt)
Advance the current position to the next matching document.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
std::string get_value(Xapian::valueno slot) const
Get value by number.
Definition: omdocument.cc:64
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
DEFINE_TESTCASE(valuestream1, backend && !multi)
Feature test simple valuestream iteration.
void skip_to(Xapian::docid docid_or_slot)
Advance the iterator to document id or value slot docid_or_slot.
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
void next(double min_wt)
Advance the current position to the next matching document.