xapian-core  1.4.22
api_postingsource.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008,2009,2011,2015,2016,2019 Olly Betts
5  * Copyright 2008,2009 Lemur Consulting Ltd
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "api_postingsource.h"
27 
28 #include <xapian.h>
29 
30 #include <string>
31 #include "safeunistd.h"
32 
33 #include "str.h"
34 #include "testutils.h"
35 #include "apitest.h"
36 
37 using namespace std;
38 
41 
43 
45 
47  Xapian::doccount last_docid_)
48  : num_docs(num_docs_), last_docid(last_docid_), did(0)
49  { }
50 
51  public:
53  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
54  { }
55 
56  PostingSource * clone() const { return new MyOddPostingSource(num_docs, last_docid); }
57 
58  void init(const Xapian::Database &) { did = 0; }
59 
60  // These bounds could be better, but that's not important here.
61  Xapian::doccount get_termfreq_min() const { return 0; }
62 
63  Xapian::doccount get_termfreq_est() const { return num_docs / 2; }
64 
65  Xapian::doccount get_termfreq_max() const { return num_docs; }
66 
67  void next(double wt) {
68  (void)wt;
69  ++did;
70  if (did % 2 == 0) ++did;
71  }
72 
73  void skip_to(Xapian::docid to_did, double wt) {
74  (void)wt;
75  did = to_did;
76  if (did % 2 == 0) ++did;
77  }
78 
79  bool at_end() const {
80  // Doesn't work if last_docid is 2^32 - 1.
81  return did > last_docid;
82  }
83 
84  Xapian::docid get_docid() const { return did; }
85 
86  string get_description() const { return "MyOddPostingSource"; }
87 };
88 
89 DEFINE_TESTCASE(externalsource1, backend && !remote && !multi) {
90  // Doesn't work for remote without registering with the server.
91  // Doesn't work for multi because it checks the docid in the
92  // subdatabase.
93  Xapian::Database db(get_database("apitest_phrase"));
94  Xapian::Enquire enq(db);
95  MyOddPostingSource src(db);
96 
97  // Check that passing NULL is rejected as intended.
98  Xapian::PostingSource * nullsrc = NULL;
100 
101  enq.set_query(Xapian::Query(&src));
102 
103  Xapian::MSet mset = enq.get_mset(0, 10);
104  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
105 
107  Xapian::Query("leav"),
108  Xapian::Query(&src));
109  enq.set_query(q);
110 
111  mset = enq.get_mset(0, 10);
112  mset_expect_order(mset, 5, 7, 11, 13, 9);
113 }
114 
115 // Test that trying to use PostingSource with the remote backend throws
116 // Xapian::UnimplementedError as expected (we need to register the class
117 // in xapian-tcpsrv/xapian-progsrv for this to work).
118 DEFINE_TESTCASE(externalsource2, remote) {
119  Xapian::Database db(get_database("apitest_phrase"));
120  Xapian::Enquire enq(db);
121  MyOddPostingSource src(db);
122 
123  enq.set_query(Xapian::Query(&src));
124 
126  Xapian::MSet mset = enq.get_mset(0, 10));
127 
129  Xapian::Query("leav"),
130  Xapian::Query(&src));
131  enq.set_query(q);
132 
134  Xapian::MSet mset = enq.get_mset(0, 10));
135 }
136 
139 
141 
143 
145  Xapian::doccount last_docid_)
146  : num_docs(num_docs_), last_docid(last_docid_), did(0)
147  {
148  set_maxweight(1000);
149  }
150 
151  public:
153  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
154  { }
155 
156  PostingSource * clone() const {
157  return new MyOddWeightingPostingSource(num_docs, last_docid);
158  }
159 
160  void init(const Xapian::Database &) { did = 0; }
161 
162  double get_weight() const {
163  return (did % 2) ? 1000 : 0.001;
164  }
165 
166  // These bounds could be better, but that's not important here.
167  Xapian::doccount get_termfreq_min() const { return 0; }
168 
169  Xapian::doccount get_termfreq_est() const { return num_docs / 2; }
170 
171  Xapian::doccount get_termfreq_max() const { return num_docs; }
172 
173  void next(double wt) {
174  (void)wt;
175  ++did;
176  }
177 
178  void skip_to(Xapian::docid to_did, double wt) {
179  (void)wt;
180  did = to_did;
181  }
182 
183  bool at_end() const {
184  // Doesn't work if last_docid is 2^32 - 1.
185  return did > last_docid;
186  }
187 
188  Xapian::docid get_docid() const { return did; }
189 
190  string get_description() const {
191  return "MyOddWeightingPostingSource";
192  }
193 };
194 
195 // Like externalsource1, except we use the weight to favour odd documents.
196 DEFINE_TESTCASE(externalsource3, backend && !remote && !multi) {
197  // Doesn't work for remote without registering with the server.
198  // Doesn't work for multi because it checks the docid in the
199  // subdatabase.
200  Xapian::Database db(get_database("apitest_phrase"));
201  Xapian::Enquire enq(db);
203 
204  enq.set_query(Xapian::Query(&src));
205 
206  Xapian::MSet mset = enq.get_mset(0, 10);
207  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17, 2);
208 
210  Xapian::Query("leav"),
211  Xapian::Query(&src));
212  enq.set_query(q);
213 
214  mset = enq.get_mset(0, 5);
215  mset_expect_order(mset, 5, 7, 11, 13, 9);
216 
217  tout << "max possible weight = " << mset.get_max_possible() << endl;
218  TEST(mset.get_max_possible() > 1000);
219 
220  enq.set_cutoff(0, 1000.001);
221  mset = enq.get_mset(0, 10);
222  mset_expect_order(mset, 5, 7, 11, 13, 9);
223 
224  tout << "max possible weight = " << mset.get_max_possible() << endl;
225  TEST(mset.get_max_possible() > 1000);
226 
228  mset = enq.get_mset(0, 10);
229  TEST(mset.empty());
230 
231  TEST_EQUAL(mset.get_max_possible(), 500);
232 
234  mset = enq.get_mset(0, 10);
235  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
236 
237  TEST_EQUAL(mset.get_max_possible(), 2000);
238 }
239 
242 
244 
246 
248  Xapian::doccount last_docid_)
249  : num_docs(num_docs_), last_docid(last_docid_), did(0)
250  { }
251 
252  public:
253  MyDontAskWeightPostingSource() : Xapian::PostingSource() {}
254 
255  PostingSource * clone() const { return new MyDontAskWeightPostingSource(num_docs, last_docid); }
256 
257  void init(const Xapian::Database &db) {
258  num_docs = db.get_doccount();
259  last_docid = db.get_lastdocid();
260  did = 0;
261  }
262 
263  double get_weight() const {
264  FAIL_TEST("MyDontAskWeightPostingSource::get_weight() called");
265  }
266 
267  // These bounds could be better, but that's not important here.
268  Xapian::doccount get_termfreq_min() const { return num_docs; }
269 
270  Xapian::doccount get_termfreq_est() const { return num_docs; }
271 
272  Xapian::doccount get_termfreq_max() const { return num_docs; }
273 
274  void next(double wt) {
275  (void)wt;
276  ++did;
277  }
278 
279  void skip_to(Xapian::docid to_did, double wt) {
280  (void)wt;
281  did = to_did;
282  }
283 
284  bool at_end() const {
285  // Doesn't work if last_docid is 2^32 - 1.
286  return did > last_docid;
287  }
288 
289  Xapian::docid get_docid() const { return did; }
290 
291  string get_description() const {
292  return "MyDontAskWeightPostingSource";
293  }
294 };
295 
296 // Check that boolean use doesn't call get_weight().
297 DEFINE_TESTCASE(externalsource4, backend && !remote) {
298  Xapian::Database db(get_database("apitest_phrase"));
299  Xapian::Enquire enq(db);
301 
302  tout << "OP_SCALE_WEIGHT 0" << endl;
304 
305  Xapian::MSet mset = enq.get_mset(0, 5);
306  mset_expect_order(mset, 1, 2, 3, 4, 5);
307 
308  tout << "OP_FILTER" << endl;
310  Xapian::Query("leav"),
311  Xapian::Query(&src));
312  enq.set_query(q);
313 
314  mset = enq.get_mset(0, 5);
315  mset_expect_order(mset, 8, 6, 4, 5, 7);
316 
317  tout << "BoolWeight" << endl;
318  enq.set_query(Xapian::Query(&src));
320 
321  // mset = enq.get_mset(0, 5);
322  // mset_expect_order(mset, 1, 2, 3, 4, 5);
323 }
324 
325 // Check that valueweightsource works correctly.
326 DEFINE_TESTCASE(valueweightsource1, backend) {
327  Xapian::Database db(get_database("apitest_phrase"));
328  Xapian::Enquire enq(db);
330 
331  // Should be in descending order of length
332  tout << "RAW" << endl;
333  enq.set_query(Xapian::Query(&src));
334  Xapian::MSet mset = enq.get_mset(0, 5);
335  mset_expect_order(mset, 3, 1, 2, 8, 14);
336 
337  // In relevance order
338  tout << "OP_FILTER" << endl;
340  Xapian::Query("leav"),
341  Xapian::Query(&src));
342  enq.set_query(q);
343  mset = enq.get_mset(0, 5);
344  mset_expect_order(mset, 8, 6, 4, 5, 7);
345 
346  // Should be in descending order of length
347  tout << "OP_FILTER other way" << endl;
349  Xapian::Query(&src),
350  Xapian::Query("leav"));
351  enq.set_query(q);
352  mset = enq.get_mset(0, 5);
353  mset_expect_order(mset, 8, 14, 9, 13, 7);
354 }
355 
356 // Check that valueweightsource gives the correct bounds for those databases
357 // which support value statistics.
358 DEFINE_TESTCASE(valueweightsource2, valuestats) {
359  Xapian::Database db(get_database("apitest_phrase"));
361  src.init(db);
362  TEST_EQUAL(src.get_termfreq_min(), 17);
363  TEST_EQUAL(src.get_termfreq_est(), 17);
364  TEST_EQUAL(src.get_termfreq_max(), 17);
365  TEST_EQUAL(src.get_maxweight(), 135);
366 }
367 
368 // Check that valueweightsource skip_to() can stay in the same position.
369 DEFINE_TESTCASE(valueweightsource3, valuestats && !multi) {
370  // FIXME: multi doesn't support iterating valuestreams yet.
371  Xapian::Database db(get_database("apitest_phrase"));
373  src.init(db);
374  TEST(!src.at_end());
375  src.skip_to(8, 0.0);
376  TEST(!src.at_end());
377  TEST_EQUAL(src.get_docid(), 8);
378  src.skip_to(8, 0.0);
379  TEST(!src.at_end());
380  TEST_EQUAL(src.get_docid(), 8);
381 }
382 
383 // Check that fixedweightsource works correctly.
384 DEFINE_TESTCASE(fixedweightsource1, backend) {
385  Xapian::Database db(get_database("apitest_phrase"));
386  Xapian::Enquire enq(db);
387  double wt = 5.6;
388 
389  {
391 
392  // Should be in increasing order of docid.
393  enq.set_query(Xapian::Query(&src));
394  Xapian::MSet mset = enq.get_mset(0, 5);
395  mset_expect_order(mset, 1, 2, 3, 4, 5);
396 
397  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
398  TEST_EQUAL(i.get_weight(), wt);
399  }
400  }
401 
402  // Do some direct tests, to check the skip_to() and check() methods work.
403  {
404  // Check next and skip_to().
406  src.init(db);
407 
408  src.next(1.0);
409  TEST(!src.at_end());
410  TEST_EQUAL(src.get_docid(), 1);
411  src.next(1.0);
412  TEST(!src.at_end());
413  TEST_EQUAL(src.get_docid(), 2);
414  src.skip_to(5, 1.0);
415  TEST(!src.at_end());
416  TEST_EQUAL(src.get_docid(), 5);
417  src.next(wt * 2);
418  TEST(src.at_end());
419  }
420  {
421  // Check check() as the first operation, followed by next.
423  src.init(db);
424 
425  TEST_EQUAL(src.check(5, 1.0), true);
426  TEST(!src.at_end());
427  TEST_EQUAL(src.get_docid(), 5);
428  src.next(1.0);
429  TEST(!src.at_end());
430  TEST_EQUAL(src.get_docid(), 6);
431  }
432  {
433  // Check check() as the first operation, followed by skip_to().
435  src.init(db);
436 
437  TEST_EQUAL(src.check(5, 1.0), true);
438  TEST(!src.at_end());
439  TEST_EQUAL(src.get_docid(), 5);
440  src.skip_to(6, 1.0);
441  TEST(!src.at_end());
442  TEST_EQUAL(src.get_docid(), 6);
443  src.skip_to(7, wt * 2);
444  TEST(src.at_end());
445  }
446 }
447 
448 // A posting source which changes the maximum weight.
451 
452  // Maximum docid that get_weight() should be called for.
454 
455  public:
457  : did(0), maxid_accessed(maxid_accessed_) { }
458 
459  void init(const Xapian::Database &) { did = 0; }
460 
461  double get_weight() const {
462  if (did > maxid_accessed) {
463  FAIL_TEST("ChangeMaxweightPostingSource::get_weight() called "
464  "for docid " + str(did) + ", max id accessed "
465  "should be " + str(maxid_accessed));
466  }
467  return 5 - did;
468  }
469 
470  Xapian::doccount get_termfreq_min() const { return 4; }
471  Xapian::doccount get_termfreq_est() const { return 4; }
472  Xapian::doccount get_termfreq_max() const { return 4; }
473 
474  void next(double) {
475  ++did;
476  set_maxweight(5 - did);
477  }
478 
479  void skip_to(Xapian::docid to_did, double) {
480  did = to_did;
481  set_maxweight(5 - did);
482  }
483 
484  bool at_end() const { return did >= 5; }
485  Xapian::docid get_docid() const { return did; }
486  string get_description() const { return "ChangeMaxweightPostingSource"; }
487 };
488 
489 // Test a posting source with a variable maxweight.
490 DEFINE_TESTCASE(changemaxweightsource1, backend && !remote && !multi) {
491  // The ChangeMaxweightPostingSource doesn't work with multi or remote.
492  Xapian::Database db(get_database("apitest_phrase"));
493  Xapian::Enquire enq(db);
494 
495  {
498 
500  Xapian::Query(&src1), Xapian::Query(&src2));
501  enq.set_query(q);
502  // Set descending docid order so that the matcher isn't able to
503  // terminate early after 4 documents just because weight == maxweight.
504  enq.set_docid_order(enq.DESCENDING);
505 
506  Xapian::MSet mset = enq.get_mset(0, 4);
507  TEST(src1.at_end());
508  mset_expect_order(mset, 1, 2, 3, 4);
509  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
510  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
511  }
512  }
513 
514  {
517 
519  Xapian::Query(&src1), Xapian::Query(&src2));
520  enq.set_query(q);
521 
522  Xapian::MSet mset = enq.get_mset(0, 2);
523  TEST(!src1.at_end());
524  TEST_EQUAL(src1.get_docid(), 3);
525  TEST_EQUAL_DOUBLE(src1.get_maxweight(), 2.0);
526  mset_expect_order(mset, 1, 2);
527  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
528  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
529  }
530  }
531 }
532 
533 // Test using a valueweightpostingsource which has no entries.
534 DEFINE_TESTCASE(emptyvalwtsource1, backend && !remote && !multi) {
535  Xapian::Database db(get_database("apitest_phrase"));
536  Xapian::Enquire enq(db);
537 
538  Xapian::ValueWeightPostingSource src2(11); // A non-empty slot.
539  Xapian::ValueWeightPostingSource src3(100); // An empty slot.
540  Xapian::Query q1("leav");
541  Xapian::Query q2(&src2);
542  Xapian::Query q3(&src3);
544 
545  // Perform search without ORring with the posting source.
546  Xapian::doccount size1;
547  {
548  enq.set_query(q1);
549  Xapian::MSet mset = enq.get_mset(0, 10);
550  TEST_REL(mset.get_max_possible(), >, 0.0);
551  size1 = mset.size();
552  TEST_REL(size1, >, 0);
553  }
554 
555  // Perform a search with just the non-empty posting source, checking it
556  // returns something.
557  {
558  enq.set_query(q2);
559  Xapian::MSet mset = enq.get_mset(0, 10);
560  TEST_REL(mset.get_max_possible(), >, 0.0);
561  TEST_REL(mset.size(), >, 0);
562  }
563 
564  // Perform a search with just the empty posting source, checking it returns
565  // nothing.
566  {
567  enq.set_query(q3);
568  Xapian::MSet mset = enq.get_mset(0, 10);
569 
570  // get_max_possible() returns 0 here for backends which track the upper
571  // bound on value slot entries, MAX_DBL for backends which don't.
572  // Either is valid.
573  TEST_REL(mset.get_max_possible(), >=, 0.0);
574 
575  TEST_EQUAL(mset.size(), 0);
576  }
577 
578  // Perform a search with the posting source ORred with the normal query.
579  // This is a regression test - it used to return nothing.
580  {
581  enq.set_query(q);
582  Xapian::MSet mset = enq.get_mset(0, 10);
583  TEST_REL(mset.get_max_possible(), >, 0.0);
584  TEST_REL(mset.size(), >, 0.0);
585  TEST_EQUAL(mset.size(), size1);
586  }
587 }
588 
591  public:
592  int & count;
593 
595  : Xapian::DecreasingValueWeightPostingSource(0), count(count_) { }
596 
598  {
599  return new SlowDecreasingValueWeightPostingSource(count);
600  }
601 
602  void next(double min_wt) {
603  sleep(1);
604  ++count;
606  }
607 };
608 
609 static void
611 {
612  for (int wt = 20; wt > 0; --wt) {
613  Xapian::Document doc;
614  doc.add_value(0, Xapian::sortable_serialise(double(wt)));
615  db.add_document(doc);
616  }
617 }
618 
619 // FIXME: This doesn't run for remote databases (we'd need to register
620 // SlowDecreasingValueWeightPostingSource on the remote).
621 DEFINE_TESTCASE(matchtimelimit1, generated && !remote)
622 {
623 #ifndef HAVE_TIMER_CREATE
624  SKIP_TEST("Enquire::set_time_limit() not implemented for this platform");
625 #endif
626  Xapian::Database db = get_database("matchtimelimit1",
628 
629  int count = 0;
631  src.init(db);
632  Xapian::Enquire enquire(db);
633  enquire.set_query(Xapian::Query(&src));
634 
635  enquire.set_time_limit(1.5);
636 
637  Xapian::MSet mset = enquire.get_mset(0, 1, 1000);
638  TEST_EQUAL(mset.size(), 1);
639  TEST_EQUAL(count, 2);
640 }
641 
644  public:
646 
648 
650  Xapian::doccount& doclen_ub_)
651  : Xapian::DecreasingValueWeightPostingSource(0),
652  doclen_lb(doclen_lb_),
653  doclen_ub(doclen_ub_) { }
654 
656  {
657  return new CheckBoundsPostingSource(doclen_lb, doclen_ub);
658  }
659 
660  void init(const Xapian::Database& database) {
661  doclen_lb = database.get_doclength_lower_bound();
662  doclen_ub = database.get_doclength_upper_bound();
664  }
665 };
666 
667 // Test that doclength bounds are correct.
668 // Regression test for bug fixed in 1.2.25 and 1.4.1.
669 DEFINE_TESTCASE(postingsourcebounds1, backend && !remote)
670 {
671  Xapian::Database db = get_database("apitest_simpledata");
672 
673  Xapian::doccount doclen_lb = 0, doclen_ub = 0;
674  CheckBoundsPostingSource ps(doclen_lb, doclen_ub);
675 
676  Xapian::Enquire enquire(db);
677  enquire.set_query(Xapian::Query(&ps));
678 
679  Xapian::MSet mset = enquire.get_mset(0, 1);
680 
681  TEST_EQUAL(doclen_lb, db.get_doclength_lower_bound());
682  TEST_EQUAL(doclen_ub, db.get_doclength_upper_bound());
683 }
684 
685 // PostingSource which really just counts the clone() calls.
686 // Never actually matches anything, but pretends it might.
689 
690  public:
691  CloneTestPostingSource(int& clone_count_)
692  : clone_count(clone_count_)
693  { }
694 
695  PostingSource * clone() const {
696  ++clone_count;
697  return new CloneTestPostingSource(clone_count);
698  }
699 
700  void init(const Xapian::Database&) { }
701 
702  Xapian::doccount get_termfreq_min() const { return 0; }
703 
704  Xapian::doccount get_termfreq_est() const { return 1; }
705 
706  Xapian::doccount get_termfreq_max() const { return 2; }
707 
// Does nothing; the minimum weight is ignored.
void next(double)
{
}
709 
710  void skip_to(Xapian::docid, double) { }
711 
712  bool at_end() const {
713  return true;
714  }
715 
716  Xapian::docid get_docid() const { return 0; }
717 
718  string get_description() const { return "CloneTestPostingSource"; }
719 };
720 
722 DEFINE_TESTCASE(postingsourceclone1, !backend)
723 {
724  // This fails with 1.3.5-1.4.0 inclusive.
725  {
726  int clones = 0;
727  CloneTestPostingSource ps(clones);
728  TEST_EQUAL(clones, 0);
729  Xapian::Query q(&ps);
730  TEST_EQUAL(clones, 1);
731  }
732 
733  // Check that clone() isn't needlessly called if reference counting has
734  // been turned on for the PostingSource.
735  {
736  int clones = 0;
738  TEST_EQUAL(clones, 0);
739  Xapian::Query q(ps->release());
740  TEST_EQUAL(clones, 0);
741  }
742 }
743 
746 
748 
750 
751  public:
753 
754  explicit
755  OnlyTheFirstPostingSource(bool allow_clone_) : allow_clone(allow_clone_) {}
756 
757  PostingSource* clone() const {
758  return allow_clone ? new OnlyTheFirstPostingSource(true) : nullptr;
759  }
760 
761  void init(const Xapian::Database& db) {
762  did = 0;
763  if (shard_index == 0) {
764  last_docid = db.get_lastdocid();
765  } else {
766  last_docid = 0;
767  }
768  ++shard_index;
769  }
770 
771  Xapian::doccount get_termfreq_min() const { return 0; }
772 
773  Xapian::doccount get_termfreq_est() const { return last_docid / 2; }
774 
775  Xapian::doccount get_termfreq_max() const { return last_docid; }
776 
777  void next(double wt) {
778  (void)wt;
779  ++did;
780  if (did > last_docid) did = 0;
781  }
782 
783  void skip_to(Xapian::docid to_did, double wt) {
784  (void)wt;
785  did = to_did;
786  if (did > last_docid) did = 0;
787  }
788 
789  bool at_end() const {
790  return did == 0;
791  }
792 
793  Xapian::docid get_docid() const { return did; }
794 
795  string get_description() const { return "OnlyTheFirstPostingSource"; }
796 };
797 
799 
800 DEFINE_TESTCASE(postingsourceshardindex1, multi && !remote) {
801  Xapian::Database db = get_database("apitest_simpledata");
802 
804 
805  Xapian::Enquire enquire(db);
806  {
807  auto ps = new OnlyTheFirstPostingSource(true);
808  enquire.set_query(Xapian::Query(ps->release()));
809 
810  Xapian::MSet mset = enquire.get_mset(0, 10);
811  mset_expect_order(mset, 1, 3, 5);
812  }
813 
814  {
815  /* Regression test for bug fixed in 1.4.12 - we should get an exception
816  * if we use a PostingSource that doesn't support clone() with a multi
817  * DB.
818  */
819  auto ps = new OnlyTheFirstPostingSource(false);
820  enquire.set_query(Xapian::Query(ps->release()));
821 
823  auto m = enquire.get_mset(0, 10));
824  }
825 }
826 
830 
831  public:
833  Xapian::doccount est_,
834  Xapian::doccount ub_)
835  : lb(lb_), est(est_), ub(ub_)
836  { }
837 
838  PostingSource * clone() const { return new EstimatePS(lb, est, ub); }
839 
840  void init(const Xapian::Database &) { }
841 
842  Xapian::doccount get_termfreq_min() const { return lb; }
843 
845 
846  Xapian::doccount get_termfreq_max() const { return ub; }
847 
848  void next(double) {
849  FAIL_TEST("EstimatePS::next() shouldn't be called");
850  }
851 
852  void skip_to(Xapian::docid, double) {
853  FAIL_TEST("EstimatePS::skip_to() shouldn't be called");
854  }
855 
856  bool at_end() const {
857  return false;
858  }
859 
861  FAIL_TEST("EstimatePS::get_docid() shouldn't be called");
862  }
863 
864  string get_description() const { return "EstimatePS"; }
865 };
866 
868 DEFINE_TESTCASE(estimaterounding1, backend && !multi && !remote) {
869  Xapian::Database db = get_database("etext");
870  Xapian::Enquire enquire(db);
871  static const struct { Xapian::doccount lb, est, ub, exp; } testcases[] = {
872  // Test rounding down.
873  {411, 424, 439, 420},
874  {1, 312, 439, 300},
875  // Test rounding up.
876  {411, 426, 439, 430},
877  {123, 351, 439, 400},
878  // Rounding based on estimate size if smaller than range size.
879  {1, 12, 439, 10},
880  // Round "5" away from the nearer bound.
881  {1, 15, 439, 20},
882  {1, 350, 439, 300},
883  // Check we round up if rounding down would be out of range.
884  {411, 416, 439, 420},
885  {411, 412, 439, 420},
886  // Check we round down if rounding up would be out of range.
887  {111, 133, 138, 130},
888  {111, 137, 138, 130},
889  // Check we don't round if either way would be out of range.
890  {411, 415, 419, 415},
891  // Leave small estimates alone.
892  {1, 6, 439, 6},
893  };
894  for (auto& t : testcases) {
895  EstimatePS ps(t.lb, t.est, t.ub);
896  enquire.set_query(Xapian::Query(&ps));
897  Xapian::MSet mset = enquire.get_mset(0, 0);
898  // MSet::get_description() includes bounds and raw estimate.
899  tout << mset.get_description() << endl;
900  TEST_EQUAL(mset.get_matches_estimated(), t.exp);
901  }
902 }
PostingSource * clone() const
Clone the posting source.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
string get_description() const
Return a string describing this object.
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
OnlyTheFirstPostingSource(bool allow_clone_)
void next(double wt)
Advance the current position to the next matching document.
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:850
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
This class is used to access a database, or a group of databases.
Definition: database.h:68
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:325
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:856
Xapian::docid get_docid() const
Return the current docid.
EstimatePS(Xapian::doccount lb_, Xapian::doccount est_, Xapian::doccount ub_)
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
void next(double wt)
Advance the current position to the next matching document.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
SlowDecreasingValueWeightPostingSource * clone() const
Clone the posting source.
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount ub
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
Definition: omdatabase.cc:401
void next(double min_wt)
Advance the current position to the next matching document.
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:283
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
bool at_end() const
Return true if the current position is past the last entry in this list.
void sleep(double t)
Sleep until the time represented by this object.
Definition: realtime.h:127
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
PostingSource * clone() const
Clone the posting source.
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
Convert types to std::string.
void skip_to(Xapian::docid, double)
Advance to the specified docid.
static double est(double l, double r, double n)
Definition: orpostlist.cc:306
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1347
PostingSource * clone() const
Clone the posting source.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
MyOddWeightingPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
ChangeMaxweightPostingSource(Xapian::docid maxid_accessed_)
Xapian::doccount & doclen_ub
string get_description() const
Return a string describing this object.
PostingSource * clone() const
Clone the posting source.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount num_docs
Xapian::doccount last_docid
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Read weights from a value which is known to decrease as docid increases.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
test functionality of the Xapian API
void next(double)
Advance the current position to the next matching document.
string get_description() const
Return a string describing this object.
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
void next(double wt)
Advance the current position to the next matching document.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
MyOddWeightingPostingSource(const Xapian::Database &db)
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Definition: omdatabase.cc:421
DEFINE_TESTCASE(externalsource1, backend &&!remote &&!multi)
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
void next(double)
Advance the current position to the next matching document.
void init(const Xapian::Database &database)
Set this PostingSource to the start of the list of postings.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
bool at_end() const
Return true if the current position is past the last entry in this list.
void skip_to(Xapian::docid, double)
Advance to the specified docid.
This class provides read/write access to a database.
Definition: database.h:785
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:351
Scale the weight contributed by a subquery.
Definition: query.h:166
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
bool at_end() const
Return true if the current position is past the last entry in this list.
Public interfaces for the Xapian library.
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
string get_description() const
Return a string describing this object.
PostingSource * clone() const
Clone the posting source.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
A posting source which returns a fixed weight for all documents.
void set_time_limit(double time_limit)
Set a time limit for the match.
Definition: omenquire.cc:926
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Xapian::docid get_docid() const
Return the current docid.
string get_description() const
Return a string describing this object.
PostingSource * clone() const
Clone the posting source.
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
double get_weight() const
Return the weight contribution for the current document.
MyOddPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
string get_description() const
Return a string describing this object.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:607
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:612
void init(const Xapian::Database &db)
Set this PostingSource to the start of the list of postings.
static Xapian::doccount shard_index
bool at_end() const
Return true if the current position is past the last entry in this list.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
Xapian::docid get_docid() const
Return the current docid.
double get_weight() const
Return the weight contribution for the current document.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
CheckBoundsPostingSource(Xapian::doccount &doclen_lb_, Xapian::doccount &doclen_ub_)
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
A posting source which reads weights from a value slot.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
CheckBoundsPostingSource * clone() const
Clone the posting source.
Match only documents which all subqueries match.
Definition: query.h:84
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
MyOddPostingSource(const Xapian::Database &db)
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:74
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
This class provides an interface to the information retrieval system for the purpose of searching.
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
CloneTestPostingSource(int &clone_count_)
void init(const Xapian::Database &db_)
Set this PostingSource to the start of the list of postings.
bool at_end() const
Return true if the current position is past the last entry in this list.
void next(double)
Advance the current position to the next matching document.
Xapian::doccount & doclen_lb
PostingSource subclass for injecting tf bounds and estimate.
Xapian::docid get_docid() const
Return the current docid.
Match documents which at least one subquery matches.
Definition: query.h:92
Xapian-specific test helper functions and macros.
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
MyDontAskWeightPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
void init(const Xapian::Database &db)
Set this PostingSource to the start of the list of postings.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
PostingSource * release()
Start reference counting this object.
<unistd.h>, but with compatibility workarounds.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
double get_maxweight() const
Return the currently set upper bound on what get_weight() can return.
void next(double min_wt)
Advance the current position to the next matching document.
Xapian::docid get_docid() const
Return the current docid.
void next(double min_wt)
Advance the current position to the next matching document.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
static void make_matchtimelimit1_db(Xapian::WritableDatabase &db, const string &)
double get_weight() const
Return the weight contribution for the current document.
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
void next(double wt)
Advance the current position to the next matching document.
void skip_to(Xapian::docid to_did, double)
Advance to the specified docid.
A handle representing a document in a Xapian database.
Definition: document.h:61
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
string get_description() const
Return a string describing this object.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
docids sort in descending order.
Definition: enquire.h:326
static const testcase testcases[]
Definition: api_unicode.cc:39