xapian-core  1.4.30
api_postingsource.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008,2009,2011,2015,2016,2019,2024 Olly Betts
5  * Copyright 2008,2009 Lemur Consulting Ltd
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "api_postingsource.h"
27 
28 #include <xapian.h>
29 
30 #include <string>
31 #include "safeunistd.h"
32 
33 #include "str.h"
34 #include "testutils.h"
35 #include "apitest.h"
36 
37 using namespace std;
38 
41 
43 
45 
47  Xapian::doccount last_docid_)
48  : num_docs(num_docs_), last_docid(last_docid_), did(0)
49  { }
50 
51  public:
53  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
54  { }
55 
56  PostingSource* clone() const override {
57  return new MyOddPostingSource(num_docs, last_docid);
58  }
59 
60  void init(const Xapian::Database&) override { did = 0; }
61 
62  // These bounds could be better, but that's not important here.
63  Xapian::doccount get_termfreq_min() const override { return 0; }
64 
65  Xapian::doccount get_termfreq_est() const override { return num_docs / 2; }
66 
67  Xapian::doccount get_termfreq_max() const override { return num_docs; }
68 
69  void next(double wt) override {
70  (void)wt;
71  ++did;
72  if (did % 2 == 0) ++did;
73  }
74 
75  void skip_to(Xapian::docid to_did, double wt) override {
76  (void)wt;
77  did = to_did;
78  if (did % 2 == 0) ++did;
79  }
80 
81  bool at_end() const override {
82  // Doesn't work if last_docid is 2^32 - 1.
83  return did > last_docid;
84  }
85 
86  Xapian::docid get_docid() const override { return did; }
87 
88  string get_description() const override { return "MyOddPostingSource"; }
89 };
90 
91 DEFINE_TESTCASE(externalsource1, backend && !remote && !multi) {
92  // Doesn't work for remote without registering with the server.
93  // Doesn't work for multi because it checks the docid in the
94  // subdatabase.
95  Xapian::Database db(get_database("apitest_phrase"));
96  Xapian::Enquire enq(db);
97  MyOddPostingSource src(db);
98 
99  // Check that passing NULL is rejected as intended.
100  Xapian::PostingSource * nullsrc = NULL;
102 
103  enq.set_query(Xapian::Query(&src));
104 
105  Xapian::MSet mset = enq.get_mset(0, 10);
106  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
107 
109  Xapian::Query("leav"),
110  Xapian::Query(&src));
111  enq.set_query(q);
112 
113  mset = enq.get_mset(0, 10);
114  mset_expect_order(mset, 5, 7, 11, 13, 9);
115 }
116 
117 // Test that trying to use PostingSource with the remote backend throws
118 // Xapian::UnimplementedError as expected (we need to register the class
119 // in xapian-tcpsrv/xapian-progsrv for this to work).
120 DEFINE_TESTCASE(externalsource2, remote) {
121  Xapian::Database db(get_database("apitest_phrase"));
122  Xapian::Enquire enq(db);
123  MyOddPostingSource src(db);
124 
125  enq.set_query(Xapian::Query(&src));
126 
128  Xapian::MSet mset = enq.get_mset(0, 10));
129 
131  Xapian::Query("leav"),
132  Xapian::Query(&src));
133  enq.set_query(q);
134 
136  Xapian::MSet mset = enq.get_mset(0, 10));
137 }
138 
141 
143 
145 
147  Xapian::doccount last_docid_)
148  : num_docs(num_docs_), last_docid(last_docid_), did(0)
149  {
150  set_maxweight(1000);
151  }
152 
153  public:
155  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
156  { }
157 
158  PostingSource* clone() const override {
159  return new MyOddWeightingPostingSource(num_docs, last_docid);
160  }
161 
162  void init(const Xapian::Database&) override { did = 0; }
163 
164  double get_weight() const override {
165  return (did % 2) ? 1000 : 0.001;
166  }
167 
168  // These bounds could be better, but that's not important here.
169  Xapian::doccount get_termfreq_min() const override { return 0; }
170 
171  Xapian::doccount get_termfreq_est() const override { return num_docs / 2; }
172 
173  Xapian::doccount get_termfreq_max() const override { return num_docs; }
174 
175  void next(double wt) override {
176  (void)wt;
177  ++did;
178  }
179 
180  void skip_to(Xapian::docid to_did, double wt) override {
181  (void)wt;
182  did = to_did;
183  }
184 
185  bool at_end() const override {
186  // Doesn't work if last_docid is 2^32 - 1.
187  return did > last_docid;
188  }
189 
190  Xapian::docid get_docid() const override { return did; }
191 
192  string get_description() const override {
193  return "MyOddWeightingPostingSource";
194  }
195 };
196 
197 // Like externalsource1, except we use the weight to favour odd documents.
198 DEFINE_TESTCASE(externalsource3, backend && !remote && !multi) {
199  // Doesn't work for remote without registering with the server.
200  // Doesn't work for multi because it checks the docid in the
201  // subdatabase.
202  Xapian::Database db(get_database("apitest_phrase"));
203  Xapian::Enquire enq(db);
205 
206  enq.set_query(Xapian::Query(&src));
207 
208  Xapian::MSet mset = enq.get_mset(0, 10);
209  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17, 2);
210 
212  Xapian::Query("leav"),
213  Xapian::Query(&src));
214  enq.set_query(q);
215 
216  mset = enq.get_mset(0, 5);
217  mset_expect_order(mset, 5, 7, 11, 13, 9);
218 
219  tout << "max possible weight = " << mset.get_max_possible() << '\n';
220  TEST(mset.get_max_possible() > 1000);
221 
222  enq.set_cutoff(0, 1000.001);
223  mset = enq.get_mset(0, 10);
224  mset_expect_order(mset, 5, 7, 11, 13, 9);
225 
226  tout << "max possible weight = " << mset.get_max_possible() << '\n';
227  TEST(mset.get_max_possible() > 1000);
228 
230  mset = enq.get_mset(0, 10);
231  TEST(mset.empty());
232 
233  TEST_EQUAL(mset.get_max_possible(), 500);
234 
236  mset = enq.get_mset(0, 10);
237  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
238 
239  TEST_EQUAL(mset.get_max_possible(), 2000);
240 }
241 
244 
246 
248 
250  Xapian::doccount last_docid_)
251  : num_docs(num_docs_), last_docid(last_docid_), did(0)
252  { }
253 
254  public:
255  MyDontAskWeightPostingSource() : Xapian::PostingSource() {}
256 
257  PostingSource* clone() const override {
258  return new MyDontAskWeightPostingSource(num_docs, last_docid);
259  }
260 
261  void init(const Xapian::Database& db) override {
262  num_docs = db.get_doccount();
263  last_docid = db.get_lastdocid();
264  did = 0;
265  }
266 
267  double get_weight() const override {
268  FAIL_TEST("MyDontAskWeightPostingSource::get_weight() called");
269  }
270 
271  // These bounds could be better, but that's not important here.
272  Xapian::doccount get_termfreq_min() const override { return num_docs; }
273 
274  Xapian::doccount get_termfreq_est() const override { return num_docs; }
275 
276  Xapian::doccount get_termfreq_max() const override { return num_docs; }
277 
278  void next(double wt) override {
279  (void)wt;
280  ++did;
281  }
282 
283  void skip_to(Xapian::docid to_did, double wt) override {
284  (void)wt;
285  did = to_did;
286  }
287 
288  bool at_end() const override {
289  // Doesn't work if last_docid is 2^32 - 1.
290  return did > last_docid;
291  }
292 
293  Xapian::docid get_docid() const override { return did; }
294 
295  string get_description() const override {
296  return "MyDontAskWeightPostingSource";
297  }
298 };
299 
300 // Check that boolean use doesn't call get_weight().
301 DEFINE_TESTCASE(externalsource4, backend && !remote) {
302  Xapian::Database db(get_database("apitest_phrase"));
303  Xapian::Enquire enq(db);
305 
306  tout << "OP_SCALE_WEIGHT 0\n";
308 
309  Xapian::MSet mset = enq.get_mset(0, 5);
310  mset_expect_order(mset, 1, 2, 3, 4, 5);
311 
312  tout << "OP_FILTER\n";
314  Xapian::Query("leav"),
315  Xapian::Query(&src));
316  enq.set_query(q);
317 
318  mset = enq.get_mset(0, 5);
319  mset_expect_order(mset, 8, 6, 4, 5, 7);
320 
321  tout << "BoolWeight\n";
322  enq.set_query(Xapian::Query(&src));
324 
325  // mset = enq.get_mset(0, 5);
326  // mset_expect_order(mset, 1, 2, 3, 4, 5);
327 }
328 
329 // Check that valueweightsource works correctly.
330 DEFINE_TESTCASE(valueweightsource1, backend) {
331  Xapian::Database db(get_database("apitest_phrase"));
332  Xapian::Enquire enq(db);
334 
335  // Should be in descending order of length
336  tout << "RAW\n";
337  enq.set_query(Xapian::Query(&src));
338  Xapian::MSet mset = enq.get_mset(0, 5);
339  mset_expect_order(mset, 3, 1, 2, 8, 14);
340 
341  // In relevance order
342  tout << "OP_FILTER\n";
344  Xapian::Query("leav"),
345  Xapian::Query(&src));
346  enq.set_query(q);
347  mset = enq.get_mset(0, 5);
348  mset_expect_order(mset, 8, 6, 4, 5, 7);
349 
350  // Should be in descending order of length
351  tout << "OP_FILTER other way\n";
353  Xapian::Query(&src),
354  Xapian::Query("leav"));
355  enq.set_query(q);
356  mset = enq.get_mset(0, 5);
357  mset_expect_order(mset, 8, 14, 9, 13, 7);
358 }
359 
360 // Check that valueweightsource gives the correct bounds for those databases
361 // which support value statistics.
362 DEFINE_TESTCASE(valueweightsource2, valuestats) {
363  Xapian::Database db(get_database("apitest_phrase"));
365  src.init(db);
366  TEST_EQUAL(src.get_termfreq_min(), 17);
367  TEST_EQUAL(src.get_termfreq_est(), 17);
368  TEST_EQUAL(src.get_termfreq_max(), 17);
369  TEST_EQUAL(src.get_maxweight(), 135);
370 }
371 
372 // Check that valueweightsource skip_to() can stay in the same position.
373 DEFINE_TESTCASE(valueweightsource3, valuestats) {
374  Xapian::Database db(get_database("apitest_phrase"));
376  src.init(db);
377  TEST(!src.at_end());
378  src.skip_to(8, 0.0);
379  TEST(!src.at_end());
380  TEST_EQUAL(src.get_docid(), 8);
381  src.skip_to(8, 0.0);
382  TEST(!src.at_end());
383  TEST_EQUAL(src.get_docid(), 8);
384 }
385 
386 // Check that fixedweightsource works correctly.
387 DEFINE_TESTCASE(fixedweightsource1, backend) {
388  Xapian::Database db(get_database("apitest_phrase"));
389  Xapian::Enquire enq(db);
390  double wt = 5.6;
391 
392  {
394 
395  // Should be in increasing order of docid.
396  enq.set_query(Xapian::Query(&src));
397  Xapian::MSet mset = enq.get_mset(0, 5);
398  mset_expect_order(mset, 1, 2, 3, 4, 5);
399 
400  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
401  TEST_EQUAL(i.get_weight(), wt);
402  }
403  }
404 
405  // Do some direct tests, to check the skip_to() and check() methods work.
406  {
407  // Check next and skip_to().
409  src.init(db);
410 
411  src.next(1.0);
412  TEST(!src.at_end());
413  TEST_EQUAL(src.get_docid(), 1);
414  src.next(1.0);
415  TEST(!src.at_end());
416  TEST_EQUAL(src.get_docid(), 2);
417  src.skip_to(5, 1.0);
418  TEST(!src.at_end());
419  TEST_EQUAL(src.get_docid(), 5);
420  src.next(wt * 2);
421  TEST(src.at_end());
422  }
423  {
424  // Check check() as the first operation, followed by next.
426  src.init(db);
427 
428  TEST_EQUAL(src.check(5, 1.0), true);
429  TEST(!src.at_end());
430  TEST_EQUAL(src.get_docid(), 5);
431  src.next(1.0);
432  TEST(!src.at_end());
433  TEST_EQUAL(src.get_docid(), 6);
434  }
435  {
436  // Check check() as the first operation, followed by skip_to().
438  src.init(db);
439 
440  TEST_EQUAL(src.check(5, 1.0), true);
441  TEST(!src.at_end());
442  TEST_EQUAL(src.get_docid(), 5);
443  src.skip_to(6, 1.0);
444  TEST(!src.at_end());
445  TEST_EQUAL(src.get_docid(), 6);
446  src.skip_to(7, wt * 2);
447  TEST(src.at_end());
448  }
449 }
450 
451 // A posting source which changes the maximum weight.
454 
455  // Maximum docid that get_weight() should be called for.
457 
458  public:
460  : did(0), maxid_accessed(maxid_accessed_) { }
461 
462  void init(const Xapian::Database&) override { did = 0; }
463 
464  double get_weight() const override {
465  if (did > maxid_accessed) {
466  FAIL_TEST("ChangeMaxweightPostingSource::get_weight() called "
467  "for docid " + str(did) + ", max id accessed "
468  "should be " + str(maxid_accessed));
469  }
470  return 5 - did;
471  }
472 
473  Xapian::doccount get_termfreq_min() const override { return 4; }
474  Xapian::doccount get_termfreq_est() const override { return 4; }
475  Xapian::doccount get_termfreq_max() const override { return 4; }
476 
477  void next(double) override {
478  ++did;
479  set_maxweight(5 - did);
480  }
481 
482  void skip_to(Xapian::docid to_did, double) override {
483  did = to_did;
484  set_maxweight(5 - did);
485  }
486 
487  bool at_end() const override { return did >= 5; }
488  Xapian::docid get_docid() const override { return did; }
489  string get_description() const override {
490  return "ChangeMaxweightPostingSource";
491  }
492 };
493 
494 // Test a posting source with a variable maxweight.
495 DEFINE_TESTCASE(changemaxweightsource1, backend && !remote && !multi) {
496  // The ChangeMaxweightPostingSource doesn't work with multi or remote.
497  Xapian::Database db(get_database("apitest_phrase"));
498  Xapian::Enquire enq(db);
499 
500  {
503 
505  Xapian::Query(&src1), Xapian::Query(&src2));
506  enq.set_query(q);
507  // Set descending docid order so that the matcher isn't able to
508  // terminate early after 4 documents just because weight == maxweight.
509  enq.set_docid_order(enq.DESCENDING);
510 
511  Xapian::MSet mset = enq.get_mset(0, 4);
512  TEST(src1.at_end());
513  mset_expect_order(mset, 1, 2, 3, 4);
514  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
515  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
516  }
517  }
518 
519  {
522 
524  Xapian::Query(&src1), Xapian::Query(&src2));
525  enq.set_query(q);
526 
527  Xapian::MSet mset = enq.get_mset(0, 2);
528  TEST(!src1.at_end());
529  TEST_EQUAL(src1.get_docid(), 3);
530  TEST_EQUAL_DOUBLE(src1.get_maxweight(), 2.0);
531  mset_expect_order(mset, 1, 2);
532  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
533  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
534  }
535  }
536 }
537 
538 // Test using a valueweightpostingsource which has no entries.
539 DEFINE_TESTCASE(emptyvalwtsource1, backend && !remote && !multi) {
540  Xapian::Database db(get_database("apitest_phrase"));
541  Xapian::Enquire enq(db);
542 
543  Xapian::ValueWeightPostingSource src2(11); // A non-empty slot.
544  Xapian::ValueWeightPostingSource src3(100); // An empty slot.
545  Xapian::Query q1("leav");
546  Xapian::Query q2(&src2);
547  Xapian::Query q3(&src3);
549 
550  // Perform search without ORring with the posting source.
551  Xapian::doccount size1;
552  {
553  enq.set_query(q1);
554  Xapian::MSet mset = enq.get_mset(0, 10);
555  TEST_REL(mset.get_max_possible(), >, 0.0);
556  size1 = mset.size();
557  TEST_REL(size1, >, 0);
558  }
559 
560  // Perform a search with just the non-empty posting source, checking it
561  // returns something.
562  {
563  enq.set_query(q2);
564  Xapian::MSet mset = enq.get_mset(0, 10);
565  TEST_REL(mset.get_max_possible(), >, 0.0);
566  TEST_REL(mset.size(), >, 0);
567  }
568 
569  // Perform a search with just the empty posting source, checking it returns
570  // nothing.
571  {
572  enq.set_query(q3);
573  Xapian::MSet mset = enq.get_mset(0, 10);
574 
575  // get_max_possible() returns 0 here for backends which track the upper
576  // bound on value slot entries, MAX_DBL for backends which don't.
577  // Either is valid.
578  TEST_REL(mset.get_max_possible(), >=, 0.0);
579 
580  TEST_EQUAL(mset.size(), 0);
581  }
582 
583  // Perform a search with the posting source ORred with the normal query.
584  // This is a regression test - it used to return nothing.
585  {
586  enq.set_query(q);
587  Xapian::MSet mset = enq.get_mset(0, 10);
588  TEST_REL(mset.get_max_possible(), >, 0.0);
589  TEST_REL(mset.size(), >, 0.0);
590  TEST_EQUAL(mset.size(), size1);
591  }
592 }
593 
596  public:
597  int & count;
598 
600  : Xapian::DecreasingValueWeightPostingSource(0), count(count_) { }
601 
603  return new SlowDecreasingValueWeightPostingSource(count);
604  }
605 
606  void next(double min_wt) override {
607  sleep(1);
608  ++count;
610  }
611 };
612 
613 static void
615 {
616  for (int wt = 20; wt > 0; --wt) {
617  Xapian::Document doc;
618  doc.add_value(0, Xapian::sortable_serialise(double(wt)));
619  db.add_document(doc);
620  }
621 }
622 
623 // FIXME: This doesn't run for remote databases (we'd need to register
624 // SlowDecreasingValueWeightPostingSource on the remote).
625 DEFINE_TESTCASE(matchtimelimit1, backend && !remote)
626 {
627 #ifndef HAVE_TIMER_CREATE
628  SKIP_TEST("Enquire::set_time_limit() not implemented for this platform");
629 #endif
630  Xapian::Database db = get_database("matchtimelimit1",
632 
633  int count = 0;
635  src.init(db);
636  Xapian::Enquire enquire(db);
637  enquire.set_query(Xapian::Query(&src));
638 
639  enquire.set_time_limit(1.5);
640 
641  Xapian::MSet mset = enquire.get_mset(0, 1, 1000);
642  TEST_EQUAL(mset.size(), 1);
643  TEST_EQUAL(count, 2);
644 }
645 
648  public:
650 
652 
654  Xapian::doccount& doclen_ub_)
655  : Xapian::DecreasingValueWeightPostingSource(0),
656  doclen_lb(doclen_lb_),
657  doclen_ub(doclen_ub_) { }
658 
659  CheckBoundsPostingSource* clone() const override {
660  return new CheckBoundsPostingSource(doclen_lb, doclen_ub);
661  }
662 
663  void init(const Xapian::Database& database) override {
664  doclen_lb = database.get_doclength_lower_bound();
665  doclen_ub = database.get_doclength_upper_bound();
667  }
668 };
669 
670 // Test that doclength bounds are correct.
671 // Regression test for bug fixed in 1.2.25 and 1.4.1.
672 DEFINE_TESTCASE(postingsourcebounds1, backend && !remote)
673 {
674  Xapian::Database db = get_database("apitest_simpledata");
675 
676  Xapian::doccount doclen_lb = 0, doclen_ub = 0;
677  CheckBoundsPostingSource ps(doclen_lb, doclen_ub);
678 
679  Xapian::Enquire enquire(db);
680  enquire.set_query(Xapian::Query(&ps));
681 
682  Xapian::MSet mset = enquire.get_mset(0, 1);
683 
684  TEST_EQUAL(doclen_lb, db.get_doclength_lower_bound());
685  TEST_EQUAL(doclen_ub, db.get_doclength_upper_bound());
686 }
687 
688 // PostingSource which really just counts the clone() calls.
689 // Never actually matches anything, but pretends it might.
692 
693  public:
694  CloneTestPostingSource(int& clone_count_)
695  : clone_count(clone_count_)
696  { }
697 
698  PostingSource* clone() const override {
699  ++clone_count;
700  return new CloneTestPostingSource(clone_count);
701  }
702 
703  void init(const Xapian::Database&) override { }
704 
705  Xapian::doccount get_termfreq_min() const override { return 0; }
706 
707  Xapian::doccount get_termfreq_est() const override { return 1; }
708 
709  Xapian::doccount get_termfreq_max() const override { return 2; }
710 
711  void next(double) override { }
712 
713  void skip_to(Xapian::docid, double) override { }
714 
715  bool at_end() const override {
716  return true;
717  }
718 
719  Xapian::docid get_docid() const override { return 0; }
720 
721  string get_description() const override { return "CloneTestPostingSource"; }
722 };
723 
725 DEFINE_TESTCASE(postingsourceclone1, !backend)
726 {
727  // This fails with 1.3.5-1.4.0 inclusive.
728  {
729  int clones = 0;
730  CloneTestPostingSource ps(clones);
731  TEST_EQUAL(clones, 0);
732  Xapian::Query q(&ps);
733  TEST_EQUAL(clones, 1);
734  }
735 
736  // Check that clone() isn't needlessly called if reference counting has
737  // been turned on for the PostingSource.
738  {
739  int clones = 0;
741  TEST_EQUAL(clones, 0);
742  Xapian::Query q(ps->release());
743  TEST_EQUAL(clones, 0);
744  }
745 }
746 
749 
751 
753 
754  public:
756 
757  explicit
758  OnlyTheFirstPostingSource(bool allow_clone_) : allow_clone(allow_clone_) {}
759 
760  PostingSource* clone() const override {
761  return allow_clone ? new OnlyTheFirstPostingSource(true) : nullptr;
762  }
763 
764  void init(const Xapian::Database& db) override {
765  did = 0;
766  if (shard_index == 0) {
767  last_docid = db.get_lastdocid();
768  } else {
769  last_docid = 0;
770  }
771  ++shard_index;
772  }
773 
774  Xapian::doccount get_termfreq_min() const override { return 0; }
775 
777  return last_docid / 2;
778  }
779 
780  Xapian::doccount get_termfreq_max() const override { return last_docid; }
781 
782  void next(double wt) override {
783  (void)wt;
784  ++did;
785  if (did > last_docid) did = 0;
786  }
787 
788  void skip_to(Xapian::docid to_did, double wt) override {
789  (void)wt;
790  did = to_did;
791  if (did > last_docid) did = 0;
792  }
793 
794  bool at_end() const override {
795  return did == 0;
796  }
797 
798  Xapian::docid get_docid() const override { return did; }
799 
800  string get_description() const override {
801  return "OnlyTheFirstPostingSource";
802  }
803 };
804 
806 
807 DEFINE_TESTCASE(postingsourceshardindex1, multi && !remote) {
808  Xapian::Database db = get_database("apitest_simpledata");
809 
811 
812  Xapian::Enquire enquire(db);
813  {
814  auto ps = new OnlyTheFirstPostingSource(true);
815  enquire.set_query(Xapian::Query(ps->release()));
816 
817  Xapian::MSet mset = enquire.get_mset(0, 10);
818  mset_expect_order(mset, 1, 3, 5);
819  }
820 
821  {
822  /* Regression test for bug fixed in 1.4.12 - we should get an exception
823  * if we use a PostingSource that doesn't support clone() with a multi
824  * DB.
825  */
826  auto ps = new OnlyTheFirstPostingSource(false);
827  enquire.set_query(Xapian::Query(ps->release()));
828 
830  auto m = enquire.get_mset(0, 10));
831  }
832 }
833 
837 
838  public:
840  Xapian::doccount est_,
841  Xapian::doccount ub_)
842  : lb(lb_), est(est_), ub(ub_)
843  { }
844 
845  PostingSource* clone() const override {
846  return new EstimatePS(lb, est, ub);
847  }
848 
849  void init(const Xapian::Database&) override { }
850 
851  Xapian::doccount get_termfreq_min() const override { return lb; }
852 
853  Xapian::doccount get_termfreq_est() const override { return est; }
854 
855  Xapian::doccount get_termfreq_max() const override { return ub; }
856 
857  void next(double) override {
858  FAIL_TEST("EstimatePS::next() shouldn't be called");
859  }
860 
861  void skip_to(Xapian::docid, double) override {
862  FAIL_TEST("EstimatePS::skip_to() shouldn't be called");
863  }
864 
865  bool at_end() const override {
866  return false;
867  }
868 
869  Xapian::docid get_docid() const override {
870  FAIL_TEST("EstimatePS::get_docid() shouldn't be called");
871  }
872 
873  string get_description() const override { return "EstimatePS"; }
874 };
875 
877 DEFINE_TESTCASE(estimaterounding1, backend && !multi && !remote) {
878  Xapian::Database db = get_database("etext");
879  Xapian::Enquire enquire(db);
880  static const struct { Xapian::doccount lb, est, ub, exp; } testcases[] = {
881  // Test rounding down.
882  {411, 424, 439, 420},
883  {1, 312, 439, 300},
884  // Test rounding up.
885  {411, 426, 439, 430},
886  {123, 351, 439, 400},
887  // Rounding based on estimate size if smaller than range size.
888  {1, 12, 439, 10},
889  // Round "5" away from the nearer bound.
890  {1, 15, 439, 20},
891  {1, 350, 439, 300},
892  // Check we round up if rounding down would be out of range.
893  {411, 416, 439, 420},
894  {411, 412, 439, 420},
895  // Check we round down if rounding up would be out of range.
896  {111, 133, 138, 130},
897  {111, 137, 138, 130},
898  // Check we don't round if either way would be out of range.
899  {411, 415, 419, 415},
900  // Leave small estimates alone.
901  {1, 6, 439, 6},
902  };
903  for (auto& t : testcases) {
904  EstimatePS ps(t.lb, t.est, t.ub);
905  enquire.set_query(Xapian::Query(&ps));
906  Xapian::MSet mset = enquire.get_mset(0, 0);
907  // MSet::get_description() includes bounds and raw estimate.
908  tout << mset.get_description() << '\n';
909  TEST_EQUAL(mset.get_matches_estimated(), t.exp);
910  }
911 }
DEFINE_TESTCASE(externalsource1, backend &&!remote &&!multi)
static void make_matchtimelimit1_db(Xapian::WritableDatabase &db, const string &)
static const testcase testcases[]
Definition: api_unicode.cc:39
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
test functionality of the Xapian API
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
void next(double) override
Advance the current position to the next matching document.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void skip_to(Xapian::docid to_did, double) override
Advance to the specified docid.
double get_weight() const override
Return the weight contribution for the current document.
Xapian::docid get_docid() const override
Return the current docid.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
string get_description() const override
Return a string describing this object.
ChangeMaxweightPostingSource(Xapian::docid maxid_accessed_)
Xapian::doccount & doclen_ub
CheckBoundsPostingSource(Xapian::doccount &doclen_lb_, Xapian::doccount &doclen_ub_)
Xapian::doccount & doclen_lb
void init(const Xapian::Database &database) override
Set this PostingSource to the start of the list of postings.
CheckBoundsPostingSource * clone() const override
Clone the posting source.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
CloneTestPostingSource(int &clone_count_)
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
void skip_to(Xapian::docid, double) override
Advance to the specified docid.
string get_description() const override
Return a string describing this object.
void next(double) override
Advance the current position to the next matching document.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
bool at_end() const override
Return true if the current position is past the last entry in this list.
Xapian::docid get_docid() const override
Return the current docid.
PostingSource * clone() const override
Clone the posting source.
PostingSource subclass for injecting tf bounds and estimate.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
void skip_to(Xapian::docid, double) override
Advance to the specified docid.
EstimatePS(Xapian::doccount lb_, Xapian::doccount est_, Xapian::doccount ub_)
string get_description() const override
Return a string describing this object.
PostingSource * clone() const override
Clone the posting source.
void next(double) override
Advance the current position to the next matching document.
bool at_end() const override
Return true if the current position is past the last entry in this list.
Xapian::docid get_docid() const override
Return the current docid.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::doccount est
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
PostingSource * clone() const override
Clone the posting source.
Xapian::docid get_docid() const override
Return the current docid.
string get_description() const override
Return a string describing this object.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
double get_weight() const override
Return the weight contribution for the current document.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
void next(double wt) override
Advance the current position to the next matching document.
MyDontAskWeightPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
bool at_end() const override
Return true if the current position is past the last entry in this list.
void init(const Xapian::Database &db) override
Set this PostingSource to the start of the list of postings.
string get_description() const override
Return a string describing this object.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
void next(double wt) override
Advance the current position to the next matching document.
Xapian::doccount num_docs
MyOddPostingSource(const Xapian::Database &db)
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::doccount last_docid
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
PostingSource * clone() const override
Clone the posting source.
MyOddPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
Xapian::docid get_docid() const override
Return the current docid.
bool at_end() const override
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
MyOddWeightingPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
string get_description() const override
Return a string describing this object.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void next(double wt) override
Advance the current position to the next matching document.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
PostingSource * clone() const override
Clone the posting source.
double get_weight() const override
Return the weight contribution for the current document.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
MyOddWeightingPostingSource(const Xapian::Database &db)
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::docid get_docid() const override
Return the current docid.
static Xapian::doccount shard_index
Xapian::docid get_docid() const override
Return the current docid.
OnlyTheFirstPostingSource(bool allow_clone_)
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
void next(double wt) override
Advance the current position to the next matching document.
bool at_end() const override
Return true if the current position is past the last entry in this list.
string get_description() const override
Return a string describing this object.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
PostingSource * clone() const override
Clone the posting source.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
void init(const Xapian::Database &db) override
Set this PostingSource to the start of the list of postings.
SlowDecreasingValueWeightPostingSource * clone() const override
Clone the posting source.
void next(double min_wt) override
Advance the current position to the next matching document.
Class implementing a "boolean" weighting scheme.
Definition: weight.h:433
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
Definition: omdatabase.cc:401
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Definition: omdatabase.cc:421
Read weights from a value which is known to decrease as docid increases.
void next(double min_wt)
Advance the current position to the next matching document.
void init(const Xapian::Database &db_)
Set this PostingSource to the start of the list of postings.
A handle representing a document in a Xapian database.
Definition: document.h:61
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
This class provides an interface to the information retrieval system for the purpose of searching.
Definition: enquire.h:152
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
void set_time_limit(double time_limit)
Set a time limit for the match.
Definition: omenquire.cc:932
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:862
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:938
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:856
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
@ DESCENDING
docids sort in descending order.
Definition: enquire.h:330
A posting source which returns a fixed weight for all documents.
bool at_end() const
Return true if the current position is past the last entry in this list.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
void next(double min_wt)
Advance the current position to the next matching document.
Xapian::docid get_docid() const
Return the current docid.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
Iterator over a Xapian::MSet.
Definition: mset.h:368
Class representing a list of search results.
Definition: mset.h:44
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:300
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:325
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
Base class which provides an "external" source of postings.
Definition: postingsource.h:48
double get_maxweight() const
Return the currently set upper bound on what get_weight() can return.
PostingSource * release()
Start reference counting this object.
Class representing a query.
Definition: query.h:46
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
Definition: query.h:166
@ OP_AND_MAYBE
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Xapian::docid get_docid() const
Return the current docid.
A posting source which reads weights from a value slot.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
This class provides read/write access to a database.
Definition: database.h:789
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void sleep(double t)
Sleep until the time represented by t.
Definition: realtime.h:127
string str(int value)
Convert int to std::string.
Definition: str.cc:90
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1382
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
static double est(double l, double r, double n)
Definition: orpostlist.cc:306
<unistd.h>, but with compat.
Convert types to std::string.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b).
Definition: testmacros.h:32
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:74
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
Public interfaces for the Xapian library.