xapian-core  2.0.0
api_postingsource.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008,2009,2011,2015,2016,2019,2024 Olly Betts
5  * Copyright 2008,2009 Lemur Consulting Ltd
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, see
20  * <https://www.gnu.org/licenses/>.
21  */
22 
23 #include <config.h>
24 
25 #include "api_postingsource.h"
26 
27 #include <xapian.h>
28 
29 #include <string>
30 #include "safeunistd.h"
31 
32 #include "str.h"
33 #include "testutils.h"
34 #include "apitest.h"
35 
36 using namespace std;
37 
40 
42 
44 
46  Xapian::doccount last_docid_)
47  : num_docs(num_docs_), last_docid(last_docid_), did(0)
48  { }
49 
50  public:
52  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
53  { }
54 
55  PostingSource* clone() const override {
56  return new MyOddPostingSource(num_docs, last_docid);
57  }
58 
59  void reset(const Xapian::Database&, Xapian::doccount) override { did = 0; }
60 
61  // These bounds could be better, but that's not important here.
62  Xapian::doccount get_termfreq_min() const override { return 0; }
63 
64  Xapian::doccount get_termfreq_est() const override { return num_docs / 2; }
65 
66  Xapian::doccount get_termfreq_max() const override { return num_docs; }
67 
68  void next(double wt) override {
69  (void)wt;
70  ++did;
71  if (did % 2 == 0) ++did;
72  }
73 
74  void skip_to(Xapian::docid to_did, double wt) override {
75  (void)wt;
76  did = to_did;
77  if (did % 2 == 0) ++did;
78  }
79 
80  bool at_end() const override {
81  // Doesn't work if last_docid is 2^32 - 1.
82  return did > last_docid;
83  }
84 
85  Xapian::docid get_docid() const override { return did; }
86 
87  string get_description() const override { return "MyOddPostingSource"; }
88 };
89 
// Run a MyOddPostingSource as a whole query and then combined with the term
// "leav", checking which docids are returned each time.
// NOTE(review): embedded listing lines 100 and 107 are not visible here, so
// the statement that uses `nullsrc` and the first line of the declaration of
// `q` (including the operator it combines with) cannot be confirmed.
90 DEFINE_TESTCASE(externalsource1, backend && !remote && !multi) {
91  // Doesn't work for remote without registering with the server.
92  // Doesn't work for multi because it checks the docid in the
93  // subdatabase.
94  Xapian::Database db(get_database("apitest_phrase"));
95  Xapian::Enquire enq(db);
96  MyOddPostingSource src(db);
97 
98  // Check that passing NULL is rejected as intended.
99  Xapian::PostingSource * nullsrc = NULL;
101 
 // The source on its own: MyOddPostingSource only ever positions itself on
 // odd docids, so only odd docids are expected back.
102  enq.set_query(Xapian::Query(&src));
103 
104  Xapian::MSet mset = enq.get_mset(0, 10);
105  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
106 
 // The source combined with the term "leav" (see the note at the top about
 // the missing first line of this declaration).
108  Xapian::Query("leav"),
109  Xapian::Query(&src));
110  enq.set_query(q);
111 
112  mset = enq.get_mset(0, 10);
113  mset_expect_order(mset, 5, 7, 11, 13, 9);
114 }
115 
116 // Test that trying to use PostingSource with the remote backend throws
117 // Xapian::UnimplementedError as expected (we need to register the class
118 // in xapian-tcpsrv/xapian-progsrv for this to work).
// NOTE(review): embedded listing lines 126, 129 and 134 are not visible here.
// Each get_mset() call below ends with an unmatched `)`, so it is presumably
// the final argument of an exception-checking macro whose opening line is
// missing; the first line of the declaration of `q` is missing too.
119 DEFINE_TESTCASE(externalsource2, remote) {
120  Xapian::Database db(get_database("apitest_phrase"));
121  Xapian::Enquire enq(db);
122  MyOddPostingSource src(db);
123 
 // First attempt: the posting source as the whole query.
124  enq.set_query(Xapian::Query(&src));
125 
127  Xapian::MSet mset = enq.get_mset(0, 10));
128 
 // Second attempt: the posting source combined with the term "leav".
130  Xapian::Query("leav"),
131  Xapian::Query(&src));
132  enq.set_query(q);
133 
135  Xapian::MSet mset = enq.get_mset(0, 10));
136 }
137 
140 
142 
144 
146  Xapian::doccount last_docid_)
147  : num_docs(num_docs_), last_docid(last_docid_), did(0)
148  {
149  set_maxweight(1000);
150  }
151 
152  public:
154  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
155  { }
156 
157  PostingSource* clone() const override {
158  return new MyOddWeightingPostingSource(num_docs, last_docid);
159  }
160 
161  // Deliberately override init() instead of reset() here to test that still
162  // works.
163  void init(const Xapian::Database&) override { did = 0; }
164 
165  double get_weight() const override {
166  return (did % 2) ? 1000 : 0.001;
167  }
168 
169  // These bounds could be better, but that's not important here.
170  Xapian::doccount get_termfreq_min() const override { return 0; }
171 
172  Xapian::doccount get_termfreq_est() const override { return num_docs / 2; }
173 
174  Xapian::doccount get_termfreq_max() const override { return num_docs; }
175 
176  void next(double wt) override {
177  (void)wt;
178  ++did;
179  }
180 
181  void skip_to(Xapian::docid to_did, double wt) override {
182  (void)wt;
183  did = to_did;
184  }
185 
186  bool at_end() const override {
187  // Doesn't work if last_docid is 2^32 - 1.
188  return did > last_docid;
189  }
190 
191  Xapian::docid get_docid() const override { return did; }
192 
193  string get_description() const override {
194  return "MyOddWeightingPostingSource";
195  }
196 };
197 
198 // Like externalsource1, except we use the weight to favour odd documents.
// NOTE(review): embedded listing lines 205, 212, 230 and 236 are not visible
// here, so the declaration of `src` (presumably a MyOddWeightingPostingSource
// - confirm), the first line of the declaration of `q`, and the statements
// which precede the last two get_mset() calls cannot be confirmed.
199 DEFINE_TESTCASE(externalsource3, backend && !remote && !multi) {
200  // Doesn't work for remote without registering with the server.
201  // Doesn't work for multi because it checks the docid in the
202  // subdatabase.
203  Xapian::Database db(get_database("apitest_phrase"));
204  Xapian::Enquire enq(db);
206 
207  enq.set_query(Xapian::Query(&src));
208 
 // Nine odd docids are expected first, followed by an even one.
209  Xapian::MSet mset = enq.get_mset(0, 10);
210  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17, 2);
211 
 // The source combined with the term "leav".
213  Xapian::Query("leav"),
214  Xapian::Query(&src));
215  enq.set_query(q);
216 
217  mset = enq.get_mset(0, 5);
218  mset_expect_order(mset, 5, 7, 11, 13, 9);
219 
220  tout << "max possible weight = " << mset.get_max_possible() << '\n';
221  TEST(mset.get_max_possible() > 1000);
222 
 // Same query again with a cutoff of 1000.001 set; the same five documents
 // are expected.
223  enq.set_cutoff(0, 1000.001);
224  mset = enq.get_mset(0, 10);
225  mset_expect_order(mset, 5, 7, 11, 13, 9);
226 
227  tout << "max possible weight = " << mset.get_max_possible() << '\n';
228  TEST(mset.get_max_possible() > 1000);
229 
 // NOTE(review): the query set up for this check is on a missing line; the
 // assertions expect no matches and a maximum possible weight of 500.
231  mset = enq.get_mset(0, 10);
232  TEST(mset.empty());
233 
234  TEST_EQUAL(mset.get_max_possible(), 500);
235 
 // NOTE(review): the query set up for this check is on a missing line; the
 // assertions expect the odd docids and a maximum possible weight of 2000.
237  mset = enq.get_mset(0, 10);
238  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
239 
240  TEST_EQUAL(mset.get_max_possible(), 2000);
241 }
242 
245 
247 
249 
251  Xapian::doccount last_docid_)
252  : num_docs(num_docs_), last_docid(last_docid_), did(0)
253  { }
254 
255  public:
256  MyDontAskWeightPostingSource() : Xapian::PostingSource() {}
257 
258  PostingSource* clone() const override {
259  return new MyDontAskWeightPostingSource(num_docs, last_docid);
260  }
261 
262  void reset(const Xapian::Database& db, Xapian::doccount) override {
263  num_docs = db.get_doccount();
264  last_docid = db.get_lastdocid();
265  did = 0;
266  }
267 
268  double get_weight() const override {
269  FAIL_TEST("MyDontAskWeightPostingSource::get_weight() called");
270  }
271 
272  // These bounds could be better, but that's not important here.
273  Xapian::doccount get_termfreq_min() const override { return num_docs; }
274 
275  Xapian::doccount get_termfreq_est() const override { return num_docs; }
276 
277  Xapian::doccount get_termfreq_max() const override { return num_docs; }
278 
279  void next(double wt) override {
280  (void)wt;
281  ++did;
282  }
283 
284  void skip_to(Xapian::docid to_did, double wt) override {
285  (void)wt;
286  did = to_did;
287  }
288 
289  bool at_end() const override {
290  // Doesn't work if last_docid is 2^32 - 1.
291  return did > last_docid;
292  }
293 
294  Xapian::docid get_docid() const override { return did; }
295 
296  string get_description() const override {
297  return "MyDontAskWeightPostingSource";
298  }
299 };
300 
301 // Check that boolean use doesn't call get_weight().
// NOTE(review): embedded listing lines 305, 308, 314 and 324 are not visible
// here, so the declaration of `src` (presumably a
// MyDontAskWeightPostingSource, whose get_weight() fails the test - confirm),
// the query set for the "OP_SCALE_WEIGHT 0" step, the first line of the
// declaration of `q`, and the statement following the "BoolWeight"
// set_query() call cannot be confirmed.
302 DEFINE_TESTCASE(externalsource4, backend && !remote) {
303  Xapian::Database db(get_database("apitest_phrase"));
304  Xapian::Enquire enq(db);
306 
 // Each step logs a label to tout so that it can be identified in the test
 // output.
307  tout << "OP_SCALE_WEIGHT 0\n";
309 
310  Xapian::MSet mset = enq.get_mset(0, 5);
311  mset_expect_order(mset, 1, 2, 3, 4, 5);
312 
313  tout << "OP_FILTER\n";
315  Xapian::Query("leav"),
316  Xapian::Query(&src));
317  enq.set_query(q);
318 
319  mset = enq.get_mset(0, 5);
320  mset_expect_order(mset, 8, 6, 4, 5, 7);
321 
322  tout << "BoolWeight\n";
323  enq.set_query(Xapian::Query(&src));
325 
 // The match and order check for the BoolWeight step are commented out.
326  // mset = enq.get_mset(0, 5);
327  // mset_expect_order(mset, 1, 2, 3, 4, 5);
328 }
329 
330 // Check that valueweightsource works correctly.
// NOTE(review): embedded listing lines 334, 344 and 353 are not visible here,
// so the declaration of `src` (presumably a Xapian::ValueWeightPostingSource
// - confirm) and the first line of each statement which builds `q` cannot be
// confirmed.
331 DEFINE_TESTCASE(valueweightsource1, backend) {
332  Xapian::Database db(get_database("apitest_phrase"));
333  Xapian::Enquire enq(db);
335 
336  // Should be in descending order of length
337  tout << "RAW\n";
338  enq.set_query(Xapian::Query(&src));
339  Xapian::MSet mset = enq.get_mset(0, 5);
340  mset_expect_order(mset, 3, 1, 2, 8, 14);
341 
 // Term "leav" first, posting source second.
342  // In relevance order
343  tout << "OP_FILTER\n";
345  Xapian::Query("leav"),
346  Xapian::Query(&src));
347  enq.set_query(q);
348  mset = enq.get_mset(0, 5);
349  mset_expect_order(mset, 8, 6, 4, 5, 7);
350 
 // Same two subqueries with their order swapped.
351  // Should be in descending order of length
352  tout << "OP_FILTER other way\n";
354  Xapian::Query(&src),
355  Xapian::Query("leav"));
356  enq.set_query(q);
357  mset = enq.get_mset(0, 5);
358  mset_expect_order(mset, 8, 14, 9, 13, 7);
359 }
360 
361 // Check that valueweightsource gives the correct bounds for those databases
362 // which support value statistics.
// NOTE(review): embedded listing line 365, which presumably declares `src`,
// is not visible here, so the value slot it reads cannot be confirmed.
363 DEFINE_TESTCASE(valueweightsource2, valuestats) {
364  Xapian::Database db(get_database("apitest_phrase"));
 // The source is driven directly (no Enquire): reset() is called on it with
 // the database and 0 before the bounds are read.
366  src.reset(db, 0);
 // All three termfreq bounds are expected to be 17 and the maximum weight
 // 135.
367  TEST_EQUAL(src.get_termfreq_min(), 17);
368  TEST_EQUAL(src.get_termfreq_est(), 17);
369  TEST_EQUAL(src.get_termfreq_max(), 17);
370  TEST_EQUAL(src.get_maxweight(), 135);
371 }
372 
373 // Check that valueweightsource skip_to() can stay in the same position.
// NOTE(review): embedded listing line 376, which presumably declares `src`,
// is not visible here.
374 DEFINE_TESTCASE(valueweightsource3, valuestats) {
375  Xapian::Database db(get_database("apitest_phrase"));
377  src.reset(db, 0);
378  TEST(!src.at_end());
 // First skip_to(8) moves the source onto docid 8.
379  src.skip_to(8, 0.0);
380  TEST(!src.at_end());
381  TEST_EQUAL(src.get_docid(), 8);
 // Repeating skip_to(8) must leave the source on docid 8 rather than
 // advancing it or running off the end.
382  src.skip_to(8, 0.0);
383  TEST(!src.at_end());
384  TEST_EQUAL(src.get_docid(), 8);
385 }
386 
387 // Check that fixedweightsource works correctly.
// NOTE(review): embedded listing lines 394, 409, 426 and 438 are not visible
// here; each presumably declares the `src` used within its own scope
// (presumably a fixed-weight source constructed with `wt` - confirm).
388 DEFINE_TESTCASE(fixedweightsource1, backend) {
389  Xapian::Database db(get_database("apitest_phrase"));
390  Xapian::Enquire enq(db);
 // Weight which every match is expected to be given.
391  double wt = 5.6;
392 
393  {
395 
396  // Should be in increasing order of docid.
397  enq.set_query(Xapian::Query(&src));
398  Xapian::MSet mset = enq.get_mset(0, 5);
399  mset_expect_order(mset, 1, 2, 3, 4, 5);
400 
 // Every returned document should have exactly the weight `wt`.
401  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
402  TEST_EQUAL(i.get_weight(), wt);
403  }
404  }
405 
406  // Do some direct tests, to check the skip_to() and check() methods work.
407  {
408  // Check next and skip_to().
410  src.reset(db, 0);
411 
412  src.next(1.0);
413  TEST(!src.at_end());
414  TEST_EQUAL(src.get_docid(), 1);
415  src.next(1.0);
416  TEST(!src.at_end());
417  TEST_EQUAL(src.get_docid(), 2);
418  src.skip_to(5, 1.0);
419  TEST(!src.at_end());
420  TEST_EQUAL(src.get_docid(), 5);
 // Passing wt * 2 (rather than 1.0) to next() is expected to leave the
 // source at its end.
421  src.next(wt * 2);
422  TEST(src.at_end());
423  }
424  {
425  // Check check() as the first operation, followed by next.
427  src.reset(db, 0);
428 
429  TEST_EQUAL(src.check(5, 1.0), true);
430  TEST(!src.at_end());
431  TEST_EQUAL(src.get_docid(), 5);
432  src.next(1.0);
433  TEST(!src.at_end());
434  TEST_EQUAL(src.get_docid(), 6);
435  }
436  {
437  // Check check() as the first operation, followed by skip_to().
439  src.reset(db, 0);
440 
441  TEST_EQUAL(src.check(5, 1.0), true);
442  TEST(!src.at_end());
443  TEST_EQUAL(src.get_docid(), 5);
444  src.skip_to(6, 1.0);
445  TEST(!src.at_end());
446  TEST_EQUAL(src.get_docid(), 6);
 // As with next() above, passing wt * 2 to skip_to() is expected to leave
 // the source at its end.
447  src.skip_to(7, wt * 2);
448  TEST(src.at_end());
449  }
450 }
451 
452 // A posting source which changes the maximum weight.
455 
456  // Maximum docid that get_weight() should be called for.
458 
459  public:
461  : did(0), maxid_accessed(maxid_accessed_) { }
462 
463  void reset(const Xapian::Database&, Xapian::doccount) override { did = 0; }
464 
465  double get_weight() const override {
466  if (did > maxid_accessed) {
467  FAIL_TEST("ChangeMaxweightPostingSource::get_weight() called "
468  "for docid " + str(did) + ", max id accessed "
469  "should be " + str(maxid_accessed));
470  }
471  return 5 - did;
472  }
473 
474  Xapian::doccount get_termfreq_min() const override { return 4; }
475  Xapian::doccount get_termfreq_est() const override { return 4; }
476  Xapian::doccount get_termfreq_max() const override { return 4; }
477 
478  void next(double) override {
479  ++did;
480  set_maxweight(5 - did);
481  }
482 
483  void skip_to(Xapian::docid to_did, double) override {
484  did = to_did;
485  set_maxweight(5 - did);
486  }
487 
488  bool at_end() const override { return did >= 5; }
489  Xapian::docid get_docid() const override { return did; }
490  string get_description() const override {
491  return "ChangeMaxweightPostingSource";
492  }
493 };
494 
495 // Test a posting source with a variable maxweight.
// NOTE(review): embedded listing lines 502, 503, 505, 521, 522 and 524 are not
// visible here, so the declarations of `src1` and `src2` in each scope and
// the first line of each declaration of `q` (including the operator used)
// cannot be confirmed.
496 DEFINE_TESTCASE(changemaxweightsource1, backend && !remote && !multi) {
497  // The ChangeMaxweightPostingSource doesn't work with multi or remote.
498  Xapian::Database db(get_database("apitest_phrase"));
499  Xapian::Enquire enq(db);
500 
501  {
504 
506  Xapian::Query(&src1), Xapian::Query(&src2));
507  enq.set_query(q);
508  // Set descending docid order so that the matcher isn't able to
509  // terminate early after 4 documents just because weight == maxweight.
510  enq.set_docid_order(enq.DESCENDING);
511 
 // With four results requested, src1 is expected to have been run all the
 // way to its end.
512  Xapian::MSet mset = enq.get_mset(0, 4);
513  TEST(src1.at_end());
514  mset_expect_order(mset, 1, 2, 3, 4);
 // Expected weight of each result is 7.5 minus its docid.
515  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
516  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
517  }
518  }
519 
520  {
523 
525  Xapian::Query(&src1), Xapian::Query(&src2));
526  enq.set_query(q);
527 
 // With only two results requested, src1 is expected to have stopped on
 // docid 3 with its maxweight lowered to 2.0, i.e. the match finished
 // without running src1 to its end.
528  Xapian::MSet mset = enq.get_mset(0, 2);
529  TEST(!src1.at_end());
530  TEST_EQUAL(src1.get_docid(), 3);
531  TEST_EQUAL_DOUBLE(src1.get_maxweight(), 2.0);
532  mset_expect_order(mset, 1, 2);
533  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
534  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
535  }
536  }
537 }
538 
539 // Test using a valueweightpostingsource which has no entries.
// NOTE(review): embedded listing line 549, which presumably declares the `q`
// used in the final step, is not visible here, so exactly how `q` is built
// cannot be confirmed.
540 DEFINE_TESTCASE(emptyvalwtsource1, backend && !remote && !multi) {
541  Xapian::Database db(get_database("apitest_phrase"));
542  Xapian::Enquire enq(db);
543 
544  Xapian::ValueWeightPostingSource src2(11); // A non-empty slot.
545  Xapian::ValueWeightPostingSource src3(100); // An empty slot.
546  Xapian::Query q1("leav");
547  Xapian::Query q2(&src2);
548  Xapian::Query q3(&src3);
550 
551  // Perform search without ORring with the posting source.
 // The number of results is remembered in size1 for the final comparison.
552  Xapian::doccount size1;
553  {
554  enq.set_query(q1);
555  Xapian::MSet mset = enq.get_mset(0, 10);
556  TEST_REL(mset.get_max_possible(), >, 0.0);
557  size1 = mset.size();
558  TEST_REL(size1, >, 0);
559  }
560 
561  // Perform a search with just the non-empty posting source, checking it
562  // returns something.
563  {
564  enq.set_query(q2);
565  Xapian::MSet mset = enq.get_mset(0, 10);
566  TEST_REL(mset.get_max_possible(), >, 0.0);
567  TEST_REL(mset.size(), >, 0);
568  }
569 
570  // Perform a search with just the empty posting source, checking it returns
571  // nothing.
572  {
573  enq.set_query(q3);
574  Xapian::MSet mset = enq.get_mset(0, 10);
575 
576  // get_max_possible() returns 0 here for backends which track the upper
577  // bound on value slot entries, MAX_DBL for backends which don't.
578  // Either is valid.
579  TEST_REL(mset.get_max_possible(), >=, 0.0);
580 
581  TEST_EQUAL(mset.size(), 0);
582  }
583 
584  // Perform a search with the posting source ORred with the normal query.
585  // This is a regression test - it used to return nothing.
586  {
587  enq.set_query(q);
588  Xapian::MSet mset = enq.get_mset(0, 10);
589  TEST_REL(mset.get_max_possible(), >, 0.0);
 // NOTE(review): this compares the size against the double literal 0.0
 // whereas the equivalent checks above compare against 0.
590  TEST_REL(mset.size(), >, 0.0);
 // The combined query must return as many results as the plain term query
 // did.
591  TEST_EQUAL(mset.size(), size1);
592  }
593 }
594 
595 #ifdef HAVE_TIMER_CREATE
596 class SlowDecreasingValueWeightPostingSource
598  public:
599  int & count;
600 
601  SlowDecreasingValueWeightPostingSource(int & count_)
602  : Xapian::DecreasingValueWeightPostingSource(0), count(count_) { }
603 
604  SlowDecreasingValueWeightPostingSource* clone() const override {
605  return new SlowDecreasingValueWeightPostingSource(count);
606  }
607 
608  void next(double min_wt) override {
609  sleep(1);
610  ++count;
612  }
613 };
614 
615 static void
616 make_matchtimelimit1_db(Xapian::WritableDatabase &db, const string &)
617 {
618  for (int wt = 20; wt > 0; --wt) {
619  Xapian::Document doc;
620  doc.add_value(0, Xapian::sortable_serialise(double(wt)));
621  db.add_document(doc);
622  }
623 }
624 #endif
625 
626 // FIXME: This doesn't run for remote databases (we'd need to register
627 // SlowDecreasingValueWeightPostingSource on the remote).
628 DEFINE_TESTCASE(matchtimelimit1, backend && !remote)
629 {
630 #ifndef HAVE_TIMER_CREATE
631  SKIP_TEST("Enquire::set_time_limit() not implemented for this platform");
632 #else
633  Xapian::Database db = get_database("matchtimelimit1",
634  make_matchtimelimit1_db);
635 
636  int count = 0;
637  SlowDecreasingValueWeightPostingSource src(count);
638  src.reset(db, 0);
639  Xapian::Enquire enquire(db);
640  enquire.set_query(Xapian::Query(&src));
641 
642  enquire.set_time_limit(1.5);
643 
644  Xapian::MSet mset = enquire.get_mset(0, 1, 1000);
645  TEST_EQUAL(mset.size(), 1);
646  TEST_EQUAL(count, 2);
647 #endif
648 }
649 
652  public:
654 
656 
658  Xapian::doccount& doclen_ub_)
659  : Xapian::DecreasingValueWeightPostingSource(0),
660  doclen_lb(doclen_lb_),
661  doclen_ub(doclen_ub_) { }
662 
663  CheckBoundsPostingSource* clone() const override {
664  return new CheckBoundsPostingSource(doclen_lb, doclen_ub);
665  }
666 
667  void reset(const Xapian::Database& database,
668  Xapian::doccount shard_index) override {
669  doclen_lb = database.get_doclength_lower_bound();
670  doclen_ub = database.get_doclength_upper_bound();
672  shard_index);
673  }
674 };
675 
676 // Test that doclength bounds are correct.
677 // Regression test for bug fixed in 1.2.25 and 1.4.1.
678 DEFINE_TESTCASE(postingsourcebounds1, backend && !remote)
679 {
680  Xapian::Database db = get_database("apitest_simpledata");
681 
682  Xapian::doccount doclen_lb = 0, doclen_ub = 0;
683  CheckBoundsPostingSource ps(doclen_lb, doclen_ub);
684 
685  Xapian::Enquire enquire(db);
686  enquire.set_query(Xapian::Query(&ps));
687 
688  Xapian::MSet mset = enquire.get_mset(0, 1);
689 
690  TEST_EQUAL(doclen_lb, db.get_doclength_lower_bound());
691  TEST_EQUAL(doclen_ub, db.get_doclength_upper_bound());
692 }
693 
694 // PostingSource which really just counts the clone() calls.
695 // Never actually matches anything, but pretends it might.
698 
699  public:
700  CloneTestPostingSource(int& clone_count_)
701  : clone_count(clone_count_)
702  { }
703 
704  PostingSource* clone() const override {
705  ++clone_count;
706  return new CloneTestPostingSource(clone_count);
707  }
708 
709  void reset(const Xapian::Database&, Xapian::doccount) override { }
710 
711  Xapian::doccount get_termfreq_min() const override { return 0; }
712 
713  Xapian::doccount get_termfreq_est() const override { return 1; }
714 
715  Xapian::doccount get_termfreq_max() const override { return 2; }
716 
717  void next(double) override { }
718 
719  void skip_to(Xapian::docid, double) override { }
720 
721  bool at_end() const override {
722  return true;
723  }
724 
725  Xapian::docid get_docid() const override { return 0; }
726 
727  string get_description() const override { return "CloneTestPostingSource"; }
728 };
729 
// Check how many times clone() is called when a Xapian::Query is built from
// a CloneTestPostingSource, which increments the counter it was constructed
// with each time its clone() method is called.
// NOTE(review): embedded listing line 746, which presumably declares the `ps`
// pointer used in the second scope, is not visible here.
731 DEFINE_TESTCASE(postingsourceclone1, !backend)
732 {
733  // This fails with 1.3.5-1.4.0 inclusive.
734  {
735  int clones = 0;
736  CloneTestPostingSource ps(clones);
737  TEST_EQUAL(clones, 0);
 // Building a Query from the address of a stack-allocated source is
 // expected to clone it exactly once.
738  Xapian::Query q(&ps);
739  TEST_EQUAL(clones, 1);
740  }
741 
742  // Check that clone() isn't needlessly called if reference counting has
743  // been turned on for the PostingSource.
744  {
745  int clones = 0;
747  TEST_EQUAL(clones, 0);
 // Here the Query is built from the result of ps->release(), and no clone()
 // call is expected.
748  Xapian::Query q(ps->release());
749  TEST_EQUAL(clones, 0);
750  }
751 }
752 
755 
757 
759 
760  public:
761  explicit
762  OnlyTheFirstPostingSource(bool allow_clone_) : allow_clone(allow_clone_) {}
763 
764  PostingSource* clone() const override {
765  return allow_clone ? new OnlyTheFirstPostingSource(true) : nullptr;
766  }
767 
768  void reset(const Xapian::Database& db,
769  Xapian::doccount shard_index) override {
770  did = 0;
771  if (shard_index == 0) {
772  last_docid = db.get_lastdocid();
773  } else {
774  last_docid = 0;
775  }
776  }
777 
778  Xapian::doccount get_termfreq_min() const override { return 0; }
779 
781  return last_docid / 2;
782  }
783 
784  Xapian::doccount get_termfreq_max() const override { return last_docid; }
785 
786  void next(double wt) override {
787  (void)wt;
788  ++did;
789  if (did > last_docid) did = 0;
790  }
791 
792  void skip_to(Xapian::docid to_did, double wt) override {
793  (void)wt;
794  did = to_did;
795  if (did > last_docid) did = 0;
796  }
797 
798  bool at_end() const override {
799  return did == 0;
800  }
801 
802  Xapian::docid get_docid() const override { return did; }
803 
804  string get_description() const override {
805  return "OnlyTheFirstPostingSource";
806  }
807 };
808 
// Check the shard index passed to PostingSource::reset() when searching a
// multi database.  OnlyTheFirstPostingSource::reset() only gives itself a
// non-zero last docid when shard_index is 0, so it produces no postings for
// any other shard.
// NOTE(review): embedded listing line 829 is not visible here; the get_mset()
// call on the following line ends with an unmatched `)`, so it is presumably
// the final argument of an exception-checking macro opened on that line.
809 DEFINE_TESTCASE(postingsourceshardindex1, multi && !remote) {
810  Xapian::Database db = get_database("apitest_simpledata");
811 
812  Xapian::Enquire enquire(db);
813  {
 // A source which supports clone(); the Query is built from the result of
 // ps->release().
814  auto ps = new OnlyTheFirstPostingSource(true);
815  enquire.set_query(Xapian::Query(ps->release()));
816 
817  Xapian::MSet mset = enquire.get_mset(0, 10);
818  mset_expect_order(mset, 1, 3, 5);
819  }
820 
821  {
822  /* Regression test for bug fixed in 1.4.12 - we should get an exception
823  * if we use a PostingSource that doesn't support clone() with a multi
824  * DB.
825  */
 // Constructed with allow_clone false, so its clone() returns nullptr.
826  auto ps = new OnlyTheFirstPostingSource(false);
827  enquire.set_query(Xapian::Query(ps->release()));
828 
830  auto m = enquire.get_mset(0, 10));
831  }
832 }
833 
837 
838  public:
840  Xapian::doccount est_,
841  Xapian::doccount ub_)
842  : lb(lb_), est(est_), ub(ub_)
843  { }
844 
845  PostingSource* clone() const override {
846  return new EstimatePS(lb, est, ub);
847  }
848 
849  void reset(const Xapian::Database&, Xapian::doccount) override { }
850 
851  Xapian::doccount get_termfreq_min() const override { return lb; }
852 
853  Xapian::doccount get_termfreq_est() const override { return est; }
854 
855  Xapian::doccount get_termfreq_max() const override { return ub; }
856 
857  void next(double) override {
858  FAIL_TEST("EstimatePS::next() shouldn't be called");
859  }
860 
861  void skip_to(Xapian::docid, double) override {
862  FAIL_TEST("EstimatePS::skip_to() shouldn't be called");
863  }
864 
865  bool at_end() const override {
866  return false;
867  }
868 
869  Xapian::docid get_docid() const override {
870  FAIL_TEST("EstimatePS::get_docid() shouldn't be called");
871  }
872 
873  string get_description() const override { return "EstimatePS"; }
874 };
875 
877 DEFINE_TESTCASE(estimaterounding1, backend && !multi && !remote) {
878  Xapian::Database db = get_database("etext");
879  Xapian::Enquire enquire(db);
880  static const struct { Xapian::doccount lb, est, ub, exp; } testcases[] = {
881  // Test rounding down.
882  {411, 424, 439, 420},
883  {1, 312, 439, 300},
884  // Test rounding up.
885  {411, 426, 439, 430},
886  {123, 351, 439, 400},
887  // Rounding based on estimate size if smaller than range size.
888  {1, 12, 439, 10},
889  // Round "5" away from the nearer bound.
890  {1, 15, 439, 20},
891  {1, 350, 439, 300},
892  // Check we round up if rounding down would be out of range.
893  {411, 416, 439, 420},
894  {411, 412, 439, 420},
895  // Check we round down if rounding up would be out of range.
896  {111, 133, 138, 130},
897  {111, 137, 138, 130},
898  // Check we don't round if either way would be out of range.
899  {411, 415, 419, 415},
900  // Leave small estimates alone.
901  {1, 6, 439, 6},
902  };
903  for (auto& t : testcases) {
904  EstimatePS ps(t.lb, t.est, t.ub);
905  enquire.set_query(Xapian::Query(&ps));
906  Xapian::MSet mset = enquire.get_mset(0, 0);
907  // MSet::get_description() includes bounds and raw estimate.
908  tout << mset.get_description() << '\n';
909  TEST_EQUAL(mset.get_matches_estimated(), t.exp);
910  }
911 }
DEFINE_TESTCASE(externalsource1, backend &&!remote &&!multi)
static const testcase testcases[]
Definition: api_unicode.cc:40
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
void next(double) override
Advance the current position to the next matching document.
void reset(const Xapian::Database &, Xapian::doccount) override
Set this PostingSource to the start of the list of postings.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void skip_to(Xapian::docid to_did, double) override
Advance to the specified docid.
double get_weight() const override
Return the weight contribution for the current document.
Xapian::docid get_docid() const override
Return the current docid.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
string get_description() const override
Return a string describing this object.
ChangeMaxweightPostingSource(Xapian::docid maxid_accessed_)
Xapian::doccount & doclen_ub
CheckBoundsPostingSource(Xapian::doccount &doclen_lb_, Xapian::doccount &doclen_ub_)
void reset(const Xapian::Database &database, Xapian::doccount shard_index) override
Set this PostingSource to the start of the list of postings.
Xapian::doccount & doclen_lb
CheckBoundsPostingSource * clone() const override
Clone the posting source.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
CloneTestPostingSource(int &clone_count_)
void reset(const Xapian::Database &, Xapian::doccount) override
Set this PostingSource to the start of the list of postings.
void skip_to(Xapian::docid, double) override
Advance to the specified docid.
string get_description() const override
Return a string describing this object.
void next(double) override
Advance the current position to the next matching document.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
bool at_end() const override
Return true if the current position is past the last entry in this list.
Xapian::docid get_docid() const override
Return the current docid.
PostingSource * clone() const override
Clone the posting source.
PostingSource subclass for injecting tf bounds and estimate.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
void skip_to(Xapian::docid, double) override
Advance to the specified docid.
EstimatePS(Xapian::doccount lb_, Xapian::doccount est_, Xapian::doccount ub_)
string get_description() const override
Return a string describing this object.
PostingSource * clone() const override
Clone the posting source.
void next(double) override
Advance the current position to the next matching document.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void reset(const Xapian::Database &, Xapian::doccount) override
Set this PostingSource to the start of the list of postings.
Xapian::docid get_docid() const override
Return the current docid.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::doccount est
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
PostingSource * clone() const override
Clone the posting source.
Xapian::docid get_docid() const override
Return the current docid.
string get_description() const override
Return a string describing this object.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
double get_weight() const override
Return the weight contribution for the current document.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
void next(double wt) override
Advance the current position to the next matching document.
void reset(const Xapian::Database &db, Xapian::doccount) override
Set this PostingSource to the start of the list of postings.
MyDontAskWeightPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
bool at_end() const override
Return true if the current position is past the last entry in this list.
string get_description() const override
Return a string describing this object.
void next(double wt) override
Advance the current position to the next matching document.
Xapian::doccount num_docs
MyOddPostingSource(const Xapian::Database &db)
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::doccount last_docid
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
void reset(const Xapian::Database &, Xapian::doccount) override
Set this PostingSource to the start of the list of postings.
PostingSource * clone() const override
Clone the posting source.
MyOddPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
Xapian::docid get_docid() const override
Return the current docid.
bool at_end() const override
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
MyOddWeightingPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
string get_description() const override
Return a string describing this object.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void next(double wt) override
Advance the current position to the next matching document.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
PostingSource * clone() const override
Clone the posting source.
double get_weight() const override
Return the weight contribution for the current document.
void init(const Xapian::Database &) override
Older method which did the same job as reset().
MyOddWeightingPostingSource(const Xapian::Database &db)
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::docid get_docid() const override
Return the current docid.
Xapian::docid get_docid() const override
Return the current docid.
OnlyTheFirstPostingSource(bool allow_clone_)
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
void next(double wt) override
Advance the current position to the next matching document.
bool at_end() const override
Return true if the current position is past the last entry in this list.
string get_description() const override
Return a string describing this object.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
void reset(const Xapian::Database &db, Xapian::doccount shard_index) override
Set this PostingSource to the start of the list of postings.
PostingSource * clone() const override
Clone the posting source.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Class implementing a "boolean" weighting scheme.
Definition: weight.h:678
An indexed database of documents.
Definition: database.h:75
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
Definition: database.cc:302
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: database.cc:233
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: database.cc:239
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Definition: database.cc:308
Read weights from a value which is known to decrease as docid increases.
void reset(const Database &db_, Xapian::doccount shard_index) override
Set this PostingSource to the start of the list of postings.
void next(double min_wt) override
Advance the current position to the next matching document.
Class representing a document.
Definition: document.h:64
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Definition: document.cc:191
Querying session.
Definition: enquire.h:57
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
Definition: enquire.cc:85
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
void set_time_limit(double time_limit)
Set a time limit for the match.
Definition: enquire.cc:194
void set_cutoff(int percent_threshold, double weight_threshold=0)
Set lower bounds on percentage and/or weight.
Definition: enquire.cc:172
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: enquire.cc:91
@ DESCENDING
docids sort in descending order.
Definition: enquire.h:134
A posting source which returns a fixed weight for all documents.
Xapian::docid get_docid() const override
Return the current docid.
void next(double min_wt) override
Advance the current position to the next matching document.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void reset(const Database &db_, Xapian::doccount shard_index) override
Set this PostingSource to the start of the list of postings.
void skip_to(Xapian::docid min_docid, double min_wt) override
Advance to the specified docid.
bool check(Xapian::docid min_docid, double min_wt) override
Check if the specified docid occurs.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:271
Iterator over a Xapian::MSet.
Definition: mset.h:535
Class representing a list of search results.
Definition: mset.h:46
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: mset.cc:368
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:467
std::string get_description() const
Return a string describing this object.
Definition: mset.cc:394
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:786
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:791
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: mset.cc:324
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
PostingSource * release()
Start reference counting this object.
double get_maxweight() const noexcept
Return the currently set upper bound on what get_weight() can return.
Class representing a query.
Definition: query.h:45
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
Definition: query.h:166
@ OP_AND_MAYBE
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Xapian::docid get_docid() const
Return the current docid.
A posting source which reads weights from a value slot.
void reset(const Database &db_, Xapian::doccount shard_index)
Set this PostingSource to the start of the list of postings.
This class provides read/write access to a database.
Definition: database.h:964
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
void sleep(double t)
Sleep until the time represented by t.
Definition: realtime.h:127
string str(int value)
Convert int to std::string.
Definition: str.cc:91
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1229
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
<unistd.h>, but with compat.
Convert types to std::string.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:35
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:65
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:293
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:224
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:112
Public interfaces for the Xapian library.