xapian-core  1.4.26
api_postingsource.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008,2009,2011,2015,2016,2019,2024 Olly Betts
5  * Copyright 2008,2009 Lemur Consulting Ltd
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "api_postingsource.h"
27 
28 #include <xapian.h>
29 
30 #include <string>
31 #include "safeunistd.h"
32 
33 #include "str.h"
34 #include "testutils.h"
35 #include "apitest.h"
36 
37 using namespace std;
38 
41 
43 
45 
47  Xapian::doccount last_docid_)
48  : num_docs(num_docs_), last_docid(last_docid_), did(0)
49  { }
50 
51  public:
53  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
54  { }
55 
56  PostingSource* clone() const override {
57  return new MyOddPostingSource(num_docs, last_docid);
58  }
59 
60  void init(const Xapian::Database&) override { did = 0; }
61 
62  // These bounds could be better, but that's not important here.
63  Xapian::doccount get_termfreq_min() const override { return 0; }
64 
65  Xapian::doccount get_termfreq_est() const override { return num_docs / 2; }
66 
67  Xapian::doccount get_termfreq_max() const override { return num_docs; }
68 
69  void next(double wt) override {
70  (void)wt;
71  ++did;
72  if (did % 2 == 0) ++did;
73  }
74 
75  void skip_to(Xapian::docid to_did, double wt) override {
76  (void)wt;
77  did = to_did;
78  if (did % 2 == 0) ++did;
79  }
80 
81  bool at_end() const override {
82  // Doesn't work if last_docid is 2^32 - 1.
83  return did > last_docid;
84  }
85 
86  Xapian::docid get_docid() const override { return did; }
87 
88  string get_description() const override { return "MyOddPostingSource"; }
89 };
90 
91 DEFINE_TESTCASE(externalsource1, backend && !remote && !multi) {
92  // Doesn't work for remote without registering with the server.
93  // Doesn't work for multi because it checks the docid in the
94  // subdatabase.
95  Xapian::Database db(get_database("apitest_phrase"));
96  Xapian::Enquire enq(db);
97  MyOddPostingSource src(db);
98 
99  // Check that passing NULL is rejected as intended.
100  Xapian::PostingSource * nullsrc = NULL;
102 
103  enq.set_query(Xapian::Query(&src));
104 
105  Xapian::MSet mset = enq.get_mset(0, 10);
106  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
107 
109  Xapian::Query("leav"),
110  Xapian::Query(&src));
111  enq.set_query(q);
112 
113  mset = enq.get_mset(0, 10);
114  mset_expect_order(mset, 5, 7, 11, 13, 9);
115 }
116 
117 // Test that trying to use PostingSource with the remote backend throws
118 // Xapian::UnimplementedError as expected (we need to register the class
119 // in xapian-tcpsrv/xapian-progsrv for this to work).
120 DEFINE_TESTCASE(externalsource2, remote) {
121  Xapian::Database db(get_database("apitest_phrase"));
122  Xapian::Enquire enq(db);
123  MyOddPostingSource src(db);
124 
125  enq.set_query(Xapian::Query(&src));
126 
128  Xapian::MSet mset = enq.get_mset(0, 10));
129 
131  Xapian::Query("leav"),
132  Xapian::Query(&src));
133  enq.set_query(q);
134 
136  Xapian::MSet mset = enq.get_mset(0, 10));
137 }
138 
141 
143 
145 
147  Xapian::doccount last_docid_)
148  : num_docs(num_docs_), last_docid(last_docid_), did(0)
149  {
150  set_maxweight(1000);
151  }
152 
153  public:
155  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
156  { }
157 
158  PostingSource* clone() const override {
159  return new MyOddWeightingPostingSource(num_docs, last_docid);
160  }
161 
162  void init(const Xapian::Database&) override { did = 0; }
163 
164  double get_weight() const override {
165  return (did % 2) ? 1000 : 0.001;
166  }
167 
168  // These bounds could be better, but that's not important here.
169  Xapian::doccount get_termfreq_min() const override { return 0; }
170 
171  Xapian::doccount get_termfreq_est() const override { return num_docs / 2; }
172 
173  Xapian::doccount get_termfreq_max() const override { return num_docs; }
174 
175  void next(double wt) override {
176  (void)wt;
177  ++did;
178  }
179 
180  void skip_to(Xapian::docid to_did, double wt) override {
181  (void)wt;
182  did = to_did;
183  }
184 
185  bool at_end() const override {
186  // Doesn't work if last_docid is 2^32 - 1.
187  return did > last_docid;
188  }
189 
190  Xapian::docid get_docid() const override { return did; }
191 
192  string get_description() const override {
193  return "MyOddWeightingPostingSource";
194  }
195 };
196 
197 // Like externalsource1, except we use the weight to favour odd documents.
198 DEFINE_TESTCASE(externalsource3, backend && !remote && !multi) {
199  // Doesn't work for remote without registering with the server.
200  // Doesn't work for multi because it checks the docid in the
201  // subdatabase.
202  Xapian::Database db(get_database("apitest_phrase"));
203  Xapian::Enquire enq(db);
205 
206  enq.set_query(Xapian::Query(&src));
207 
208  Xapian::MSet mset = enq.get_mset(0, 10);
209  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17, 2);
210 
212  Xapian::Query("leav"),
213  Xapian::Query(&src));
214  enq.set_query(q);
215 
216  mset = enq.get_mset(0, 5);
217  mset_expect_order(mset, 5, 7, 11, 13, 9);
218 
219  tout << "max possible weight = " << mset.get_max_possible() << '\n';
220  TEST(mset.get_max_possible() > 1000);
221 
222  enq.set_cutoff(0, 1000.001);
223  mset = enq.get_mset(0, 10);
224  mset_expect_order(mset, 5, 7, 11, 13, 9);
225 
226  tout << "max possible weight = " << mset.get_max_possible() << '\n';
227  TEST(mset.get_max_possible() > 1000);
228 
230  mset = enq.get_mset(0, 10);
231  TEST(mset.empty());
232 
233  TEST_EQUAL(mset.get_max_possible(), 500);
234 
236  mset = enq.get_mset(0, 10);
237  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
238 
239  TEST_EQUAL(mset.get_max_possible(), 2000);
240 }
241 
244 
246 
248 
250  Xapian::doccount last_docid_)
251  : num_docs(num_docs_), last_docid(last_docid_), did(0)
252  { }
253 
254  public:
255  MyDontAskWeightPostingSource() : Xapian::PostingSource() {}
256 
257  PostingSource* clone() const override {
258  return new MyDontAskWeightPostingSource(num_docs, last_docid);
259  }
260 
261  void init(const Xapian::Database& db) override {
262  num_docs = db.get_doccount();
263  last_docid = db.get_lastdocid();
264  did = 0;
265  }
266 
267  double get_weight() const override {
268  FAIL_TEST("MyDontAskWeightPostingSource::get_weight() called");
269  }
270 
271  // These bounds could be better, but that's not important here.
272  Xapian::doccount get_termfreq_min() const override { return num_docs; }
273 
274  Xapian::doccount get_termfreq_est() const override { return num_docs; }
275 
276  Xapian::doccount get_termfreq_max() const override { return num_docs; }
277 
278  void next(double wt) override {
279  (void)wt;
280  ++did;
281  }
282 
283  void skip_to(Xapian::docid to_did, double wt) override {
284  (void)wt;
285  did = to_did;
286  }
287 
288  bool at_end() const override {
289  // Doesn't work if last_docid is 2^32 - 1.
290  return did > last_docid;
291  }
292 
293  Xapian::docid get_docid() const override { return did; }
294 
295  string get_description() const override {
296  return "MyDontAskWeightPostingSource";
297  }
298 };
299 
300 // Check that boolean use doesn't call get_weight().
301 DEFINE_TESTCASE(externalsource4, backend && !remote) {
302  Xapian::Database db(get_database("apitest_phrase"));
303  Xapian::Enquire enq(db);
305 
306  tout << "OP_SCALE_WEIGHT 0\n";
308 
309  Xapian::MSet mset = enq.get_mset(0, 5);
310  mset_expect_order(mset, 1, 2, 3, 4, 5);
311 
312  tout << "OP_FILTER\n";
314  Xapian::Query("leav"),
315  Xapian::Query(&src));
316  enq.set_query(q);
317 
318  mset = enq.get_mset(0, 5);
319  mset_expect_order(mset, 8, 6, 4, 5, 7);
320 
321  tout << "BoolWeight\n";
322  enq.set_query(Xapian::Query(&src));
324 
325  // mset = enq.get_mset(0, 5);
326  // mset_expect_order(mset, 1, 2, 3, 4, 5);
327 }
328 
329 // Check that valueweightsource works correctly.
330 DEFINE_TESTCASE(valueweightsource1, backend) {
331  Xapian::Database db(get_database("apitest_phrase"));
332  Xapian::Enquire enq(db);
334 
335  // Should be in descending order of length
336  tout << "RAW\n";
337  enq.set_query(Xapian::Query(&src));
338  Xapian::MSet mset = enq.get_mset(0, 5);
339  mset_expect_order(mset, 3, 1, 2, 8, 14);
340 
341  // In relevance order
342  tout << "OP_FILTER\n";
344  Xapian::Query("leav"),
345  Xapian::Query(&src));
346  enq.set_query(q);
347  mset = enq.get_mset(0, 5);
348  mset_expect_order(mset, 8, 6, 4, 5, 7);
349 
350  // Should be in descending order of length
351  tout << "OP_FILTER other way\n";
353  Xapian::Query(&src),
354  Xapian::Query("leav"));
355  enq.set_query(q);
356  mset = enq.get_mset(0, 5);
357  mset_expect_order(mset, 8, 14, 9, 13, 7);
358 }
359 
360 // Check that valueweightsource gives the correct bounds for those databases
361 // which support value statistics.
362 DEFINE_TESTCASE(valueweightsource2, valuestats) {
363  Xapian::Database db(get_database("apitest_phrase"));
365  src.init(db);
366  TEST_EQUAL(src.get_termfreq_min(), 17);
367  TEST_EQUAL(src.get_termfreq_est(), 17);
368  TEST_EQUAL(src.get_termfreq_max(), 17);
369  TEST_EQUAL(src.get_maxweight(), 135);
370 }
371 
372 // Check that valueweightsource skip_to() can stay in the same position.
373 DEFINE_TESTCASE(valueweightsource3, valuestats) {
374  Xapian::Database db(get_database("apitest_phrase"));
376  src.init(db);
377  TEST(!src.at_end());
378  src.skip_to(8, 0.0);
379  TEST(!src.at_end());
380  TEST_EQUAL(src.get_docid(), 8);
381  src.skip_to(8, 0.0);
382  TEST(!src.at_end());
383  TEST_EQUAL(src.get_docid(), 8);
384 }
385 
386 // Check that fixedweightsource works correctly.
387 DEFINE_TESTCASE(fixedweightsource1, backend) {
388  Xapian::Database db(get_database("apitest_phrase"));
389  Xapian::Enquire enq(db);
390  double wt = 5.6;
391 
392  {
394 
395  // Should be in increasing order of docid.
396  enq.set_query(Xapian::Query(&src));
397  Xapian::MSet mset = enq.get_mset(0, 5);
398  mset_expect_order(mset, 1, 2, 3, 4, 5);
399 
400  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
401  TEST_EQUAL(i.get_weight(), wt);
402  }
403  }
404 
405  // Do some direct tests, to check the skip_to() and check() methods work.
406  {
407  // Check next and skip_to().
409  src.init(db);
410 
411  src.next(1.0);
412  TEST(!src.at_end());
413  TEST_EQUAL(src.get_docid(), 1);
414  src.next(1.0);
415  TEST(!src.at_end());
416  TEST_EQUAL(src.get_docid(), 2);
417  src.skip_to(5, 1.0);
418  TEST(!src.at_end());
419  TEST_EQUAL(src.get_docid(), 5);
420  src.next(wt * 2);
421  TEST(src.at_end());
422  }
423  {
424  // Check check() as the first operation, followed by next.
426  src.init(db);
427 
428  TEST_EQUAL(src.check(5, 1.0), true);
429  TEST(!src.at_end());
430  TEST_EQUAL(src.get_docid(), 5);
431  src.next(1.0);
432  TEST(!src.at_end());
433  TEST_EQUAL(src.get_docid(), 6);
434  }
435  {
436  // Check check() as the first operation, followed by skip_to().
438  src.init(db);
439 
440  TEST_EQUAL(src.check(5, 1.0), true);
441  TEST(!src.at_end());
442  TEST_EQUAL(src.get_docid(), 5);
443  src.skip_to(6, 1.0);
444  TEST(!src.at_end());
445  TEST_EQUAL(src.get_docid(), 6);
446  src.skip_to(7, wt * 2);
447  TEST(src.at_end());
448  }
449 }
450 
451 // A posting source which changes the maximum weight.
454 
455  // Maximum docid that get_weight() should be called for.
457 
458  public:
460  : did(0), maxid_accessed(maxid_accessed_) { }
461 
462  void init(const Xapian::Database&) override { did = 0; }
463 
464  double get_weight() const override {
465  if (did > maxid_accessed) {
466  FAIL_TEST("ChangeMaxweightPostingSource::get_weight() called "
467  "for docid " + str(did) + ", max id accessed "
468  "should be " + str(maxid_accessed));
469  }
470  return 5 - did;
471  }
472 
473  Xapian::doccount get_termfreq_min() const override { return 4; }
474  Xapian::doccount get_termfreq_est() const override { return 4; }
475  Xapian::doccount get_termfreq_max() const override { return 4; }
476 
477  void next(double) override {
478  ++did;
479  set_maxweight(5 - did);
480  }
481 
482  void skip_to(Xapian::docid to_did, double) override {
483  did = to_did;
484  set_maxweight(5 - did);
485  }
486 
487  bool at_end() const override { return did >= 5; }
488  Xapian::docid get_docid() const override { return did; }
489  string get_description() const override {
490  return "ChangeMaxweightPostingSource";
491  }
492 };
493 
494 // Test a posting source with a variable maxweight.
495 DEFINE_TESTCASE(changemaxweightsource1, backend && !remote && !multi) {
496  // The ChangeMaxweightPostingSource doesn't work with multi or remote.
497  Xapian::Database db(get_database("apitest_phrase"));
498  Xapian::Enquire enq(db);
499 
500  {
503 
505  Xapian::Query(&src1), Xapian::Query(&src2));
506  enq.set_query(q);
507  // Set descending docid order so that the matcher isn't able to
508  // terminate early after 4 documents just because weight == maxweight.
509  enq.set_docid_order(enq.DESCENDING);
510 
511  Xapian::MSet mset = enq.get_mset(0, 4);
512  TEST(src1.at_end());
513  mset_expect_order(mset, 1, 2, 3, 4);
514  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
515  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
516  }
517  }
518 
519  {
522 
524  Xapian::Query(&src1), Xapian::Query(&src2));
525  enq.set_query(q);
526 
527  Xapian::MSet mset = enq.get_mset(0, 2);
528  TEST(!src1.at_end());
529  TEST_EQUAL(src1.get_docid(), 3);
530  TEST_EQUAL_DOUBLE(src1.get_maxweight(), 2.0);
531  mset_expect_order(mset, 1, 2);
532  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
533  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
534  }
535  }
536 }
537 
538 // Test using a valueweightpostingsource which has no entries.
539 DEFINE_TESTCASE(emptyvalwtsource1, backend && !remote && !multi) {
540  Xapian::Database db(get_database("apitest_phrase"));
541  Xapian::Enquire enq(db);
542 
543  Xapian::ValueWeightPostingSource src2(11); // A non-empty slot.
544  Xapian::ValueWeightPostingSource src3(100); // An empty slot.
545  Xapian::Query q1("leav");
546  Xapian::Query q2(&src2);
547  Xapian::Query q3(&src3);
549 
550  // Perform search without ORring with the posting source.
551  Xapian::doccount size1;
552  {
553  enq.set_query(q1);
554  Xapian::MSet mset = enq.get_mset(0, 10);
555  TEST_REL(mset.get_max_possible(), >, 0.0);
556  size1 = mset.size();
557  TEST_REL(size1, >, 0);
558  }
559 
560  // Perform a search with just the non-empty posting source, checking it
561  // returns something.
562  {
563  enq.set_query(q2);
564  Xapian::MSet mset = enq.get_mset(0, 10);
565  TEST_REL(mset.get_max_possible(), >, 0.0);
566  TEST_REL(mset.size(), >, 0);
567  }
568 
569  // Perform a search with just the empty posting source, checking it returns
570  // nothing.
571  {
572  enq.set_query(q3);
573  Xapian::MSet mset = enq.get_mset(0, 10);
574 
575  // get_max_possible() returns 0 here for backends which track the upper
576  // bound on value slot entries, MAX_DBL for backends which don't.
577  // Either is valid.
578  TEST_REL(mset.get_max_possible(), >=, 0.0);
579 
580  TEST_EQUAL(mset.size(), 0);
581  }
582 
583  // Perform a search with the posting source ORred with the normal query.
584  // This is a regression test - it used to return nothing.
585  {
586  enq.set_query(q);
587  Xapian::MSet mset = enq.get_mset(0, 10);
588  TEST_REL(mset.get_max_possible(), >, 0.0);
589  TEST_REL(mset.size(), >, 0.0);
590  TEST_EQUAL(mset.size(), size1);
591  }
592 }
593 
596  public:
597  int & count;
598 
600  : Xapian::DecreasingValueWeightPostingSource(0), count(count_) { }
601 
603  return new SlowDecreasingValueWeightPostingSource(count);
604  }
605 
606  void next(double min_wt) override {
607  sleep(1);
608  ++count;
610  }
611 };
612 
613 static void
615 {
616  for (int wt = 20; wt > 0; --wt) {
617  Xapian::Document doc;
618  doc.add_value(0, Xapian::sortable_serialise(double(wt)));
619  db.add_document(doc);
620  }
621 }
622 
623 // FIXME: This doesn't run for remote databases (we'd need to register
624 // SlowDecreasingValueWeightPostingSource on the remote).
625 DEFINE_TESTCASE(matchtimelimit1, backend && !remote)
626 {
627 #ifndef HAVE_TIMER_CREATE
628  SKIP_TEST("Enquire::set_time_limit() not implemented for this platform");
629 #endif
630  Xapian::Database db = get_database("matchtimelimit1",
632 
633  int count = 0;
 // NOTE(review): the declaration of src is on a line missing from this
 // listing; presumably it is a SlowDecreasingValueWeightPostingSource
 // counting its next() calls into count - confirm against the full file.
635  src.init(db);
636  Xapian::Enquire enquire(db);
637  enquire.set_query(Xapian::Query(&src));
638 
 // Limit the match to 1.5 seconds.
639  enquire.set_time_limit(1.5);
640 
 // One hit is requested, but the third argument (checkatleast) is 1000.
 // The assertions expect one result and a count of exactly 2.
641  Xapian::MSet mset = enquire.get_mset(0, 1, 1000);
642  TEST_EQUAL(mset.size(), 1);
643  TEST_EQUAL(count, 2);
644 }
645 
648  public:
650 
652 
654  Xapian::doccount& doclen_ub_)
655  : Xapian::DecreasingValueWeightPostingSource(0),
656  doclen_lb(doclen_lb_),
657  doclen_ub(doclen_ub_) { }
658 
659  CheckBoundsPostingSource* clone() const override {
660  return new CheckBoundsPostingSource(doclen_lb, doclen_ub);
661  }
662 
663  void init(const Xapian::Database& database) override {
664  doclen_lb = database.get_doclength_lower_bound();
665  doclen_ub = database.get_doclength_upper_bound();
667  }
668 };
669 
670 // Test that doclength bounds are correct.
671 // Regression test for bug fixed in 1.2.25 and 1.4.1.
672 DEFINE_TESTCASE(postingsourcebounds1, backend && !remote)
673 {
674  Xapian::Database db = get_database("apitest_simpledata");
675 
676  Xapian::doccount doclen_lb = 0, doclen_ub = 0;
677  CheckBoundsPostingSource ps(doclen_lb, doclen_ub);
678 
679  Xapian::Enquire enquire(db);
680  enquire.set_query(Xapian::Query(&ps));
681 
682  Xapian::MSet mset = enquire.get_mset(0, 1);
683 
684  TEST_EQUAL(doclen_lb, db.get_doclength_lower_bound());
685  TEST_EQUAL(doclen_ub, db.get_doclength_upper_bound());
686 }
687 
688 // PostingSource which really just counts the clone() calls.
689 // Never actually matches anything, but pretends it might.
692 
693  public:
694  CloneTestPostingSource(int& clone_count_)
695  : clone_count(clone_count_)
696  { }
697 
698  PostingSource* clone() const override {
699  ++clone_count;
700  return new CloneTestPostingSource(clone_count);
701  }
702 
703  void init(const Xapian::Database&) override { }
704 
705  Xapian::doccount get_termfreq_min() const override { return 0; }
706 
707  Xapian::doccount get_termfreq_est() const override { return 1; }
708 
709  Xapian::doccount get_termfreq_max() const override { return 2; }
710 
711  void next(double) override { }
712 
713  void skip_to(Xapian::docid, double) override { }
714 
715  bool at_end() const override {
716  return true;
717  }
718 
719  Xapian::docid get_docid() const override { return 0; }
720 
721  string get_description() const override { return "CloneTestPostingSource"; }
722 };
723 
725 DEFINE_TESTCASE(postingsourceclone1, !backend)
726 {
727  // This fails with 1.3.5-1.4.0 inclusive.
728  {
729  int clones = 0;
730  CloneTestPostingSource ps(clones);
731  TEST_EQUAL(clones, 0);
 // Building a Query from the address of ps is expected to call clone()
 // exactly once, which CloneTestPostingSource records in clones.
732  Xapian::Query q(&ps);
733  TEST_EQUAL(clones, 1);
734  }
735 
736  // Check that clone() isn't needlessly called if reference counting has
737  // been turned on for the PostingSource.
738  {
739  int clones = 0;
 // NOTE(review): the declaration of ps is on a line missing from this
 // listing; presumably it is a pointer to a heap-allocated
 // CloneTestPostingSource, since release() is called through it - confirm.
741  TEST_EQUAL(clones, 0);
742  Xapian::Query q(ps->release());
743  TEST_EQUAL(clones, 0);
744  }
745 }
746 
749 
751 
753 
754  public:
756 
757  explicit
758  OnlyTheFirstPostingSource(bool allow_clone_) : allow_clone(allow_clone_) {}
759 
760  PostingSource* clone() const override {
761  return allow_clone ? new OnlyTheFirstPostingSource(true) : nullptr;
762  }
763 
764  void init(const Xapian::Database& db) override {
765  did = 0;
766  if (shard_index == 0) {
767  last_docid = db.get_lastdocid();
768  } else {
769  last_docid = 0;
770  }
771  ++shard_index;
772  }
773 
774  Xapian::doccount get_termfreq_min() const override { return 0; }
775 
777  return last_docid / 2;
778  }
779 
780  Xapian::doccount get_termfreq_max() const override { return last_docid; }
781 
782  void next(double wt) override {
783  (void)wt;
784  ++did;
785  if (did > last_docid) did = 0;
786  }
787 
788  void skip_to(Xapian::docid to_did, double wt) override {
789  (void)wt;
790  did = to_did;
791  if (did > last_docid) did = 0;
792  }
793 
794  bool at_end() const override {
795  return did == 0;
796  }
797 
798  Xapian::docid get_docid() const override { return did; }
799 
800  string get_description() const override {
801  return "OnlyTheFirstPostingSource";
802  }
803 };
804 
806 
807 DEFINE_TESTCASE(postingsourceshardindex1, multi && !remote) {
808  Xapian::Database db = get_database("apitest_simpledata");
809 
811 
812  Xapian::Enquire enquire(db);
813  {
814  auto ps = new OnlyTheFirstPostingSource(true);
815  enquire.set_query(Xapian::Query(ps->release()));
816 
817  Xapian::MSet mset = enquire.get_mset(0, 10);
818  mset_expect_order(mset, 1, 3, 5);
819  }
820 
821  {
822  /* Regression test for bug fixed in 1.4.12 - we should get an exception
823  * if we use a PostingSource that doesn't support clone() with a multi
824  * DB.
825  */
826  auto ps = new OnlyTheFirstPostingSource(false);
827  enquire.set_query(Xapian::Query(ps->release()));
828 
830  auto m = enquire.get_mset(0, 10));
831  }
832 }
833 
837 
838  public:
840  Xapian::doccount est_,
841  Xapian::doccount ub_)
842  : lb(lb_), est(est_), ub(ub_)
843  { }
844 
845  PostingSource* clone() const override {
846  return new EstimatePS(lb, est, ub);
847  }
848 
849  void init(const Xapian::Database&) override { }
850 
851  Xapian::doccount get_termfreq_min() const override { return lb; }
852 
853  Xapian::doccount get_termfreq_est() const override { return est; }
854 
855  Xapian::doccount get_termfreq_max() const override { return ub; }
856 
857  void next(double) override {
858  FAIL_TEST("EstimatePS::next() shouldn't be called");
859  }
860 
861  void skip_to(Xapian::docid, double) override {
862  FAIL_TEST("EstimatePS::skip_to() shouldn't be called");
863  }
864 
865  bool at_end() const override {
866  return false;
867  }
868 
869  Xapian::docid get_docid() const override {
870  FAIL_TEST("EstimatePS::get_docid() shouldn't be called");
871  }
872 
873  string get_description() const override { return "EstimatePS"; }
874 };
875 
877 DEFINE_TESTCASE(estimaterounding1, backend && !multi && !remote) {
878  Xapian::Database db = get_database("etext");
879  Xapian::Enquire enquire(db);
880  static const struct { Xapian::doccount lb, est, ub, exp; } testcases[] = {
881  // Test rounding down.
882  {411, 424, 439, 420},
883  {1, 312, 439, 300},
884  // Test rounding up.
885  {411, 426, 439, 430},
886  {123, 351, 439, 400},
887  // Rounding based on estimate size if smaller than range size.
888  {1, 12, 439, 10},
889  // Round "5" away from the nearer bound.
890  {1, 15, 439, 20},
891  {1, 350, 439, 300},
892  // Check we round up if rounding down would be out of range.
893  {411, 416, 439, 420},
894  {411, 412, 439, 420},
895  // Check we round down if rounding up would be out of range.
896  {111, 133, 138, 130},
897  {111, 137, 138, 130},
898  // Check we don't round if either way would be out of range.
899  {411, 415, 419, 415},
900  // Leave small estimates alone.
901  {1, 6, 439, 6},
902  };
903  for (auto& t : testcases) {
904  EstimatePS ps(t.lb, t.est, t.ub);
905  enquire.set_query(Xapian::Query(&ps));
906  Xapian::MSet mset = enquire.get_mset(0, 0);
907  // MSet::get_description() includes bounds and raw estimate.
908  tout << mset.get_description() << '\n';
909  TEST_EQUAL(mset.get_matches_estimated(), t.exp);
910  }
911 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
double get_weight() const override
Return the weight contribution for the current document.
OnlyTheFirstPostingSource(bool allow_clone_)
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:856
PostingSource * clone() const override
Clone the posting source.
bool at_end() const override
Return true if the current position is past the last entry in this list.
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
This class is used to access a database, or a group of databases.
Definition: database.h:68
bool at_end() const override
Return true if the current position is past the last entry in this list.
void next(double min_wt) override
Advance the current position to the next matching document.
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:325
bool at_end() const override
Return true if the current position is past the last entry in this list.
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:862
bool at_end() const override
Return true if the current position is past the last entry in this list.
EstimatePS(Xapian::doccount lb_, Xapian::doccount est_, Xapian::doccount ub_)
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
Xapian::docid get_docid() const override
Return the current docid.
Xapian::docid get_docid() const
Return the current docid.
string get_description() const override
Return a string describing this object.
Xapian::doccount ub
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
Definition: omdatabase.cc:401
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:300
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
PostingSource * clone() const override
Clone the posting source.
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
bool at_end() const
Return true if the current position is past the last entry in this list.
void sleep(double t)
Sleep until the time represented by this object.
Definition: realtime.h:127
void next(double wt) override
Advance the current position to the next matching document.
string get_description() const override
Return a string describing this object.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Xapian::docid get_docid() const override
Return the current docid.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:938
void skip_to(Xapian::docid to_did, double) override
Advance to the specified docid.
Convert types to std::string.
static double est(double l, double r, double n)
Definition: orpostlist.cc:306
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1382
MyOddWeightingPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
ChangeMaxweightPostingSource(Xapian::docid maxid_accessed_)
Xapian::doccount & doclen_ub
void init(const Xapian::Database &database) override
Set this PostingSource to the start of the list of postings.
string get_description() const override
Return a string describing this object.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
Xapian::docid get_docid() const override
Return the current docid.
PostingSource * clone() const override
Clone the posting source.
Xapian::doccount num_docs
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
double get_weight() const override
Return the weight contribution for the current document.
Xapian::doccount last_docid
Read weights from a value which is known to decrease as docid increases.
test functionality of the Xapian API
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);.
Definition: testmacros.h:32
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
MyOddWeightingPostingSource(const Xapian::Database &db)
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Definition: omdatabase.cc:421
DEFINE_TESTCASE(externalsource1, backend &&!remote &&!multi)
void next(double) override
Advance the current position to the next matching document.
string get_description() const override
Return a string describing this object.
Class implementing a "boolean" weighting scheme.
Definition: weight.h:433
PostingSource * clone() const override
Clone the posting source.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
bool at_end() const override
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
This class provides read/write access to a database.
Definition: database.h:789
Xapian::docid get_docid() const override
Return the current docid.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
Iterator over a Xapian::MSet.
Definition: mset.h:368
Scale the weight contributed by a subquery.
Definition: query.h:166
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
Public interfaces for the Xapian library.
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
bool at_end() const override
Return true if the current position is past the last entry in this list.
A posting source which returns a fixed weight for all documents.
void set_time_limit(double time_limit)
Set a time limit for the match.
Definition: omenquire.cc:932
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
void init(const Xapian::Database &db) override
Set this PostingSource to the start of the list of postings.
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
MyOddPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
string str(int value)
Convert int to std::string.
Definition: str.cc:90
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629
static Xapian::doccount shard_index
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::docid get_docid() const override
Return the current docid.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
void skip_to(Xapian::docid to_did, double wt) override
Advance to the specified docid.
Xapian::docid get_docid() const override
Return the current docid.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
void skip_to(Xapian::docid, double) override
Advance to the specified docid.
PostingSource * clone() const override
Clone the posting source.
CheckBoundsPostingSource(Xapian::doccount &doclen_lb_, Xapian::doccount &doclen_ub_)
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
void next(double) override
Advance the current position to the next matching document.
void next(double wt) override
Advance the current position to the next matching document.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
A posting source which reads weights from a value slot.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
Match only documents which all subqueries match.
Definition: query.h:84
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
string get_description() const override
Return a string describing this object.
void skip_to(Xapian::docid, double) override
Advance to the specified docid.
double get_weight() const override
Return the weight contribution for the current document.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
MyOddPostingSource(const Xapian::Database &db)
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:74
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
CloneTestPostingSource(int &clone_count_)
void init(const Xapian::Database &db_)
Set this PostingSource to the start of the list of postings.
Xapian::doccount & doclen_lb
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
PostingSource subclass for injecting tf bounds and estimate.
Xapian::docid get_docid() const
Return the current docid.
Match documents which at least one subquery matches.
Definition: query.h:92
void init(const Xapian::Database &db) override
Set this PostingSource to the start of the list of postings.
Xapian-specific test helper functions and macros.
PostingSource * clone() const override
Clone the posting source.
bool at_end() const override
Return true if the current position is past the last entry in this list.
MyDontAskWeightPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
PostingSource * release()
Start reference counting this object.
<unistd.h>, but with compat.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
double get_maxweight() const
Return the currently set upper bound on what get_weight() can return.
void next(double min_wt)
Advance the current position to the next matching document.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
SlowDecreasingValueWeightPostingSource * clone() const override
Clone the posting source.
void next(double min_wt)
Advance the current position to the next matching document.
string get_description() const override
Return a string describing this object.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
string get_description() const override
Return a string describing this object.
void next(double wt) override
Advance the current position to the next matching document.
static void make_matchtimelimit1_db(Xapian::WritableDatabase &db, const string &)
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
void next(double) override
Advance the current position to the next matching document.
Xapian::docid get_docid() const override
Return the current docid.
CheckBoundsPostingSource * clone() const override
Clone the posting source.
void next(double wt) override
Advance the current position to the next matching document.
A handle representing a document in a Xapian database.
Definition: document.h:61
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
docids sort in descending order.
Definition: enquire.h:330
static const testcase testcases[]
Definition: api_unicode.cc:39