xapian-core  1.4.25
api_postingsource.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008,2009,2011,2015,2016,2019 Olly Betts
5  * Copyright 2008,2009 Lemur Consulting Ltd
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "api_postingsource.h"
27 
28 #include <xapian.h>
29 
30 #include <string>
31 #include "safeunistd.h"
32 
33 #include "str.h"
34 #include "testutils.h"
35 #include "apitest.h"
36 
37 using namespace std;
38 
41 
43 
45 
47  Xapian::doccount last_docid_)
48  : num_docs(num_docs_), last_docid(last_docid_), did(0)
49  { }
50 
51  public:
53  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
54  { }
55 
56  PostingSource * clone() const { return new MyOddPostingSource(num_docs, last_docid); } // New heap-allocated source built from the same num_docs/last_docid.
57  // init(): rewind to docid 0, i.e. before the first posting; the database argument is ignored.
58  void init(const Xapian::Database &) { did = 0; }
59 
60  // Term frequency bounds: 0 as the minimum, half the documents as the estimate and every document as the maximum. These bounds could be better, but that's not important here.
61  Xapian::doccount get_termfreq_min() const { return 0; }
62 
63  Xapian::doccount get_termfreq_est() const { return num_docs / 2; }
64 
65  Xapian::doccount get_termfreq_max() const { return num_docs; }
66  // next(): advance to the following odd docid (0 -> 1 -> 3 -> 5 ...).
67  void next(double wt) {
68  (void)wt; // The minimum-weight hint is deliberately unused.
69  ++did;
70  if (did % 2 == 0) ++did; // Landed on an even docid: move on to the odd one after it.
71  }
72  // skip_to(): position on to_did itself when it is odd, otherwise on to_did + 1.
73  void skip_to(Xapian::docid to_did, double wt) {
74  (void)wt; // The minimum-weight hint is deliberately unused.
75  did = to_did;
76  if (did % 2 == 0) ++did; // Even targets are rounded up to the next odd docid.
77  }
78  // at_end(): true once the position has moved past last_docid.
79  bool at_end() const {
80  // Doesn't work if last_docid is 2^32 - 1 (presumably because did could then never compare greater than it; assumes a 32-bit docid - TODO confirm).
81  return did > last_docid;
82  }
83  // get_docid(): the current position; 0 until next() or skip_to() has been called after init().
84  Xapian::docid get_docid() const { return did; }
85 
86  string get_description() const { return "MyOddPostingSource"; }
87 };
88 
89 DEFINE_TESTCASE(externalsource1, backend && !remote && !multi) {
90  // Doesn't work for remote without registering with the server.
91  // Doesn't work for multi because it checks the docid in the
92  // subdatabase.
93  Xapian::Database db(get_database("apitest_phrase"));
94  Xapian::Enquire enq(db);
95  MyOddPostingSource src(db);
96 
97  // Check that passing NULL is rejected as intended.
98  Xapian::PostingSource * nullsrc = NULL;
100 
101  enq.set_query(Xapian::Query(&src));
102 
103  Xapian::MSet mset = enq.get_mset(0, 10);
104  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
105 
107  Xapian::Query("leav"),
108  Xapian::Query(&src));
109  enq.set_query(q);
110 
111  mset = enq.get_mset(0, 10);
112  mset_expect_order(mset, 5, 7, 11, 13, 9);
113 }
114 
115 // Test that trying to use PostingSource with the remote backend throws
116 // Xapian::UnimplementedError as expected (we need to register the class
117 // in xapian-tcpsrv/xapian-progsrv for this to work).
118 DEFINE_TESTCASE(externalsource2, remote) {
119  Xapian::Database db(get_database("apitest_phrase"));
120  Xapian::Enquire enq(db);
121  MyOddPostingSource src(db);
122 
123  enq.set_query(Xapian::Query(&src));
124 
126  Xapian::MSet mset = enq.get_mset(0, 10));
127 
129  Xapian::Query("leav"),
130  Xapian::Query(&src));
131  enq.set_query(q);
132 
134  Xapian::MSet mset = enq.get_mset(0, 10));
135 }
136 
139 
141 
143 
145  Xapian::doccount last_docid_)
146  : num_docs(num_docs_), last_docid(last_docid_), did(0)
147  {
148  set_maxweight(1000);
149  }
150 
151  public:
153  : num_docs(db.get_doccount()), last_docid(db.get_lastdocid()), did(0)
154  { }
155 
156  PostingSource * clone() const { // New heap-allocated source built from the same num_docs/last_docid.
157  return new MyOddWeightingPostingSource(num_docs, last_docid);
158  }
159  // init(): rewind to docid 0, i.e. before the first posting; the database argument is ignored.
160  void init(const Xapian::Database &) { did = 0; }
161  // get_weight(): 1000 for an odd current docid, 0.001 for an even one, so odd documents always outrank even ones.
162  double get_weight() const {
163  return (did % 2) ? 1000 : 0.001;
164  }
165 
166  // Term frequency bounds: 0 as the minimum, half the documents as the estimate and every document as the maximum. These bounds could be better, but that's not important here.
167  Xapian::doccount get_termfreq_min() const { return 0; }
168 
169  Xapian::doccount get_termfreq_est() const { return num_docs / 2; }
170 
171  Xapian::doccount get_termfreq_max() const { return num_docs; }
172  // next(): step to the next docid; unlike get_weight(), iteration does not distinguish odd from even.
173  void next(double wt) {
174  (void)wt; // The minimum-weight hint is deliberately unused.
175  ++did;
176  }
177  // skip_to(): jump straight to to_did.
178  void skip_to(Xapian::docid to_did, double wt) {
179  (void)wt; // The minimum-weight hint is deliberately unused.
180  did = to_did;
181  }
182  // at_end(): true once the position has moved past last_docid.
183  bool at_end() const {
184  // Doesn't work if last_docid is 2^32 - 1 (presumably because did could then never compare greater than it; assumes a 32-bit docid - TODO confirm).
185  return did > last_docid;
186  }
187  // get_docid(): the current position; 0 until next() or skip_to() has been called after init().
188  Xapian::docid get_docid() const { return did; }
189 
190  string get_description() const {
191  return "MyOddWeightingPostingSource";
192  }
193 };
194 
195 // Like externalsource1, except we use the weight to favour odd documents.
196 DEFINE_TESTCASE(externalsource3, backend && !remote && !multi) {
197  // Doesn't work for remote without registering with the server.
198  // Doesn't work for multi because it checks the docid in the
199  // subdatabase.
200  Xapian::Database db(get_database("apitest_phrase"));
201  Xapian::Enquire enq(db);
203 
204  enq.set_query(Xapian::Query(&src));
205 
206  Xapian::MSet mset = enq.get_mset(0, 10);
207  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17, 2);
208 
210  Xapian::Query("leav"),
211  Xapian::Query(&src));
212  enq.set_query(q);
213 
214  mset = enq.get_mset(0, 5);
215  mset_expect_order(mset, 5, 7, 11, 13, 9);
216 
217  tout << "max possible weight = " << mset.get_max_possible() << '\n';
218  TEST(mset.get_max_possible() > 1000);
219 
220  enq.set_cutoff(0, 1000.001);
221  mset = enq.get_mset(0, 10);
222  mset_expect_order(mset, 5, 7, 11, 13, 9);
223 
224  tout << "max possible weight = " << mset.get_max_possible() << '\n';
225  TEST(mset.get_max_possible() > 1000);
226 
228  mset = enq.get_mset(0, 10);
229  TEST(mset.empty());
230 
231  TEST_EQUAL(mset.get_max_possible(), 500);
232 
234  mset = enq.get_mset(0, 10);
235  mset_expect_order(mset, 1, 3, 5, 7, 9, 11, 13, 15, 17);
236 
237  TEST_EQUAL(mset.get_max_possible(), 2000);
238 }
239 
242 
244 
246 
248  Xapian::doccount last_docid_)
249  : num_docs(num_docs_), last_docid(last_docid_), did(0)
250  { }
251 
252  public:
253  MyDontAskWeightPostingSource() : Xapian::PostingSource(), num_docs(0), last_docid(0), did(0) {} // Zero every member up front: clone() copies num_docs/last_docid and can run before init() has assigned them.
254 
255  PostingSource * clone() const { return new MyDontAskWeightPostingSource(num_docs, last_docid); } // New heap-allocated source carrying the current num_docs/last_docid values.
256  // init(): capture the document count and highest docid from the database, then rewind to docid 0.
257  void init(const Xapian::Database &db) {
258  num_docs = db.get_doccount();
259  last_docid = db.get_lastdocid();
260  did = 0;
261  }
262  // get_weight(): must never be reached; any call fails the running test.
263  double get_weight() const {
264  FAIL_TEST("MyDontAskWeightPostingSource::get_weight() called"); // No return follows, so FAIL_TEST presumably never returns normally - confirm in the test harness.
265  }
266 
267  // All three term frequency bounds report num_docs. These bounds could be better, but that's not important here.
268  Xapian::doccount get_termfreq_min() const { return num_docs; }
269 
270  Xapian::doccount get_termfreq_est() const { return num_docs; }
271 
272  Xapian::doccount get_termfreq_max() const { return num_docs; }
273  // next(): step to the next docid.
274  void next(double wt) {
275  (void)wt; // The minimum-weight hint is deliberately unused.
276  ++did;
277  }
278  // skip_to(): jump straight to to_did.
279  void skip_to(Xapian::docid to_did, double wt) {
280  (void)wt; // The minimum-weight hint is deliberately unused.
281  did = to_did;
282  }
283  // at_end(): true once the position has moved past last_docid.
284  bool at_end() const {
285  // Doesn't work if last_docid is 2^32 - 1 (presumably because did could then never compare greater than it; assumes a 32-bit docid - TODO confirm).
286  return did > last_docid;
287  }
288  // get_docid(): the current position; 0 straight after init().
289  Xapian::docid get_docid() const { return did; }
290 
291  string get_description() const {
292  return "MyDontAskWeightPostingSource";
293  }
294 };
295 
296 // Check that boolean use doesn't call get_weight().
297 DEFINE_TESTCASE(externalsource4, backend && !remote) {
298  Xapian::Database db(get_database("apitest_phrase"));
299  Xapian::Enquire enq(db);
301 
302  tout << "OP_SCALE_WEIGHT 0\n";
304 
305  Xapian::MSet mset = enq.get_mset(0, 5);
306  mset_expect_order(mset, 1, 2, 3, 4, 5);
307 
308  tout << "OP_FILTER\n";
310  Xapian::Query("leav"),
311  Xapian::Query(&src));
312  enq.set_query(q);
313 
314  mset = enq.get_mset(0, 5);
315  mset_expect_order(mset, 8, 6, 4, 5, 7);
316 
317  tout << "BoolWeight\n";
318  enq.set_query(Xapian::Query(&src));
320 
321  // mset = enq.get_mset(0, 5);
322  // mset_expect_order(mset, 1, 2, 3, 4, 5);
323 }
324 
325 // Check that valueweightsource works correctly.
326 DEFINE_TESTCASE(valueweightsource1, backend) {
327  Xapian::Database db(get_database("apitest_phrase"));
328  Xapian::Enquire enq(db);
330 
331  // Should be in descending order of length
332  tout << "RAW\n";
333  enq.set_query(Xapian::Query(&src));
334  Xapian::MSet mset = enq.get_mset(0, 5);
335  mset_expect_order(mset, 3, 1, 2, 8, 14);
336 
337  // In relevance order
338  tout << "OP_FILTER\n";
340  Xapian::Query("leav"),
341  Xapian::Query(&src));
342  enq.set_query(q);
343  mset = enq.get_mset(0, 5);
344  mset_expect_order(mset, 8, 6, 4, 5, 7);
345 
346  // Should be in descending order of length
347  tout << "OP_FILTER other way\n";
349  Xapian::Query(&src),
350  Xapian::Query("leav"));
351  enq.set_query(q);
352  mset = enq.get_mset(0, 5);
353  mset_expect_order(mset, 8, 14, 9, 13, 7);
354 }
355 
356 // Check that valueweightsource gives the correct bounds for those databases
357 // which support value statistics.
358 DEFINE_TESTCASE(valueweightsource2, valuestats) {
359  Xapian::Database db(get_database("apitest_phrase"));
361  src.init(db);
362  TEST_EQUAL(src.get_termfreq_min(), 17);
363  TEST_EQUAL(src.get_termfreq_est(), 17);
364  TEST_EQUAL(src.get_termfreq_max(), 17);
365  TEST_EQUAL(src.get_maxweight(), 135);
366 }
367 
368 // Check that valueweightsource skip_to() can stay in the same position.
369 DEFINE_TESTCASE(valueweightsource3, valuestats) {
370  Xapian::Database db(get_database("apitest_phrase"));
372  src.init(db);
373  TEST(!src.at_end());
374  src.skip_to(8, 0.0);
375  TEST(!src.at_end());
376  TEST_EQUAL(src.get_docid(), 8);
377  src.skip_to(8, 0.0);
378  TEST(!src.at_end());
379  TEST_EQUAL(src.get_docid(), 8);
380 }
381 
382 // Check that fixedweightsource works correctly.
383 DEFINE_TESTCASE(fixedweightsource1, backend) {
384  Xapian::Database db(get_database("apitest_phrase"));
385  Xapian::Enquire enq(db);
386  double wt = 5.6;
387 
388  {
390 
391  // Should be in increasing order of docid.
392  enq.set_query(Xapian::Query(&src));
393  Xapian::MSet mset = enq.get_mset(0, 5);
394  mset_expect_order(mset, 1, 2, 3, 4, 5);
395 
396  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
397  TEST_EQUAL(i.get_weight(), wt);
398  }
399  }
400 
401  // Do some direct tests, to check the skip_to() and check() methods work.
402  {
403  // Check next and skip_to().
405  src.init(db);
406 
407  src.next(1.0);
408  TEST(!src.at_end());
409  TEST_EQUAL(src.get_docid(), 1);
410  src.next(1.0);
411  TEST(!src.at_end());
412  TEST_EQUAL(src.get_docid(), 2);
413  src.skip_to(5, 1.0);
414  TEST(!src.at_end());
415  TEST_EQUAL(src.get_docid(), 5);
416  src.next(wt * 2);
417  TEST(src.at_end());
418  }
419  {
420  // Check check() as the first operation, followed by next.
422  src.init(db);
423 
424  TEST_EQUAL(src.check(5, 1.0), true);
425  TEST(!src.at_end());
426  TEST_EQUAL(src.get_docid(), 5);
427  src.next(1.0);
428  TEST(!src.at_end());
429  TEST_EQUAL(src.get_docid(), 6);
430  }
431  {
432  // Check check() as the first operation, followed by skip_to().
434  src.init(db);
435 
436  TEST_EQUAL(src.check(5, 1.0), true);
437  TEST(!src.at_end());
438  TEST_EQUAL(src.get_docid(), 5);
439  src.skip_to(6, 1.0);
440  TEST(!src.at_end());
441  TEST_EQUAL(src.get_docid(), 6);
442  src.skip_to(7, wt * 2);
443  TEST(src.at_end());
444  }
445 }
446 
447 // A posting source which changes the maximum weight.
450 
451  // Maximum docid that get_weight() should be called for.
453 
454  public:
456  : did(0), maxid_accessed(maxid_accessed_) { }
457 
458  void init(const Xapian::Database &) { did = 0; } // Rewind to docid 0; the database argument is ignored.
459  // get_weight(): weight is 5 - docid (4, 3, 2, 1 for docids 1..4); fails the test if asked about a docid above maxid_accessed.
460  double get_weight() const {
461  if (did > maxid_accessed) {
462  FAIL_TEST("ChangeMaxweightPostingSource::get_weight() called "
463  "for docid " + str(did) + ", max id accessed "
464  "should be " + str(maxid_accessed));
465  }
466  return 5 - did; // NOTE(review): if did is an unsigned type (declaration not visible here) this wraps for did > 5 - confirm callers never get that far.
467  }
468  // The source reports exactly four postings for all three term frequency bounds.
469  Xapian::doccount get_termfreq_min() const { return 4; }
470  Xapian::doccount get_termfreq_est() const { return 4; }
471  Xapian::doccount get_termfreq_max() const { return 4; }
472  // next(): step to the next docid and lower the advertised maximum weight to that docid's own weight.
473  void next(double) {
474  ++did;
475  set_maxweight(5 - did); // Weights only decrease from here on, so the current weight is the new upper bound.
476  }
477  // skip_to(): jump to to_did and update the advertised maximum weight in the same way as next().
478  void skip_to(Xapian::docid to_did, double) {
479  did = to_did;
480  set_maxweight(5 - did);
481  }
482  // at_end(): docids 1..4 are the only postings, so 5 and above means finished.
483  bool at_end() const { return did >= 5; }
484  Xapian::docid get_docid() const { return did; }
485  string get_description() const { return "ChangeMaxweightPostingSource"; }
486 };
487 
488 // Test a posting source with a variable maxweight.
489 DEFINE_TESTCASE(changemaxweightsource1, backend && !remote && !multi) {
490  // The ChangeMaxweightPostingSource doesn't work with multi or remote.
491  Xapian::Database db(get_database("apitest_phrase"));
492  Xapian::Enquire enq(db);
493 
494  {
497 
499  Xapian::Query(&src1), Xapian::Query(&src2));
500  enq.set_query(q);
501  // Set descending docid order so that the matcher isn't able to
502  // terminate early after 4 documents just because weight == maxweight.
503  enq.set_docid_order(enq.DESCENDING);
504 
505  Xapian::MSet mset = enq.get_mset(0, 4);
506  TEST(src1.at_end());
507  mset_expect_order(mset, 1, 2, 3, 4);
508  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
509  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
510  }
511  }
512 
513  {
516 
518  Xapian::Query(&src1), Xapian::Query(&src2));
519  enq.set_query(q);
520 
521  Xapian::MSet mset = enq.get_mset(0, 2);
522  TEST(!src1.at_end());
523  TEST_EQUAL(src1.get_docid(), 3);
524  TEST_EQUAL_DOUBLE(src1.get_maxweight(), 2.0);
525  mset_expect_order(mset, 1, 2);
526  for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
527  TEST_EQUAL_DOUBLE(i.get_weight(), 7.5 - *i);
528  }
529  }
530 }
531 
532 // Test using a valueweightpostingsource which has no entries.
533 DEFINE_TESTCASE(emptyvalwtsource1, backend && !remote && !multi) {
534  Xapian::Database db(get_database("apitest_phrase"));
535  Xapian::Enquire enq(db);
536 
537  Xapian::ValueWeightPostingSource src2(11); // A non-empty slot.
538  Xapian::ValueWeightPostingSource src3(100); // An empty slot.
539  Xapian::Query q1("leav");
540  Xapian::Query q2(&src2);
541  Xapian::Query q3(&src3);
543 
544  // Perform search without ORring with the posting source.
545  Xapian::doccount size1;
546  {
547  enq.set_query(q1);
548  Xapian::MSet mset = enq.get_mset(0, 10);
549  TEST_REL(mset.get_max_possible(), >, 0.0);
550  size1 = mset.size();
551  TEST_REL(size1, >, 0);
552  }
553 
554  // Perform a search with just the non-empty posting source, checking it
555  // returns something.
556  {
557  enq.set_query(q2);
558  Xapian::MSet mset = enq.get_mset(0, 10);
559  TEST_REL(mset.get_max_possible(), >, 0.0);
560  TEST_REL(mset.size(), >, 0);
561  }
562 
563  // Perform a search with just the empty posting source, checking it returns
564  // nothing.
565  {
566  enq.set_query(q3);
567  Xapian::MSet mset = enq.get_mset(0, 10);
568 
569  // get_max_possible() returns 0 here for backends which track the upper
570  // bound on value slot entries, MAX_DBL for backends which don't.
571  // Either is valid.
572  TEST_REL(mset.get_max_possible(), >=, 0.0);
573 
574  TEST_EQUAL(mset.size(), 0);
575  }
576 
577  // Perform a search with the posting source ORred with the normal query.
578  // This is a regression test - it used to return nothing.
579  {
580  enq.set_query(q);
581  Xapian::MSet mset = enq.get_mset(0, 10);
582  TEST_REL(mset.get_max_possible(), >, 0.0);
583  TEST_REL(mset.size(), >, 0.0);
584  TEST_EQUAL(mset.size(), size1);
585  }
586 }
587 
590  public:
591  int & count;
592 
594  : Xapian::DecreasingValueWeightPostingSource(0), count(count_) { }
595 
597  {
598  return new SlowDecreasingValueWeightPostingSource(count);
599  }
600 
601  void next(double min_wt) {
602  sleep(1);
603  ++count;
605  }
606 };
607 
608 static void
610 {
611  for (int wt = 20; wt > 0; --wt) {
612  Xapian::Document doc;
613  doc.add_value(0, Xapian::sortable_serialise(double(wt)));
614  db.add_document(doc);
615  }
616 }
617 
618 // FIXME: This doesn't run for remote databases (we'd need to register
619 // SlowDecreasingValueWeightPostingSource on the remote).
620 DEFINE_TESTCASE(matchtimelimit1, backend && !remote)
621 {
622 #ifndef HAVE_TIMER_CREATE
623  SKIP_TEST("Enquire::set_time_limit() not implemented for this platform");
624 #endif
625  Xapian::Database db = get_database("matchtimelimit1",
627 
628  int count = 0;
630  src.init(db);
631  Xapian::Enquire enquire(db);
632  enquire.set_query(Xapian::Query(&src));
633 
634  enquire.set_time_limit(1.5);
635 
636  Xapian::MSet mset = enquire.get_mset(0, 1, 1000);
637  TEST_EQUAL(mset.size(), 1);
638  TEST_EQUAL(count, 2);
639 }
640 
643  public:
645 
647 
649  Xapian::doccount& doclen_ub_)
650  : Xapian::DecreasingValueWeightPostingSource(0),
651  doclen_lb(doclen_lb_),
652  doclen_ub(doclen_ub_) { }
653 
655  {
656  return new CheckBoundsPostingSource(doclen_lb, doclen_ub);
657  }
658 
659  void init(const Xapian::Database& database) {
660  doclen_lb = database.get_doclength_lower_bound();
661  doclen_ub = database.get_doclength_upper_bound();
663  }
664 };
665 
666 // Test that the document length bounds a PostingSource sees in init() are correct.
667 // Regression test for bug fixed in 1.2.25 and 1.4.1.
668 DEFINE_TESTCASE(postingsourcebounds1, backend && !remote)
669 {
670  Xapian::Database db = get_database("apitest_simpledata");
671 
672  Xapian::doccount doclen_lb = 0, doclen_ub = 0; // Filled in by CheckBoundsPostingSource::init() through the references passed below.
673  CheckBoundsPostingSource ps(doclen_lb, doclen_ub);
674 
675  Xapian::Enquire enquire(db);
676  enquire.set_query(Xapian::Query(&ps)); // A clone of ps would still report here: clone() forwards the same two references.
677 
678  Xapian::MSet mset = enquire.get_mset(0, 1); // The result is not inspected; the match is run so that init() gets called.
679  // The bounds recorded inside init() must equal what the database itself reports.
680  TEST_EQUAL(doclen_lb, db.get_doclength_lower_bound());
681  TEST_EQUAL(doclen_ub, db.get_doclength_upper_bound());
682 }
683 
684 // PostingSource which really just counts the clone() calls.
685 // Never actually matches anything, but pretends it might.
688 
689  public:
690  CloneTestPostingSource(int& clone_count_) // Binds to a caller-owned counter that clone() increments.
691  : clone_count(clone_count_)
692  { }
693  // clone(): bump the shared counter, then hand back a new source bound to that same counter.
694  PostingSource * clone() const {
695  ++clone_count; // Modifiable inside a const method - presumably clone_count is an int& member (declaration not visible here).
696  return new CloneTestPostingSource(clone_count);
697  }
698 
699  void init(const Xapian::Database&) { } // Nothing to reset: this source has no position.
700  // Term frequency bounds of 0 / 1 / 2: the source claims it might match even though at_end() is always true.
701  Xapian::doccount get_termfreq_min() const { return 0; }
702 
703  Xapian::doccount get_termfreq_est() const { return 1; }
704 
705  Xapian::doccount get_termfreq_max() const { return 2; }
706 
707  void next(double) { } // No-op.
708 
709  void skip_to(Xapian::docid, double) { } // No-op.
710  // at_end(): always true, so no posting is ever produced.
711  bool at_end() const {
712  return true;
713  }
714 
715  Xapian::docid get_docid() const { return 0; }
716 
717  string get_description() const { return "CloneTestPostingSource"; }
718 };
719 
721 DEFINE_TESTCASE(postingsourceclone1, !backend)
722 {
723  // This fails with 1.3.5-1.4.0 inclusive.
724  {
725  int clones = 0;
726  CloneTestPostingSource ps(clones);
727  TEST_EQUAL(clones, 0);
728  Xapian::Query q(&ps);
729  TEST_EQUAL(clones, 1);
730  }
731 
732  // Check that clone() isn't needlessly called if reference counting has
733  // been turned on for the PostingSource.
734  {
735  int clones = 0;
737  TEST_EQUAL(clones, 0);
738  Xapian::Query q(ps->release());
739  TEST_EQUAL(clones, 0);
740  }
741 }
742 
745 
747 
749 
750  public:
752 
753  explicit // allow_clone_ selects whether clone() returns a real copy or nullptr.
754  OnlyTheFirstPostingSource(bool allow_clone_) : allow_clone(allow_clone_) {}
755  // clone(): a new cloneable source when cloning is allowed, otherwise nullptr.
756  PostingSource* clone() const {
757  return allow_clone ? new OnlyTheFirstPostingSource(true) : nullptr;
758  }
759  // init(): only the call made while shard_index is 0 gets a real docid range; every later call leaves the source empty.
760  void init(const Xapian::Database& db) {
761  did = 0;
762  if (shard_index == 0) {
763  last_docid = db.get_lastdocid();
764  } else {
765  last_docid = 0; // With did reset to 0, next()/skip_to() can never leave the end state.
766  }
767  ++shard_index; // Counts init() calls - presumably one per shard of a multi-database; confirm against the matcher.
768  }
769  // Term frequency bounds derived from last_docid: 0 as the minimum, half of it as the estimate, all of it as the maximum.
770  Xapian::doccount get_termfreq_min() const { return 0; }
771 
772  Xapian::doccount get_termfreq_est() const { return last_docid / 2; }
773 
774  Xapian::doccount get_termfreq_max() const { return last_docid; }
775  // next(): step to the next docid; did is set back to 0 (the end marker) once it passes last_docid.
776  void next(double wt) {
777  (void)wt; // The minimum-weight hint is deliberately unused.
778  ++did;
779  if (did > last_docid) did = 0;
780  }
781  // skip_to(): jump to to_did, falling back to the end marker 0 when that is past last_docid.
782  void skip_to(Xapian::docid to_did, double wt) {
783  (void)wt; // The minimum-weight hint is deliberately unused.
784  did = to_did;
785  if (did > last_docid) did = 0;
786  }
787  // at_end(): docid 0 doubles as the end marker, so this is also true straight after init().
788  bool at_end() const {
789  return did == 0;
790  }
791 
792  Xapian::docid get_docid() const { return did; }
793 
794  string get_description() const { return "OnlyTheFirstPostingSource"; }
795 };
796 
798 
799 DEFINE_TESTCASE(postingsourceshardindex1, multi && !remote) {
800  Xapian::Database db = get_database("apitest_simpledata");
801 
803 
804  Xapian::Enquire enquire(db);
805  {
806  auto ps = new OnlyTheFirstPostingSource(true);
807  enquire.set_query(Xapian::Query(ps->release()));
808 
809  Xapian::MSet mset = enquire.get_mset(0, 10);
810  mset_expect_order(mset, 1, 3, 5);
811  }
812 
813  {
814  /* Regression test for bug fixed in 1.4.12 - we should get an exception
815  * if we use a PostingSource that doesn't support clone() with a multi
816  * DB.
817  */
818  auto ps = new OnlyTheFirstPostingSource(false);
819  enquire.set_query(Xapian::Query(ps->release()));
820 
822  auto m = enquire.get_mset(0, 10));
823  }
824 }
825 
829 
830  public:
832  Xapian::doccount est_,
833  Xapian::doccount ub_)
834  : lb(lb_), est(est_), ub(ub_)
835  { }
836 
837  PostingSource * clone() const { return new EstimatePS(lb, est, ub); } // New heap-allocated source reporting the same three figures.
838 
839  void init(const Xapian::Database &) { } // Nothing to reset: this source keeps no position.
840 
841  Xapian::doccount get_termfreq_min() const { return lb; } // Lower bound exactly as supplied at construction.
842 
844 
845  Xapian::doccount get_termfreq_max() const { return ub; } // Upper bound exactly as supplied at construction.
846  // next(): iteration is not expected on this source; any call fails the running test.
847  void next(double) {
848  FAIL_TEST("EstimatePS::next() shouldn't be called");
849  }
850  // skip_to(): likewise fails the running test if it is ever called.
851  void skip_to(Xapian::docid, double) {
852  FAIL_TEST("EstimatePS::skip_to() shouldn't be called");
853  }
854  // at_end(): always false, i.e. the source never reports itself exhausted.
855  bool at_end() const {
856  return false;
857  }
858 
860  FAIL_TEST("EstimatePS::get_docid() shouldn't be called");
861  }
862 
863  string get_description() const { return "EstimatePS"; }
864 };
865 
867 DEFINE_TESTCASE(estimaterounding1, backend && !multi && !remote) { // Checks that MSet::get_matches_estimated() rounds the raw estimate as each table row expects.
868  Xapian::Database db = get_database("etext");
869  Xapian::Enquire enquire(db);
870  static const struct { Xapian::doccount lb, est, ub, exp; } testcases[] = { // lb/est/ub are fed to EstimatePS; exp is the estimate the MSet must report.
871  // Test rounding down.
872  {411, 424, 439, 420},
873  {1, 312, 439, 300},
874  // Test rounding up.
875  {411, 426, 439, 430},
876  {123, 351, 439, 400},
877  // Rounding based on estimate size if smaller than range size.
878  {1, 12, 439, 10},
879  // Round "5" away from the nearer bound.
880  {1, 15, 439, 20},
881  {1, 350, 439, 300},
882  // Check we round up if rounding down would be out of range.
883  {411, 416, 439, 420},
884  {411, 412, 439, 420},
885  // Check we round down if rounding up would be out of range.
886  {111, 133, 138, 130},
887  {111, 137, 138, 130},
888  // Check we don't round if either way would be out of range.
889  {411, 415, 419, 415},
890  // Leave small estimates alone.
891  {1, 6, 439, 6},
892  };
893  for (auto& t : testcases) {
894  EstimatePS ps(t.lb, t.est, t.ub); // Its next() and skip_to() fail the test, so the row also checks that the source is not iterated.
895  enquire.set_query(Xapian::Query(&ps));
896  Xapian::MSet mset = enquire.get_mset(0, 0); // Zero results requested: only the match statistics are examined.
897  // MSet::get_description() includes bounds and raw estimate.
898  tout << mset.get_description() << '\n';
899  TEST_EQUAL(mset.get_matches_estimated(), t.exp);
900  }
901 }
PostingSource * clone() const
Clone the posting source.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
string get_description() const
Return a string describing this object.
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
OnlyTheFirstPostingSource(bool allow_clone_)
void next(double wt)
Advance the current position to the next matching document.
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:850
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
This class is used to access a database, or a group of databases.
Definition: database.h:68
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:325
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:856
Xapian::docid get_docid() const
Return the current docid.
EstimatePS(Xapian::doccount lb_, Xapian::doccount est_, Xapian::doccount ub_)
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
void next(double wt)
Advance the current position to the next matching document.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
SlowDecreasingValueWeightPostingSource * clone() const
Clone the posting source.
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount ub
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
Definition: omdatabase.cc:401
void next(double min_wt)
Advance the current position to the next matching document.
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:300
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
bool at_end() const
Return true if the current position is past the last entry in this list.
void sleep(double t)
Sleep until the time represented by this object.
Definition: realtime.h:127
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
PostingSource * clone() const
Clone the posting source.
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
Convert types to std::string.
void skip_to(Xapian::docid, double)
Advance to the specified docid.
static double est(double l, double r, double n)
Definition: orpostlist.cc:306
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1365
PostingSource * clone() const
Clone the posting source.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
MyOddWeightingPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
ChangeMaxweightPostingSource(Xapian::docid maxid_accessed_)
Xapian::doccount & doclen_ub
string get_description() const
Return a string describing this object.
PostingSource * clone() const
Clone the posting source.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount num_docs
Xapian::doccount last_docid
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Read weights from a value which is known to decrease as docid increases.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
test functionality of the Xapian API
void next(double)
Advance the current position to the next matching document.
string get_description() const
Return a string describing this object.
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
void next(double wt)
Advance the current position to the next matching document.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
MyOddWeightingPostingSource(const Xapian::Database &db)
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Definition: omdatabase.cc:421
DEFINE_TESTCASE(externalsource1, backend &&!remote &&!multi)
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
void next(double)
Advance the current position to the next matching document.
void init(const Xapian::Database &database)
Set this PostingSource to the start of the list of postings.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
bool at_end() const
Return true if the current position is past the last entry in this list.
void skip_to(Xapian::docid, double)
Advance to the specified docid.
This class provides read/write access to a database.
Definition: database.h:789
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:368
Scale the weight contributed by a subquery.
Definition: query.h:166
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
bool at_end() const
Return true if the current position is past the last entry in this list.
Public interfaces for the Xapian library.
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
string get_description() const
Return a string describing this object.
PostingSource * clone() const
Clone the posting source.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
A posting source which returns a fixed weight for all documents.
void set_time_limit(double time_limit)
Set a time limit for the match.
Definition: omenquire.cc:926
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Xapian::docid get_docid() const
Return the current docid.
string get_description() const
Return a string describing this object.
PostingSource * clone() const
Clone the posting source.
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
double get_weight() const
Return the weight contribution for the current document.
MyOddPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
string get_description() const
Return a string describing this object.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629
void init(const Xapian::Database &db)
Set this PostingSource to the start of the list of postings.
static Xapian::doccount shard_index
bool at_end() const
Return true if the current position is past the last entry in this list.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
Xapian::docid get_docid() const
Return the current docid.
double get_weight() const
Return the weight contribution for the current document.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
CheckBoundsPostingSource(Xapian::doccount &doclen_lb_, Xapian::doccount &doclen_ub_)
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
A posting source which reads weights from a value slot.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
CheckBoundsPostingSource * clone() const
Clone the posting source.
Match only documents which all subqueries match.
Definition: query.h:84
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
MyOddPostingSource(const Xapian::Database &db)
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:74
void skip_to(Xapian::docid to_did, double wt)
Advance to the specified docid.
This class provides an interface to the information retrieval system for the purpose of searching.
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
CloneTestPostingSource(int &clone_count_)
void init(const Xapian::Database &db_)
Set this PostingSource to the start of the list of postings.
bool at_end() const
Return true if the current position is past the last entry in this list.
void next(double)
Advance the current position to the next matching document.
Xapian::doccount & doclen_lb
PostingSource subclass for injecting tf bounds and estimate.
Xapian::docid get_docid() const
Return the current docid.
Match documents which at least one subquery matches.
Definition: query.h:92
Xapian-specific test helper functions and macros.
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
MyDontAskWeightPostingSource(Xapian::doccount num_docs_, Xapian::doccount last_docid_)
void init(const Xapian::Database &db)
Set this PostingSource to the start of the list of postings.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
PostingSource * release()
Start reference counting this object.
<unistd.h>, but with compat.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
double get_maxweight() const
Return the currently set upper bound on what get_weight() can return.
void next(double min_wt)
Advance the current position to the next matching document.
Xapian::docid get_docid() const
Return the current docid.
void next(double min_wt)
Advance the current position to the next matching document.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
static void make_matchtimelimit1_db(Xapian::WritableDatabase &db, const string &)
double get_weight() const
Return the weight contribution for the current document.
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
void next(double wt)
Advance the current position to the next matching document.
void skip_to(Xapian::docid to_did, double)
Advance to the specified docid.
A handle representing a document in a Xapian database.
Definition: document.h:61
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
string get_description() const
Return a string describing this object.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Document IDs sort in descending order.
Definition: enquire.h:328
static const testcase testcases[]
Definition: api_unicode.cc:39