xapian-core  2.0.0
api_percentages.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008,2009 Lemur Consulting Ltd
5  * Copyright (C) 2008,2009,2010,2011,2012,2014,2024 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "api_percentages.h"
25 
26 #define XAPIAN_DEPRECATED(X) X
27 #include <xapian.h>
28 
29 #include "apitest.h"
30 #include "str.h"
31 #include "testutils.h"
32 
33 #include <cfloat>
34 
35 using namespace std;
36 
37 // Test that percentages reported are the same regardless of which part of the
38 // mset is returned, for sort-by-value search. Regression test for bug#216 in
39 // 1.0.10 and earlier with returned percentages.
40 DEFINE_TESTCASE(consistency3, backend) {
41  Xapian::Database db(get_database("apitest_sortconsist"));
42  Xapian::Enquire enquire(db);
43  enquire.set_query(Xapian::Query("foo"));
44  enquire.set_sort_by_value(1, 0);
45  Xapian::doccount lots = 3;
46  Xapian::MSet bigmset = enquire.get_mset(0, lots);
47  TEST_EQUAL(bigmset.size(), lots);
48  for (Xapian::doccount start = 0; start < lots; ++start) {
49  tout << *bigmset[start] << ":" << bigmset[start].get_weight() << ":"
50  << bigmset[start].get_percent() << "%\n";
51  for (Xapian::doccount size = 0; size < lots - start; ++size) {
52  Xapian::MSet mset = enquire.get_mset(start, size);
53  if (mset.size()) {
54  TEST_EQUAL(start + mset.size(),
55  min(start + size, bigmset.size()));
56  } else if (size) {
57  TEST(start >= bigmset.size());
58  }
59  for (Xapian::doccount i = 0; i < mset.size(); ++i) {
60  TEST_EQUAL(*mset[i], *bigmset[start + i]);
61  TEST_EQUAL_DOUBLE(mset[i].get_weight(),
62  bigmset[start + i].get_weight());
63  TEST_EQUAL_DOUBLE(mset[i].get_percent(),
64  bigmset[start + i].get_percent());
65  }
66  }
67  }
68 }
69 
// Test posting source backed by an in-memory list of (docid, weight) pairs.
// NOTE(review): the class declaration line (listing line 70) is missing from
// this listing; the `override` specifiers and the PostingSource* returned by
// clone() suggest it derives from Xapian::PostingSource - confirm.

// The (docid, weight) entries, in the order they were appended.
71  vector<pair<Xapian::docid, double>> weights;
// Current position within `weights`; first assigned by next().
72  vector<pair<Xapian::docid, double>>::const_iterator i;
// False until next() has been called since construction or reset().
73  bool started;
74 
// Constructor used by clone(): copies the entries and sets the maximum
// weight to max_wt.
75  MyPostingSource(const vector<pair<Xapian::docid, double>>& weights_,
76  double max_wt)
77  : weights(weights_), started(false)
78  {
79  set_maxweight(max_wt);
80  }
81 
82  public:
// Create a source with no entries.
83  MyPostingSource() : started(false) { }
84 
// Return a heap-allocated copy carrying the same entries and maximum weight.
85  PostingSource* clone() const override {
86  return new MyPostingSource(weights, get_maxweight());
87  }
88 
// Append an entry for document `did` with weight `wt`, raising the maximum
// weight if `wt` exceeds the current maximum.
89  void append_docweight(Xapian::docid did, double wt) {
90  weights.push_back(make_pair(did, wt));
91  if (wt > get_maxweight()) set_maxweight(wt);
92  }
93 
// Rewind: the next call to next() will position on the first entry.  Both
// arguments are ignored.
94  void reset(const Xapian::Database&, Xapian::doccount) override {
95  started = false;
96  }
97 
// Weight stored for the entry at the current position.
98  double get_weight() const override { return i->second; }
99 
// NOTE(review): the signature lines of the next three methods (listing lines
// 100, 103 and 106) are missing from this listing; presumably they are the
// get_termfreq_min/est/max overrides named in the index at the end of the
// page - confirm.  Each one returns the number of entries in `weights`.
101  return weights.size();
102  }
104  return weights.size();
105  }
107  return weights.size();
108  }
109 
// Position on the first entry on the first call after construction or
// reset(); otherwise advance by one entry.  The weight argument is ignored.
110  void next(double /*wt*/) override {
111  if (!started) {
112  i = weights.begin();
113  started = true;
114  } else {
115  ++i;
116  }
117  }
118 
// True once the current position has moved past the last entry.
119  bool at_end() const override {
120  return (i == weights.end());
121  }
122 
// Document id stored for the entry at the current position.
123  Xapian::docid get_docid() const override { return i->first; }
124 
// Fixed description string for this source.
125  string get_description() const override {
126  return "MyPostingSource";
127  }
128 };
129 
131 DEFINE_TESTCASE(pctcutoff4, backend && !remote && !multi) {
132  // Find the number of DBL_EPSILONs to subtract which result in the
133  // percentage of the second hit being 49% instead of 50%.
134  int epsilons = 0;
135  Xapian::Database db(get_database("apitest_simpledata"));
136  Xapian::Enquire enquire(db);
137  while (true) {
138  MyPostingSource source;
139  source.append_docweight(1, 100);
140  source.append_docweight(2, 50 - epsilons * DBL_EPSILON);
141  enquire.set_query(Xapian::Query(&source));
142  Xapian::MSet mset = enquire.get_mset(0, 10);
143  TEST_EQUAL(mset.size(), 2);
144  if (mset[1].get_percent() != 50) break;
145  ++epsilons;
146  }
147 
148  // Make a set of document weights including ones on either side of the
149  // 49% / 50% boundary.
150  MyPostingSource source;
151  source.append_docweight(1, 100);
152  source.append_docweight(2, 50);
153  source.append_docweight(3, 50 - (epsilons - 1) * DBL_EPSILON);
154  source.append_docweight(4, 50 - epsilons * DBL_EPSILON);
155  source.append_docweight(5, 25);
156 
157  enquire.set_query(Xapian::Query(&source));
158  Xapian::MSet mset1 = enquire.get_mset(0, 10);
159  TEST_EQUAL(mset1.size(), 5);
160  TEST_EQUAL(mset1[2].get_percent(), 50);
161  TEST_EQUAL(mset1[3].get_percent(), 49);
162 
163  // Use various different percentage cutoffs, and check that the values
164  // returned are as expected.
165  int percent = 100;
166  for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
167  int new_percent = mset1.convert_to_percent(i);
168  tout << "mset1 item = " << i.get_percent() << "%\n";
169  if (new_percent != percent) {
170  enquire.set_cutoff(percent);
171  Xapian::MSet mset2 = enquire.get_mset(0, 10);
172  tout << "cutoff = " << percent << "%, "
173  "mset size = " << mset2.size() << "\n";
174  TEST_EQUAL(mset2.size(), i.get_rank());
175  percent = new_percent;
176  }
177  }
178 }
179 
181 DEFINE_TESTCASE(pctcutoff5, backend) {
182  Xapian::Database db(get_database("apitest_simpledata"));
183  Xapian::Enquire enquire(db);
184  enquire.set_query(Xapian::Query("test"));
185  enquire.set_cutoff(42);
186  Xapian::MSet mset;
187 
188  enquire.set_sort_by_value(0, false);
189  TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
190 
191  enquire.set_sort_by_value(0, true);
192  TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
193 
194  enquire.set_sort_by_value_then_relevance(0, false);
195  TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
196 
197  enquire.set_sort_by_value_then_relevance(0, true);
198  TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
199 }
200 
201 // Regression test for bug fixed in 1.0.14.
// Runs an OR query over "paragraph" and "banana", sorted by value slot 1,
// and checks that every returned hit reports a percentage below 100.
202 DEFINE_TESTCASE(topercent3, backend) {
203  Xapian::Database db = get_database("apitest_simpledata");
204  Xapian::Enquire enquire(db);
205  enquire.set_sort_by_value(1, false);
206 
207  static const char * const terms[] = { "paragraph", "banana" };
// The (begin, end) pointer pair covers both entries of `terms`.
208  enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, terms, terms + 2));
209 
210  Xapian::MSet mset = enquire.get_mset(0, 20);
211 
// NOTE(review): the declaration of `i` (listing line 212) is missing from
// this listing; it is presumably an Xapian::MSetIterator - confirm.
213  for (i = mset.begin(); i != mset.end(); ++i) {
214  // We should never achieve 100%.
215  TEST_REL(i.get_percent(),<,100);
216  }
217 }
218 
219 // Regression test for bug introduced temporarily by the "percent without
220 // termlist" patch.
// Builds a query from the terms "paragraph" and "queri", combines it with
// the term "rubbish", and checks that the top hit scores exactly 50%.
221 DEFINE_TESTCASE(topercent4, backend) {
222  Xapian::Enquire enquire(get_database("apitest_simpledata"));
223 
// NOTE(review): listing lines 224 and 227, which open the two statements
// below and name the query operators used, are missing from this listing -
// confirm against the original file.
225  Xapian::Query("paragraph"),
226  Xapian::Query("queri"));
228  query, Xapian::Query("rubbish"));
229 
230  enquire.set_query(query);
231  Xapian::MSet mset = enquire.get_mset(0, 10);
232 
233  // We should get 50% not 33%.
// Guard against an empty result before indexing the first hit.
234  TEST(!mset.empty());
235  TEST_EQUAL(mset[0].get_percent(), 50);
236 }
237 
// Checks the top hit's percentage for a query combining "paragraph" with
// the term "xyzzy": it must be below 100 and at least 50.  The same checks
// are then repeated (per the comment below) with TradWeight.
239 DEFINE_TESTCASE(topercent5, backend) {
240  Xapian::Enquire enquire(get_database("apitest_simpledata"));
// NOTE(review): listing line 241, which starts the declaration of `q` and
// names the query operator, is missing from this listing.
242  Xapian::Query("paragraph"), Xapian::Query("xyzzy"));
243  enquire.set_query(q);
244  Xapian::MSet mset = enquire.get_mset(0, 10);
// Guard against an empty result before indexing the first hit.
245  TEST(!mset.empty());
246  TEST(mset[0].get_percent() < 100);
247  // It would be odd if the non-existent term was worth more, but in 1.0.x
248  // the top hit got 4% in this testcase. In 1.2.x it gets 50%, which is
249  // better, but >50% would be more natural.
250  TEST_REL(mset[0].get_percent(), >=, 50);
251 
252  // Repeat tests with TradWeight.
// NOTE(review): listing line 253 is missing from this listing; presumably
// it selects TradWeight as the weighting scheme - confirm.
254  mset = enquire.get_mset(0, 10);
255  TEST(!mset.empty());
256  TEST(mset[0].get_percent() < 100);
257  TEST_REL(mset[0].get_percent(), >=, 50);
258 }
259 
261 // Regression test for bug#590 fixed in 1.3.1 and 1.2.10.
// Checks that the top hit's percentage for a query over "rubbish" and
// "letter" is below 100, and that it is unchanged when the same query is
// wrapped in an OP_FILTER with the term "this".
262 DEFINE_TESTCASE(topercent6, backend) {
263  Xapian::Enquire enquire(get_database("apitest_simpledata"));
// NOTE(review): listing line 264, which starts the declaration of `q` and
// names the query operator, is missing from this listing.
265  Xapian::Query("rubbish"), Xapian::Query("letter"));
266  enquire.set_query(q);
267  Xapian::MSet mset = enquire.get_mset(0, 10);
// Guard against an empty result before indexing the first hit.
268  TEST(!mset.empty());
269  TEST(mset[0].get_percent() < 100);
270 
// Wrap the previous query as the first (weight-contributing) subquery of an
// OP_FILTER whose second subquery is the term "this".
271  q = Xapian::Query(q.OP_FILTER, q, Xapian::Query("this"));
272  enquire.set_query(q);
273  Xapian::MSet mset2 = enquire.get_mset(0, 10);
274  TEST(!mset2.empty());
// The filtered and unfiltered top hits must report the same percentage.
275  TEST_EQUAL(mset[0].get_percent(), mset2[0].get_percent());
276 }
277 
// Populates `db` with six documents for the topercent7 test and commits.
// NOTE(review): the signature line (listing line 279) and the declaration
// of `d` (listing line 282) are missing from this listing; the index at the
// end of the page gives the signature as
// make_topercent7_db(Xapian::WritableDatabase &db, const string &) - confirm.
278 static void
280 {
281  for (int i = 1; i <= 6; ++i) {
// The document data is the loop counter rendered as a string.
283  d.set_data(str(i));
// The second argument evaluates to 5, 2, 1, 2, 5, 10 for i = 1..6.
284  d.add_term("boom", 2 + (i - 4)*(i - 2));
// Every document except the fifth gets the boolean term.
285  if (i != 5)
286  d.add_boolean_term("XCAT122");
287  db.add_document(d);
288  }
289  db.commit();
290 }
291 
293 // Regression test for bug introduced in 1.2.10 by the original fix for #590,
294 // and fixed in 1.2.13 (and in trunk before 1.3.1 was released).
// Runs the query ("tomb" OR "boom") AND "XCAT122" and checks that the top
// hit scores more than 60%.
295 DEFINE_TESTCASE(topercent7, backend) {
// NOTE(review): listing line 296, which presumably declares and opens `db`,
// is missing from this listing - confirm.
297 
298  Xapian::Query q;
299  q = Xapian::Query(q.OP_OR, Xapian::Query("tomb"), Xapian::Query("boom"));
300  q = Xapian::Query(q.OP_AND, q, Xapian::Query("XCAT122"));
301 
302  Xapian::Enquire enq(db);
303  enq.set_query(q);
304  Xapian::MSet m = enq.get_mset(0, 10);
// Guard against an empty result before indexing the first hit.
305  TEST(!m.empty());
306  TEST_REL(m[0].get_percent(),>,60);
307 }
308 
// Test weighting scheme: the two methods returning 0.0 give no per-term
// weight, and the one remaining weight method returns 1.0 / doclen.
// NOTE(review): several signature lines are missing from this listing (the
// constructor at listing line 311 and the methods at lines 321-323 and
// 332-333); the index at the end of the page names the latter two as
// get_sumpart and get_sumextra - confirm.
309 class ZWeight : public Xapian::Weight {
310  public:
// Constructor body: requests the DOC_LENGTH statistic, which is used as
// `doclen` below.
312  need_stat(DOC_LENGTH);
313  }
314 
// No initialisation is performed; the argument is ignored.
315  void init(double) override { }
316 
// Return a new heap-allocated ZWeight.
317  Weight* clone() const override {
318  return new ZWeight();
319  }
320 
// Tail of a method whose opening lines are elided (presumably get_sumpart):
// always returns 0.
324  Xapian::termcount) const override {
325  return 0.0;
326  }
327 
// Upper bound reported for the per-term part: 0.
328  double get_maxpart() const override {
329  return 0.0;
330  }
331 
// Tail of a method whose opening lines are elided (presumably get_sumextra):
// returns the reciprocal of the document length.
334  Xapian::termcount) const override {
335  return 1.0 / doclen;
336  }
337 
// Upper bound reported for the extra part: 1.
338  double get_maxextra() const override {
339  return 1.0;
340  }
341 };
342 
344 DEFINE_TESTCASE(checkzeromaxpartopt1, backend && !remote) {
345  Xapian::Database db = get_database("apitest_simpledata");
346  Xapian::Enquire enquire(db);
347  // "this" indexes all documents, so will get replaced with MatchAll
348  // internally.
349  static const char * const terms[] = { "this", "spoken", "blank" };
350  enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, terms, terms + 3));
351  ZWeight wt;
352  enquire.set_weighting_scheme(wt);
353  Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
354  // No documents match all 3 terms, so the score shouldn't be 100%.
355  TEST(mset[0].get_percent() != 100);
356  // Make sure the percentage score isn't 0 or 1 though.
357  TEST_REL(mset[0].get_percent(), >, 1);
358 }
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
static void make_topercent7_db(Xapian::WritableDatabase &db, const string &)
DEFINE_TESTCASE(consistency3, backend)
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
void reset(const Xapian::Database &, Xapian::doccount) override
Set this PostingSource to the start of the list of postings.
MyPostingSource(const vector< pair< Xapian::docid, double >> &weights_, double max_wt)
vector< pair< Xapian::docid, double > >::const_iterator i
Xapian::docid get_docid() const override
Return the current docid.
vector< pair< Xapian::docid, double > > weights
string get_description() const override
Return a string describing this object.
bool at_end() const override
Return true if the current position is past the last entry in this list.
void append_docweight(Xapian::docid did, double wt)
PostingSource * clone() const override
Clone the posting source.
void next(double) override
Advance the current position to the next matching document.
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
double get_weight() const override
Return the weight contribution for the current document.
An indexed database of documents.
Definition: database.h:75
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: database.cc:233
Class representing a document.
Definition: document.h:64
void add_boolean_term(std::string_view term)
Add a boolean filter term to the document.
Definition: document.h:145
void set_data(std::string_view data)
Set the document data.
Definition: document.cc:81
void add_term(std::string_view term, Xapian::termcount wdf_inc=1)
Add a term to this document.
Definition: document.cc:87
Querying session.
Definition: enquire.h:57
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
Definition: enquire.cc:85
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
void set_sort_by_value_then_relevance(valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: enquire.cc:123
void set_cutoff(int percent_threshold, double weight_threshold=0)
Set lower bounds on percentage and/or weight.
Definition: enquire.cc:172
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
void set_sort_by_value(valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: enquire.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:535
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Definition: mset.h:711
Class representing a list of search results.
Definition: mset.h:46
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: mset.cc:275
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:467
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:786
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:791
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
Class representing a query.
Definition: query.h:45
@ OP_XOR
Match documents which an odd number of subqueries match.
Definition: query.h:107
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:1297
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
Abstract base class for weighting schemes.
Definition: weight.h:38
This class provides read/write access to a database.
Definition: database.h:964
void commit()
Commit pending modifications.
Definition: database.cc:543
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
Weight * clone() const override
Clone this object.
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
double get_sumextra(Xapian::termcount doclen, Xapian::termcount, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount, Xapian::termcount) const override
Calculate the weight contribution for this object's term to a document.
void init(double) override
Allow the subclass to perform any initialisation it needs to.
#define false
Definition: header.h:9
string str(int value)
Convert int to std::string.
Definition: str.cc:91
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Convert types to std::string.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:35
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:293
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:112
Public interfaces for the Xapian library.