xapian-core  2.0.0
api_anydb.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002-2024 Olly Betts
7  * Copyright 2006,2008 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, see
22  * <https://www.gnu.org/licenses/>.
23  */
24 
25 #include <config.h>
26 
27 #include "api_anydb.h"
28 
29 #include <algorithm>
30 #include <string>
31 
32 #define XAPIAN_DEPRECATED(X) X
33 #include <xapian.h>
34 #include "testsuite.h"
35 #include "testutils.h"
36 
37 #include "apitest.h"
38 
39 #include <list>
40 
41 using namespace std;
42 
// Helper: writes the weight of every item in `mset` to `tout`, each value
// preceded by a single space (no trailing newline is written).
// NOTE(review): the declarator line (listing line 44) is missing from this
// extract.  Presumably this is print_mset_weights(), which cutoff1 calls
// with an MSet - confirm against the original file.
43 static void
45 {
46  Xapian::MSetIterator i = mset.begin();
47  for ( ; i != mset.end(); ++i) {
48  tout << " " << i.get_weight();
49  }
50 }
51 
// Helper: writes the percentage score of every item in `mset` (as given by
// mset.convert_to_percent()) to `tout`, each value preceded by a single
// space (no trailing newline is written).
// NOTE(review): the declarator line (listing line 53) is missing from this
// extract.  Presumably this is print_mset_percentages(), which the pctcutoff
// tests call with an MSet - confirm against the original file.
52 static void
54 {
55  Xapian::MSetIterator i = mset.begin();
56  for ( ; i != mset.end(); ++i) {
57  tout << " " << mset.convert_to_percent(i);
58  }
59 }
60 
// Helper: builds a Xapian::Query which combines up to ten terms with the
// operator `op`.  Each non-empty argument t1..t10 is stemmed with the
// English stemmer and added in argument order; empty arguments are skipped.
// NOTE(review): the first declarator line (listing line 62) is missing from
// this extract.  Calls such as query(Xapian::Query::OP_OR, "inmemory",
// "word") suggest it declares `query(` with an operator parameter named
// `op` - confirm against the original file.
61 static Xapian::Query
63  const string & t1 = string(), const string & t2 = string(),
64  const string & t3 = string(), const string & t4 = string(),
65  const string & t5 = string(), const string & t6 = string(),
66  const string & t7 = string(), const string & t8 = string(),
67  const string & t9 = string(), const string & t10 = string())
68 {
69  vector<string> v;
70  Xapian::Stem stemmer("english");
71  if (!t1.empty()) v.push_back(stemmer(t1));
72  if (!t2.empty()) v.push_back(stemmer(t2));
73  if (!t3.empty()) v.push_back(stemmer(t3));
74  if (!t4.empty()) v.push_back(stemmer(t4));
75  if (!t5.empty()) v.push_back(stemmer(t5));
76  if (!t6.empty()) v.push_back(stemmer(t6));
77  if (!t7.empty()) v.push_back(stemmer(t7));
78  if (!t8.empty()) v.push_back(stemmer(t8));
79  if (!t9.empty()) v.push_back(stemmer(t9));
80  if (!t10.empty()) v.push_back(stemmer(t10));
// Combine the collected (stemmed) terms under the requested operator.
81  return Xapian::Query(op, v.begin(), v.end());
82 }
83 
// Helper: like the overload above in this listing, but additionally passes
// `parameter` through to the Xapian::Query constructor.  Each non-empty
// argument t1..t10 is stemmed with the English stemmer and added in
// argument order; empty arguments are skipped.
// NOTE(review): the first declarator line (listing line 85) is missing from
// this extract, so the types of `op` and `parameter` are not visible here -
// confirm against the original file.
84 static Xapian::Query
86  const string & t1 = string(), const string & t2 = string(),
87  const string & t3 = string(), const string & t4 = string(),
88  const string & t5 = string(), const string & t6 = string(),
89  const string & t7 = string(), const string & t8 = string(),
90  const string & t9 = string(), const string & t10 = string())
91 {
92  vector<string> v;
93  Xapian::Stem stemmer("english");
94  if (!t1.empty()) v.push_back(stemmer(t1));
95  if (!t2.empty()) v.push_back(stemmer(t2));
96  if (!t3.empty()) v.push_back(stemmer(t3));
97  if (!t4.empty()) v.push_back(stemmer(t4));
98  if (!t5.empty()) v.push_back(stemmer(t5));
99  if (!t6.empty()) v.push_back(stemmer(t6));
100  if (!t7.empty()) v.push_back(stemmer(t7));
101  if (!t8.empty()) v.push_back(stemmer(t8));
102  if (!t9.empty()) v.push_back(stemmer(t9));
103  if (!t10.empty()) v.push_back(stemmer(t10));
// Combine the collected (stemmed) terms, forwarding the extra parameter.
104  return Xapian::Query(op, v.begin(), v.end(), parameter);
105 }
106 
107 static Xapian::Query
108 query(const string &t)
109 {
110  return Xapian::Query(Xapian::Stem("english")(t));
111 }
112 
113 // #######################################################################
114 // # Tests start here
115 
116 // tests that the backend doesn't return zero docids
117 DEFINE_TESTCASE(zerodocid1, backend) {
118  // open the database (in this case a simple text file
119  // we prepared earlier)
120 
121  Xapian::Database mydb(get_database("apitest_onedoc"));
122 
123  Xapian::Enquire enquire(mydb);
124 
125  // make a simple query, with one word in it - "word".
126  enquire.set_query(Xapian::Query("word"));
127 
128  // retrieve the top ten results (we only expect one)
129  Xapian::MSet mymset = enquire.get_mset(0, 10);
130 
131  // We've done the query, now check that the result is what
132  // we expect (1 document, with non-zero docid)
133  TEST_MSET_SIZE(mymset, 1);
134 
135  TEST_AND_EXPLAIN(*(mymset.begin()) != 0,
136  "A query on a database returned a zero docid");
137 }
138 
139 // tests that an empty query returns no matches
// Two forms of empty query are exercised: a default-constructed
// Xapian::Query, and an OP_AND query built from an empty range of
// subqueries.  Both must give an empty MSet with all match bounds 0.
// NOTE(review): listing lines 149-151 and 160-162 are missing from this
// extract, so further checks may exist in the original file - confirm.
140 DEFINE_TESTCASE(emptyquery1, backend) {
141  Xapian::Enquire enquire(get_database("apitest_simpledata"));
142 
// Case 1: default-constructed query.
143  enquire.set_query(Xapian::Query());
144  Xapian::MSet mymset = enquire.get_mset(0, 10);
145  TEST_MSET_SIZE(mymset, 0);
146  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
147  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
148  TEST_EQUAL(mymset.get_matches_estimated(), 0);
152 
// Case 2: OP_AND over an empty list of subqueries.
153  vector<Xapian::Query> v;
154  enquire.set_query(Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end()));
155  mymset = enquire.get_mset(0, 10);
156  TEST_MSET_SIZE(mymset, 0);
157  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
158  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
159  TEST_EQUAL(mymset.get_matches_estimated(), 0);
163 }
164 
165 // tests the document count for a simple query
166 DEFINE_TESTCASE(simplequery1, backend) {
167  Xapian::Enquire enquire(get_database("apitest_simpledata"));
168  enquire.set_query(Xapian::Query("word"));
169  Xapian::MSet mymset = enquire.get_mset(0, 10);
170  TEST_MSET_SIZE(mymset, 2);
171 }
172 
173 // tests for the right documents and weights returned with simple query
174 DEFINE_TESTCASE(simplequery2, backend) {
175  // open the database (in this case a simple text file
176  // we prepared earlier)
177  Xapian::Database db = get_database("apitest_simpledata");
178  Xapian::Enquire enquire(db);
179  enquire.set_query(Xapian::Query("word"));
180 
181  // retrieve the top results
182  Xapian::MSet mymset = enquire.get_mset(0, 10);
183 
184  // We've done the query, now check that the result is what
185  // we expect (documents 2 and 4)
186  mset_expect_order(mymset, 2, 4);
187 
188  // Check the weights
189  Xapian::MSetIterator i = mymset.begin();
190  // These weights are for BM25Weight(1,0,1,0.5,0.5)
191  TEST_EQUAL_DOUBLE(i.get_weight(), 1.04648168717725);
192  i++;
193  TEST_EQUAL_DOUBLE(i.get_weight(), 0.640987686595914);
194 }
195 
196 // tests for the right document count for another simple query
197 DEFINE_TESTCASE(simplequery3, backend) {
198  Xapian::Enquire enquire(get_database("apitest_simpledata"));
199  enquire.set_query(query("this"));
200  Xapian::MSet mymset = enquire.get_mset(0, 10);
201 
202  // Check that 6 documents were returned.
203  TEST_MSET_SIZE(mymset, 6);
204 }
205 
206 // test that a multidb with 3 dbs query returns correct docids
// The combined database is built by adding "apitest_simpledata2" and
// "apitest_termorder" to a handle on "apitest_simpledata".
// NOTE(review): listing line 215 is missing from this extract, so one
// statement between building the query and setting it is not visible here -
// confirm against the original file.
207 DEFINE_TESTCASE(multidb2, backend && !multi) {
208  Xapian::Database mydb2(get_database("apitest_simpledata"));
209  mydb2.add_database(get_database("apitest_simpledata2"));
210  mydb2.add_database(get_database("apitest_termorder"));
211  Xapian::Enquire enquire(mydb2);
212 
213  // make a query
214  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
216  enquire.set_query(myquery);
217 
218  // retrieve the top ten results
219  Xapian::MSet mymset = enquire.get_mset(0, 10);
// Expected docids, in rank order, within the combined database.
220  mset_expect_order(mymset, 2, 3, 4, 10);
221 }
222 
223 // tests that when specifying maxitems to get_mset, no more than
224 // that are returned.
225 DEFINE_TESTCASE(msetmaxitems1, backend) {
226  Xapian::Enquire enquire(get_database("apitest_simpledata"));
227  enquire.set_query(query("this"));
228  Xapian::MSet mymset = enquire.get_mset(0, 1);
229  TEST_MSET_SIZE(mymset, 1);
230 
231  mymset = enquire.get_mset(0, 5);
232  TEST_MSET_SIZE(mymset, 5);
233 }
234 
235 // tests the returned weights are as expected (regression test for remote
236 // backend which was using the average weight rather than the actual document
237 // weight for computing weights - fixed in 1.0.0).
238 DEFINE_TESTCASE(expandweights1, backend) {
239  Xapian::Enquire enquire(get_database("apitest_simpledata"));
240  enquire.set_query(Xapian::Query("this"));
241 
242  Xapian::MSet mymset = enquire.get_mset(0, 10);
243 
244  Xapian::RSet myrset;
245  Xapian::MSetIterator i = mymset.begin();
246  myrset.add_document(*i);
247  myrset.add_document(*(++i));
248 
249  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
250  TEST_EQUAL(eset.size(), 3);
251  TEST_REL(eset.get_ebound(), >=, eset.size());
252  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
253  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
254  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
255 
256  // Test non-default k too.
257  enquire.set_expansion_scheme("prob", 2.0);
258  eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
259  TEST_EQUAL(eset.size(), 3);
260  TEST_REL(eset.get_ebound(), >=, eset.size());
261  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 5.88109547674955);
262  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 5.88109547674955);
263  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.44473599216144);
264 }
265 
266 // Just like test_expandweights1 but without USE_EXACT_TERMFREQ.
267 DEFINE_TESTCASE(expandweights2, backend) {
268  Xapian::Enquire enquire(get_database("apitest_simpledata"));
269  enquire.set_query(Xapian::Query("this"));
270 
271  Xapian::MSet mymset = enquire.get_mset(0, 10);
272 
273  Xapian::RSet myrset;
274  Xapian::MSetIterator i = mymset.begin();
275  myrset.add_document(*i);
276  myrset.add_document(*(++i));
277 
278  Xapian::ESet eset = enquire.get_eset(3, myrset);
279  TEST_EQUAL(eset.size(), 3);
280  TEST_REL(eset.get_ebound(), >=, eset.size());
281  // With a multi backend, the top three terms all happen to occur in both
282  // shard so their termfreq is exactly known even without
283  // USE_EXACT_TERMFREQ and so the weights should be the same for all
284  // test harness backends.
285  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
286  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
287  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
288 }
289 
290 DEFINE_TESTCASE(expandweights3, backend) {
291  Xapian::Enquire enquire(get_database("apitest_simpledata"));
292  enquire.set_query(Xapian::Query("this"));
293 
294  Xapian::MSet mymset = enquire.get_mset(0, 10);
295 
296  Xapian::RSet myrset;
297  Xapian::MSetIterator i = mymset.begin();
298  myrset.add_document(*i);
299  myrset.add_document(*(++i));
300 
301  // Set min_wt to 6.0
302  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 0, 6.0);
303  TEST_EQUAL(eset.size(), 2);
304  TEST_REL(eset.get_ebound(), >=, eset.size());
305  // With a multi backend, the top two terms all happen to occur in both
306  // shard so their termfreq is exactly known even without
307  // USE_EXACT_TERMFREQ and so the weights should be the same for all
308  // test harness backends.
309  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
310  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
311 }
312 
313 // tests that negative weights are returned
314 DEFINE_TESTCASE(expandweights4, backend) {
315  Xapian::Enquire enquire(get_database("apitest_simpledata"));
316  enquire.set_query(Xapian::Query("paragraph"));
317 
318  Xapian::MSet mymset = enquire.get_mset(0, 10);
319 
320  Xapian::RSet myrset;
321  Xapian::MSetIterator i = mymset.begin();
322  myrset.add_document(*i);
323  myrset.add_document(*(++i));
324 
325  Xapian::ESet eset = enquire.get_eset(37, myrset, 0, 0, -100);
326  // Now include negative weights
327  TEST_EQUAL(eset.size(), 37);
328  TEST_REL(eset.get_ebound(), >=, eset.size());
329  TEST_REL(eset[36].get_weight(), <, 0);
330  TEST_REL(eset[36].get_weight(), >=, -100);
331 }
332 
333 // test for Bo1EWeight
334 DEFINE_TESTCASE(expandweights5, backend) {
335  Xapian::Enquire enquire(get_database("apitest_simpledata"));
336  enquire.set_query(Xapian::Query("this"));
337 
338  Xapian::MSet mymset = enquire.get_mset(0, 10);
339 
340  Xapian::RSet myrset;
341  Xapian::MSetIterator i = mymset.begin();
342  myrset.add_document(*i);
343  myrset.add_document(*(++i));
344 
345  enquire.set_expansion_scheme("bo1");
346  Xapian::ESet eset = enquire.get_eset(3, myrset);
347 
348  TEST_EQUAL(eset.size(), 3);
349  TEST_REL(eset.get_ebound(), >=, eset.size());
350  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 7.21765284821702);
351  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.661623193760022);
352  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.58090119783738);
353 }
354 
355 // test that "prob" and "trad" can be set as the expansion scheme.
356 DEFINE_TESTCASE(expandweights6, backend) {
357  Xapian::Enquire enquire(get_database("apitest_simpledata"));
358  enquire.set_query(Xapian::Query("this"));
359 
360  Xapian::MSet mymset = enquire.get_mset(0, 10);
361 
362  Xapian::RSet myrset;
363  Xapian::MSetIterator i = mymset.begin();
364  myrset.add_document(*i);
365  myrset.add_document(*(++i));
366 
367  enquire.set_expansion_scheme("prob");
368  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
369 
370  TEST_EQUAL(eset.size(), 3);
371  TEST_REL(eset.get_ebound(), >=, eset.size());
372  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
373  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
374  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
375 
376  // Test deprecated scheme name "trad" (alias for "prob").
377  enquire.set_expansion_scheme("trad");
378  eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
379 
380  TEST_EQUAL(eset.size(), 3);
381  TEST_REL(eset.get_ebound(), >=, eset.size());
382  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
383  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
384  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
385 }
386 
387 // test that invalid scheme names are not accepted
// NOTE(review): listing line 391 is missing from this extract; it presumably
// opens the exception-checking macro which the trailing `)` on line 392
// closes - confirm against the original file.
388 DEFINE_TESTCASE(expandweights7, backend) {
389  Xapian::Enquire enquire(get_database("apitest_simpledata"));
390 
392  enquire.set_expansion_scheme("no_such_scheme"));
393 }
394 
395 // test that "expand_k" can be passed as a parameter to get_eset
396 DEFINE_TESTCASE(expandweights8, backend) {
397  Xapian::Enquire enquire(get_database("apitest_simpledata"));
398  enquire.set_query(Xapian::Query("this"));
399 
400  Xapian::MSet mymset = enquire.get_mset(0, 10);
401 
402  Xapian::RSet myrset;
403  Xapian::MSetIterator i = mymset.begin();
404  myrset.add_document(*i);
405  myrset.add_document(*(++i));
406 
407  // Set expand_k to 1.0 and min_wt to 0
408  enquire.set_expansion_scheme("prob", 1.0);
409  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 0, 0);
410  // With a multi backend, the top three terms all happen to occur in both
411  // shard so their termfreq is exactly known even without
412  // USE_EXACT_TERMFREQ and so the weights should be the same for all
413  // test harness backends.
414  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
415  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
416  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
417  TEST_REL(eset.back().get_weight(),>=,0);
418 }
419 
420 // tests that when specifying maxitems to get_eset, no more than
421 // that are returned.
422 DEFINE_TESTCASE(expandmaxitems1, backend) {
423  Xapian::Enquire enquire(get_database("apitest_simpledata"));
424  enquire.set_query(Xapian::Query("this"));
425 
426  Xapian::MSet mymset = enquire.get_mset(0, 10);
427  tout << "mymset.size() = " << mymset.size() << '\n';
428  TEST(mymset.size() >= 2);
429 
430  Xapian::RSet myrset;
431  Xapian::MSetIterator i = mymset.begin();
432  myrset.add_document(*i);
433  myrset.add_document(*(++i));
434 
435  Xapian::ESet myeset = enquire.get_eset(1, myrset);
436  TEST_EQUAL(myeset.size(), 1);
437  TEST_REL(myeset.get_ebound(), >=, myeset.size());
438 }
439 
440 // tests that a pure boolean query has all weights set to 0
// NOTE(review): listing line 448 is missing from this extract.  Since the
// test expects a maximum possible weight of 0, it is presumably where a
// boolean weighting scheme is selected - confirm against the original file.
441 DEFINE_TESTCASE(boolquery1, backend) {
442  Xapian::Query myboolquery(query("this"));
443 
444  // open the database (in this case a simple text file
445  // we prepared earlier)
446  Xapian::Enquire enquire(get_database("apitest_simpledata"));
447  enquire.set_query(myboolquery);
449 
450  // retrieve the top results
451  Xapian::MSet mymset = enquire.get_mset(0, 10);
452 
// There must be some matches, and every weight (including the maximum
// possible weight) must be exactly 0.
453  TEST_NOT_EQUAL(mymset.size(), 0);
454  TEST_EQUAL(mymset.get_max_possible(), 0);
455  for (Xapian::MSetIterator i = mymset.begin(); i != mymset.end(); ++i) {
456  TEST_EQUAL(i.get_weight(), 0);
457  }
458 }
459 
460 // tests that get_mset() specifying "this" works as expected
461 DEFINE_TESTCASE(msetfirst1, backend) {
462  Xapian::Enquire enquire(get_database("apitest_simpledata"));
463  enquire.set_query(query("this"));
464  Xapian::MSet mymset1 = enquire.get_mset(0, 6);
465  Xapian::MSet mymset2 = enquire.get_mset(3, 3);
466  TEST(mset_range_is_same(mymset1, 3, mymset2, 0, 3));
467 
468  // Regression test - we weren't adjusting the index into items[] by
469  // firstitem in api/omenquire.cc.
470  TEST_EQUAL(mymset1[5].get_document().get_data(),
471  mymset2[2].get_document().get_data());
472 }
473 
474 // tests the converting-to-percent functions
// For each match, the percentage must agree between the different ways of
// obtaining it, lie in [0, 100], and never increase down the MSet.
// NOTE(review): listing line 486 is missing from this extract; line 487 is
// the message argument of a second TEST_AND_EXPLAIN whose first line is not
// visible here - confirm against the original file.
475 DEFINE_TESTCASE(topercent1, backend) {
476  Xapian::Enquire enquire(get_database("apitest_simpledata"));
477  enquire.set_query(query("this"));
478  Xapian::MSet mymset = enquire.get_mset(0, 20);
479 
// Percentage of the previous item; starts at the maximum of 100.
480  int last_pct = 100;
481  Xapian::MSetIterator i = mymset.begin();
482  for ( ; i != mymset.end(); ++i) {
483  int pct = mymset.convert_to_percent(i);
484  TEST_AND_EXPLAIN(pct == i.get_percent(),
485  "convert_to_%(msetitor) != convert_to_%(wt)");
487  "convert_to_%(msetitor) != convert_to_%(wt)");
488  TEST_AND_EXPLAIN(pct >= 0 && pct <= 100,
489  "percentage out of range: " << pct);
490  TEST_AND_EXPLAIN(pct <= last_pct, "percentage increased down mset");
491  last_pct = pct;
492  }
493 }
494 
495 // tests the percentage values returned
// Two searches are run: one where the top match scores 100%, and one where
// it does not.  For each, the percentages, match bounds, maximum attained
// weight and result order are checked.
// NOTE(review): listing line 518 is missing from this extract; it presumably
// declares the query `q` whose term arguments continue on line 519 -
// confirm against the original file.
496 DEFINE_TESTCASE(topercent2, backend) {
497  Xapian::Enquire enquire(get_database("apitest_simpledata"));
498 
499  int pct;
500 
501  // First, test a search in which the top document scores 100%.
502  enquire.set_query(query("this"));
503  Xapian::MSet mymset = enquire.get_mset(0, 20);
504 
505  Xapian::MSetIterator i = mymset.begin();
506  TEST(i != mymset.end());
507  pct = mymset.convert_to_percent(i);
508  TEST_EQUAL(pct, 100);
509 
510  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
511  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
512  TEST_EQUAL(mymset.get_matches_estimated(), 6);
513  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 0.0553904060041786);
514  TEST_EQUAL(mymset.size(), 6);
515  mset_expect_order(mymset, 2, 1, 3, 5, 6, 4);
516 
517  // A search in which the top document doesn't have 100%
519  "this", "line", "paragraph", "rubbish");
520  enquire.set_query(q);
521  mymset = enquire.get_mset(0, 20);
522 
// Top match: expected to score strictly between 60% and 76%.
523  i = mymset.begin();
524  TEST(i != mymset.end());
525  pct = mymset.convert_to_percent(i);
526  TEST_REL(pct,>,60);
527  TEST_REL(pct,<,76);
528 
529  ++i;
530 
// Second match: expected to score strictly between 40% and 50%.
531  TEST(i != mymset.end());
532  pct = mymset.convert_to_percent(i);
533  TEST_REL(pct,>,40);
534  TEST_REL(pct,<,50);
535 
536  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
537  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
538  TEST_EQUAL(mymset.get_matches_estimated(), 6);
539  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 1.67412192414056);
540  TEST_EQUAL(mymset.size(), 6);
541  mset_expect_order(mymset, 3, 1, 4, 2, 5, 6);
542 }
543 
// Decider functor which accepts a term when the sum of its characters
// (each converted to unsigned) is even, and rejects it otherwise.
// NOTE(review): the class head (listing line 544) is missing from this
// extract.  expandfunctor1 instantiates EvenParityExpandFunctor and passes
// its address to get_eset(), and operator() is marked `override`, so this
// is presumably that class, derived from an expand decider base - confirm
// against the original file.
545  public:
546  bool operator()(const string& tname) const override {
547  unsigned long sum = 0;
548  for (unsigned ch : tname) {
549  sum += ch;
550  }
551 // if (verbose) {
552 // tout << tname << "==> " << sum << "\n";
553 // }
554  return (sum % 2) == 0;
555  }
556 };
557 
558 // tests the expand decision functor
559 DEFINE_TESTCASE(expandfunctor1, backend) {
560  Xapian::Enquire enquire(get_database("apitest_simpledata"));
561  enquire.set_query(Xapian::Query("this"));
562 
563  Xapian::MSet mymset = enquire.get_mset(0, 10);
564  TEST(mymset.size() >= 2);
565 
566  Xapian::RSet myrset;
567  Xapian::MSetIterator i = mymset.begin();
568  myrset.add_document(*i);
569  myrset.add_document(*(++i));
570 
571  EvenParityExpandFunctor myfunctor;
572 
573  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
574  unsigned int neweset_size = 0;
575  Xapian::ESetIterator j = myeset_orig.begin();
576  for ( ; j != myeset_orig.end(); ++j) {
577  if (myfunctor(*j)) neweset_size++;
578  }
579  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
580 
581 #if 0
582  // Compare myeset with the hand-filtered version of myeset_orig.
583  if (verbose) {
584  tout << "orig_eset: ";
585  copy(myeset_orig.begin(), myeset_orig.end(),
586  ostream_iterator<Xapian::ESetItem>(tout, " "));
587  tout << "\n";
588 
589  tout << "new_eset: ";
590  copy(myeset.begin(), myeset.end(),
591  ostream_iterator<Xapian::ESetItem>(tout, " "));
592  tout << "\n";
593  }
594 #endif
595  Xapian::ESetIterator orig = myeset_orig.begin();
596  Xapian::ESetIterator filt = myeset.begin();
597  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
598  // skip over items that shouldn't be in myeset
599  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
600  ++orig;
601  }
602 
603  TEST_AND_EXPLAIN(*orig == *filt &&
604  orig.get_weight() == filt.get_weight(),
605  "Mismatch in items " << *orig << " vs. " << *filt
606  << " after filtering");
607  }
608 
609  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
610  ++orig;
611  }
612 
613  TEST_EQUAL(orig, myeset_orig.end());
614  TEST_AND_EXPLAIN(filt == myeset.end(),
615  "Extra items in the filtered eset.");
616 }
617 
618 DEFINE_TESTCASE(expanddeciderfilterprefix2, backend) {
619  Xapian::Enquire enquire(get_database("apitest_simpledata"));
620  enquire.set_query(Xapian::Query("this"));
621 
622  Xapian::MSet mymset = enquire.get_mset(0, 10);
623  TEST(mymset.size() >= 2);
624 
625  Xapian::RSet myrset;
626  Xapian::MSetIterator i = mymset.begin();
627  myrset.add_document(*i);
628  myrset.add_document(*(++i));
629 
630  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
631  unsigned int neweset_size = 0;
632 
633  // Choose the first char in the first term as prefix.
634  Xapian::ESetIterator j = myeset_orig.begin();
635  TEST(myeset_orig.size() >= 1);
636  string prefix(*j, 0, 1);
637  Xapian::ExpandDeciderFilterPrefix myfunctor(prefix);
638 
639  for ( ; j != myeset_orig.end(); ++j) {
640  if (myfunctor(*j)) neweset_size++;
641  }
642  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
643 
644  Xapian::ESetIterator orig = myeset_orig.begin();
645  Xapian::ESetIterator filt = myeset.begin();
646  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
647  // skip over items that shouldn't be in myeset
648  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
649  ++orig;
650  }
651 
652  TEST_AND_EXPLAIN(*orig == *filt &&
653  orig.get_weight() == filt.get_weight(),
654  "Mismatch in items " << *orig << " vs. " << *filt
655  << " after filtering");
656  }
657 
658  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
659  ++orig;
660  }
661 
662  TEST_EQUAL(orig, myeset_orig.end());
663  TEST_AND_EXPLAIN(filt == myeset.end(),
664  "Extra items in the filtered eset.");
665 }
666 
667 // tests the percent cutoff option
// The test first runs the query without a cutoff, walks down the MSet to
// find a percentage to use as the cutoff, then re-runs the query with that
// cutoff and checks that neither too few nor too many items come back.
// NOTE(review): listing line 670 is missing from this extract; it presumably
// starts the enquire.set_query(...) call whose term arguments continue on
// line 671 - confirm against the original file.
668 DEFINE_TESTCASE(pctcutoff1, backend) {
669  Xapian::Enquire enquire(get_database("apitest_simpledata"));
671  "this", "line", "paragraph", "rubbish"));
672  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
673 
674  if (verbose) {
675  tout << "Original mset pcts:";
676  print_mset_percentages(mymset1);
677  tout << "\n";
678  }
679 
// Walk down the MSet counting changes in percentage.  The loop stops at the
// fourth change, leaving my_pct as the percentage after the third change
// and num_items as the index of the first item with that percentage.
680  unsigned int num_items = 0;
681  int my_pct = 100;
682  int changes = 0;
683  Xapian::MSetIterator i = mymset1.begin();
684  int c = 0;
685  for ( ; i != mymset1.end(); ++i, ++c) {
686  int new_pct = mymset1.convert_to_percent(i);
687  if (new_pct != my_pct) {
688  changes++;
689  if (changes > 3) break;
690  num_items = c;
691  my_pct = new_pct;
692  }
693  }
694 
695  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
696  if (verbose) {
697  tout << "Cutoff percent: " << my_pct << "\n";
698  }
699 
// Re-run the same query with the chosen percentage as the cutoff.
700  enquire.set_cutoff(my_pct);
701  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
702 
703  if (verbose) {
704  tout << "Percentages after cutoff:";
705  print_mset_percentages(mymset2);
706  tout << "\n";
707  }
708 
709  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
710  "Match with % cutoff lost too many items");
711 
// Any items from index num_items onwards must all score exactly my_pct.
712  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
713  (mymset2.convert_to_percent(mymset2[num_items]) == my_pct &&
714  mymset2.convert_to_percent(mymset2.back()) == my_pct),
715  "Match with % cutoff returned too many items");
716 }
717 
718 // Tests the percent cutoff option combined with collapsing
// NOTE(review): listing lines 721, 726, 742 and 744-747 are missing from
// this extract.  The statement that sets the query, the call inside the
// verbose block, and the start of the check whose argument continues on
// line 743 are therefore not visible here - confirm against the original
// file.
719 DEFINE_TESTCASE(pctcutoff2, backend) {
720  Xapian::Enquire enquire(get_database("apitest_simpledata"));
722  Xapian::MSet mset = enquire.get_mset(0, 100);
723 
724  if (verbose) {
725  tout << "Original mset pcts:";
727  tout << "\n";
728  }
729 
// The top two matches must differ by at least 2 percentage points, so that
// the midpoint computed below separates them.
730  TEST(mset.size() >= 2);
731  TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
732 
// Cutoff is the (integer) mean of the top two percentages.
733  int cutoff = mset[0].get_percent() + mset[1].get_percent();
734  cutoff /= 2;
735 
736  enquire.set_cutoff(cutoff);
737  enquire.set_collapse_key(1234); // Value which is always empty.
738 
739  Xapian::MSet mset2 = enquire.get_mset(0, 1);
740  TEST_EQUAL(mset2.size(), 1);
741  TEST_REL(mset2.get_matches_lower_bound(),>=,1);
743  mset2.get_matches_lower_bound());
748 }
749 
750 // Test that the percent cutoff option returns all the answers it should.
751 DEFINE_TESTCASE(pctcutoff3, backend) {
752  Xapian::Enquire enquire(get_database("apitest_simpledata"));
753  enquire.set_query(Xapian::Query("this"));
754  Xapian::MSet mset1 = enquire.get_mset(0, 10);
755 
756  if (verbose) {
757  tout << "Original mset pcts:";
758  print_mset_percentages(mset1);
759  tout << "\n";
760  }
761 
762  int percent = 100;
763  for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
764  int new_percent = mset1.convert_to_percent(i);
765  if (new_percent != percent) {
766  tout.str(string());
767  tout << "Testing " << percent << "% cutoff\n";
768  enquire.set_cutoff(percent);
769  Xapian::MSet mset2 = enquire.get_mset(0, 10);
770  TEST_EQUAL(mset2.back().get_percent(), percent);
771  TEST_EQUAL(mset2.size(), i.get_rank());
772  percent = new_percent;
773  }
774  }
775 }
776 
777 // tests the cutoff option
// The test first runs the query without a cutoff, walks down the MSet to
// find a weight to use as the cutoff, then re-runs the query with that
// weight cutoff and checks that neither too few nor too many items come
// back.
// NOTE(review): listing line 780 is missing from this extract; it presumably
// starts the enquire.set_query(...) call whose term arguments continue on
// line 781 - confirm against the original file.
778 DEFINE_TESTCASE(cutoff1, backend) {
779  Xapian::Enquire enquire(get_database("apitest_simpledata"));
781  "this", "line", "paragraph", "rubbish"));
782  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
783 
784  if (verbose) {
785  tout << "Original mset weights:";
786  print_mset_weights(mymset1);
787  tout << "\n";
788  }
789 
// Walk down the MSet counting changes in weight.  my_wt starts at -100, so
// the first item always counts as a change.  The loop stops at the fourth
// change, leaving my_wt as the weight after the third change and num_items
// as the index of the first item with that weight.
790  unsigned int num_items = 0;
791  double my_wt = -100;
792  int changes = 0;
793  Xapian::MSetIterator i = mymset1.begin();
794  int c = 0;
795  for ( ; i != mymset1.end(); ++i, ++c) {
796  double new_wt = i.get_weight();
797  if (new_wt != my_wt) {
798  changes++;
799  if (changes > 3) break;
800  num_items = c;
801  my_wt = new_wt;
802  }
803  }
804 
805  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
806  if (verbose) {
807  tout << "Cutoff weight: " << my_wt << "\n";
808  }
809 
// Re-run the same query with a percent cutoff of 0 and a weight cutoff of
// my_wt.
810  enquire.set_cutoff(0, my_wt);
811  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
812 
813  if (verbose) {
814  tout << "Weights after cutoff:";
815  print_mset_weights(mymset2);
816  tout << "\n";
817  }
818 
819  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
820  "Match with cutoff lost too many items");
821 
// Any items from index num_items onwards must all weigh exactly my_wt.
822  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
823  (mymset2[num_items].get_weight() == my_wt &&
824  mymset2.back().get_weight() == my_wt),
825  "Match with cutoff returned too many items");
826 }
827 
828 // tests the allow query terms expand option
829 DEFINE_TESTCASE(allowqterms1, backend) {
830  Xapian::Enquire enquire(get_database("apitest_simpledata"));
831  string term = "paragraph";
832  enquire.set_query(Xapian::Query(term));
833 
834  Xapian::MSet mymset = enquire.get_mset(0, 10);
835  TEST(mymset.size() >= 2);
836 
837  Xapian::RSet myrset;
838  Xapian::MSetIterator i = mymset.begin();
839  myrset.add_document(*i);
840  myrset.add_document(*(++i));
841 
842  Xapian::ESet myeset = enquire.get_eset(1000, myrset);
843  Xapian::ESetIterator j = myeset.begin();
844  for ( ; j != myeset.end(); ++j) {
845  TEST_NOT_EQUAL(*j, term);
846  }
847 
848  Xapian::ESet myeset2 = enquire.get_eset(1000, myrset, Xapian::Enquire::INCLUDE_QUERY_TERMS);
849  j = myeset2.begin();
850  for ( ; j != myeset2.end(); ++j) {
851  if (*j == term) break;
852  }
853  TEST(j != myeset2.end());
854 }
855 
856 // tests that the MSet max_attained works
857 DEFINE_TESTCASE(maxattain1, backend) {
858  Xapian::Enquire enquire(get_database("apitest_simpledata"));
859  enquire.set_query(query("this"));
860  Xapian::MSet mymset = enquire.get_mset(0, 100);
861 
862  double mymax = 0;
863  Xapian::MSetIterator i = mymset.begin();
864  for ( ; i != mymset.end(); ++i) {
865  if (i.get_weight() > mymax) mymax = i.get_weight();
866  }
867  TEST_EQUAL(mymax, mymset.get_max_attained());
868 }
869 
870 // tests a reversed boolean query
// Three MSets are fetched for the same query and compared: mymset2 must
// equal mymset1, and mymset3 must be mymset1 in reverse order.
// NOTE(review): listing lines 874, 880 and 882 are missing from this
// extract.  They presumably configure the weighting scheme and the docid
// order before each get_mset() call - confirm against the original file.
871 DEFINE_TESTCASE(reversebool1, backend) {
872  Xapian::Enquire enquire(get_database("apitest_simpledata"));
873  enquire.set_query(Xapian::Query("this"));
875 
876  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
877  TEST_AND_EXPLAIN(mymset1.size() > 1,
878  "Mset was too small to test properly");
879 
881  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
883  Xapian::MSet mymset3 = enquire.get_mset(0, 100);
884 
885  // mymset1 and mymset2 should be identical
886  TEST_EQUAL(mymset1.size(), mymset2.size());
887 
888  {
// Walk both MSets forwards in step, comparing docids.
889  Xapian::MSetIterator i = mymset1.begin();
890  Xapian::MSetIterator j = mymset2.begin();
891  for ( ; i != mymset1.end(); ++i, j++) {
892  TEST(j != mymset2.end());
893  // if this fails, then setting match_sort_forward=true was not
894  // the same as the default.
895  TEST_EQUAL(*i, *j);
896  }
897  TEST(j == mymset2.end());
898  }
899 
900  // mymset1 and mymset3 should be same but reversed
901  TEST_EQUAL(mymset1.size(), mymset3.size());
902 
903  {
// Walk mymset1 forwards and mymset3 backwards from its end.  The size check
// above ensures j is never decremented past mymset3.begin().
904  Xapian::MSetIterator i = mymset1.begin();
905  Xapian::MSetIterator j = mymset3.end();
906  for ( ; i != mymset1.end(); ++i) {
907  --j;
908  // if this fails, then setting match_sort_forward=false didn't
909  // reverse the results.
910  TEST_EQUAL(*i, *j);
911  }
912  }
913 }
914 
915 // tests a reversed boolean query, where the full mset isn't returned
// mymset2 and mymset3 are limited to half the size of the full result:
// mymset2 must be the first half of mymset1, and mymset3 must be the last
// items of mymset1 in reverse order.
// NOTE(review): listing lines 919, 926, 929 and 951 are missing from this
// extract.  They presumably configure the weighting scheme and the docid
// order before each get_mset() call, and line 951 presumably declares the
// iterator `j` used by the final loop - confirm against the original file.
916 DEFINE_TESTCASE(reversebool2, backend) {
917  Xapian::Enquire enquire(get_database("apitest_simpledata"));
918  enquire.set_query(Xapian::Query("this"));
920 
921  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
922 
923  TEST_AND_EXPLAIN(mymset1.size() > 1,
924  "Mset was too small to test properly");
925 
// Request only half as many items as the full result contains.
927  Xapian::doccount msize = mymset1.size() / 2;
928  Xapian::MSet mymset2 = enquire.get_mset(0, msize);
930  Xapian::MSet mymset3 = enquire.get_mset(0, msize);
931 
932  // mymset2 should be first msize items of mymset1
933  TEST_EQUAL(msize, mymset2.size());
934  {
935  Xapian::MSetIterator i = mymset1.begin();
936  Xapian::MSetIterator j = mymset2.begin();
937  for ( ; j != mymset2.end(); ++i, ++j) {
938  TEST(i != mymset1.end());
939  // if this fails, then setting match_sort_forward=true was not
940  // the same as the default.
941  TEST_EQUAL(*i, *j);
942  }
943  // mymset1 should be larger.
944  TEST(i != mymset1.end());
945  }
946 
947  // mymset3 should be last msize items of mymset1, in reverse order
948  TEST_EQUAL(msize, mymset3.size());
949  {
// Walk mymset3 forwards while stepping backwards from the end of mymset1.
950  Xapian::MSetIterator i = mymset1.end();
952  for (j = mymset3.begin(); j != mymset3.end(); ++j) {
953  // if this fails, then setting match_sort_forward=false didn't
954  // reverse the results.
955  --i;
956  TEST_EQUAL(*i, *j);
957  }
958  }
959 }
960 
961 // tests that get_matching_terms() returns the terms in the right order
// Builds the expected term order ("one", "two", "three", "four"), runs
// `myquery` against "apitest_termorder", expects exactly one match, and checks
// that the matching terms for that match equal the expected list.
// NOTE(review): listing lines 972-973 and 976 are absent from this extract,
// so the declaration of `myquery` and the operators joining its sub-queries
// are not visible here - confirm against the upstream file.
962 DEFINE_TESTCASE(getmterms1, backend) {
963  list<string> answers_list;
964  answers_list.push_back("one");
965  answers_list.push_back("two");
966  answers_list.push_back("three");
967  answers_list.push_back("four");
968 
969  Xapian::Database mydb(get_database("apitest_termorder"));
970  Xapian::Enquire enquire(mydb);
971 
974  Xapian::Query("one", 1, 1),
975  Xapian::Query("three", 1, 3)),
977  Xapian::Query("four", 1, 4),
978  Xapian::Query("two", 1, 2)));
979 
980  enquire.set_query(myquery);
981 
982  Xapian::MSet mymset = enquire.get_mset(0, 10);
983 
984  TEST_MSET_SIZE(mymset, 1);
 // Collect the matching terms of the first (only) result into a list so it
 // can be compared with the expected list in one go.
985  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
986  enquire.get_matching_terms_end(mymset.begin()));
987  TEST(list == answers_list);
988 }
989 
990 // tests that get_matching_terms() returns the terms only once
// Like getmterms1, but the term "one" appears in two sub-queries while the
// expected list contains it only once.
// NOTE(review): listing lines 1000-1001 and 1004 are absent from this
// extract, so the declaration of `myquery` and the operators joining its
// sub-queries are not visible here - confirm against the upstream file.
991 DEFINE_TESTCASE(getmterms2, backend) {
992  list<string> answers_list;
993  answers_list.push_back("one");
994  answers_list.push_back("two");
995  answers_list.push_back("three");
996 
997  Xapian::Database mydb(get_database("apitest_termorder"));
998  Xapian::Enquire enquire(mydb);
999 
1002  Xapian::Query("one", 1, 1),
1003  Xapian::Query("three", 1, 3)),
1005  Xapian::Query("one", 1, 4),
1006  Xapian::Query("two", 1, 2)));
1007 
1008  enquire.set_query(myquery);
1009 
1010  Xapian::MSet mymset = enquire.get_mset(0, 10);
1011 
1012  TEST_MSET_SIZE(mymset, 1);
 // Collect the matching terms of the first (only) result for comparison.
1013  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
1014  enquire.get_matching_terms_end(mymset.begin()));
1015  TEST(list == answers_list);
1016 }
1017 
1018 // test that running a query twice returns the same results
1019 DEFINE_TESTCASE(repeatquery1, backend) {
1020  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1021  enquire.set_query(Xapian::Query("this"));
1022 
1023  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1024 
1025  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1026  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1027  TEST_EQUAL(mymset1, mymset2);
1028 }
1029 
1030 // test that prefetching documents works (at least, gives same results)
1031 DEFINE_TESTCASE(fetchdocs1, backend) {
1032  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1033  enquire.set_query(Xapian::Query("this"));
1034 
1035  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1036 
1037  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1038  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1039  TEST_EQUAL(mymset1, mymset2);
1040  mymset2.fetch(mymset2[0], mymset2[mymset2.size() - 1]);
1041  mymset2.fetch(mymset2.begin(), mymset2.end());
1042  mymset2.fetch(mymset2.begin());
1043  mymset2.fetch();
1044 
1045  Xapian::MSetIterator it1 = mymset1.begin();
1046  Xapian::MSetIterator it2 = mymset2.begin();
1047 
1048  while (it1 != mymset1.end() && it2 != mymset2.end()) {
1050  it2.get_document().get_data());
1051  TEST_NOT_EQUAL(it1.get_document().get_data(), "");
1052  TEST_NOT_EQUAL(it2.get_document().get_data(), "");
1053  it1++;
1054  it2++;
1055  }
1056  TEST_EQUAL(it1, mymset1.end());
1057  TEST_EQUAL(it1, mymset2.end());
1058 }
1059 
1060 // test that searching for a term not in the database fails nicely
// Runs a query for the single term "frink" and passes the result to
// mset_expect_order() with no expected docids.
// NOTE(review): listing line 1063 is absent from this extract, so a statement
// between creating `enquire` and setting the query may be missing - confirm
// against the upstream file.
1061 DEFINE_TESTCASE(absentterm1, backend) {
1062  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1064  enquire.set_query(Xapian::Query("frink"));
1065 
1066  Xapian::MSet mymset = enquire.get_mset(0, 10);
1067  mset_expect_order(mymset);
1068 }
1069 
1070 // as absentterm1, but setting query from a vector of terms
1071 DEFINE_TESTCASE(absentterm2, backend) {
1072  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1073  vector<string> terms;
1074  terms.push_back("frink");
1075 
1076  Xapian::Query query(Xapian::Query::OP_OR, terms.begin(), terms.end());
1077  enquire.set_query(query);
1078 
1079  Xapian::MSet mymset = enquire.get_mset(0, 10);
1080  mset_expect_order(mymset);
1081 }
1082 
1083 // test that rsets do sensible things
1084 DEFINE_TESTCASE(rset1, backend) {
1085  Xapian::Database mydb(get_database("apitest_rset"));
1086  Xapian::Enquire enquire(mydb);
1087  Xapian::Query myquery = query(Xapian::Query::OP_OR, "giraffe", "tiger");
1088  enquire.set_query(myquery);
1089 
1090  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1091 
1092  Xapian::RSet myrset;
1093  myrset.add_document(1);
1094 
1095  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1096 
1097  // We should have the same documents turn up, but 1 and 3 should
1098  // have higher weights with the RSet.
1099  TEST_MSET_SIZE(mymset1, 3);
1100  TEST_MSET_SIZE(mymset2, 3);
1101 }
1102 
// Checks that putting document 2 into an RSet changes the result order for
// the query "cuddly" OR "people" from (1, 2) to (2, 1).
// NOTE(review): listing line 1124 is absent from this extract; the loop body
// shown does not change the weighting scheme between its two iterations even
// though the comment below says TradWeight is tested second - confirm against
// the upstream file.
1104 DEFINE_TESTCASE(rset2, backend) {
1105  Xapian::Database mydb(get_database("apitest_rset"));
1106  Xapian::Enquire enquire(mydb);
1107  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
1108  enquire.set_query(myquery);
1109 
1110  // Test with the default BM25Weight, then with TradWeight.
1111  for (int i = 0; i < 2; ++i) {
1112  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1113 
1114  Xapian::RSet myrset;
1115  myrset.add_document(2);
1116 
1117  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1118 
1119  mset_expect_order(mymset1, 1, 2);
1120  // Document 2 should have higher weight than document 1 despite the wdf
1121  // of "people" being 1 because "people" indexes a document in the RSet
1122  // whereas "cuddly" (wdf=2) does not.
1123  mset_expect_order(mymset2, 2, 1);
1125  }
1126 }
1127 
1128 // test that rsets behave correctly with multiDBs
1129 DEFINE_TESTCASE(rsetmultidb1, backend && !multi) {
1130  Xapian::Database mydb1(get_database("apitest_rset", "apitest_simpledata2"));
1131  Xapian::Database mydb2(get_database("apitest_rset"));
1132  mydb2.add_database(get_database("apitest_simpledata2"));
1133 
1134  Xapian::Enquire enquire1(mydb1);
1135  Xapian::Enquire enquire2(mydb2);
1136 
1137  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "multiple");
1138 
1139  enquire1.set_query(myquery);
1140  enquire2.set_query(myquery);
1141 
1142  Xapian::RSet myrset1;
1143  Xapian::RSet myrset2;
1144  myrset1.add_document(4);
1145  myrset2.add_document(2);
1146 
1147  Xapian::MSet mymset1a = enquire1.get_mset(0, 10);
1148  Xapian::MSet mymset1b = enquire1.get_mset(0, 10, &myrset1);
1149  Xapian::MSet mymset2a = enquire2.get_mset(0, 10);
1150  Xapian::MSet mymset2b = enquire2.get_mset(0, 10, &myrset2);
1151 
1152  mset_expect_order(mymset1a, 1, 4);
1153  mset_expect_order(mymset1b, 4, 1);
1154  mset_expect_order(mymset2a, 1, 2);
1155  mset_expect_order(mymset2b, 2, 1);
1156 
1157  TEST(mset_range_is_same_weights(mymset1a, 0, mymset2a, 0, 2));
1158  TEST(mset_range_is_same_weights(mymset1b, 0, mymset2b, 0, 2));
1159  TEST_NOT_EQUAL(mymset1a, mymset1b);
1160  TEST_NOT_EQUAL(mymset2a, mymset2b);
1161 }
1162 
1163 // regression tests - used to cause assertion in stats.h to fail
1164 // Doesn't actually fail for multi but it doesn't make sense to run there.
1165 DEFINE_TESTCASE(rsetmultidb3, backend && !multi) {
1166  Xapian::Enquire enquire(get_database("apitest_simpledata2"));
1167  enquire.set_query(query(Xapian::Query::OP_OR, "cuddly", "people"));
1168  Xapian::MSet mset = enquire.get_mset(0, 10); // used to fail assertion
1169 }
1170 
// Compares the MSet for the single-term query "word" (set with a second
// set_query() argument of 2) against the MSet for `myquery2` and expects the
// two to be equal.
// NOTE(review): listing line 1178 is absent from this extract, so only the
// trailing arguments of the `myquery2` declaration are visible - confirm
// against the upstream file.
1172 DEFINE_TESTCASE(eliteset1, backend && !multi) {
1173  Xapian::Database mydb(get_database("apitest_simpledata"));
1174  Xapian::Enquire enquire(mydb);
1175 
1176  Xapian::Query myquery1 = query(Xapian::Query::OP_OR, "word");
1177 
1179  "simple", "word");
1180 
1181  enquire.set_query(myquery1, 2); // So the query lengths are the same.
1182  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1183 
1184  enquire.set_query(myquery2);
1185  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1186 
1187  TEST_EQUAL(mymset1, mymset2);
1188 }
1189 
// Variant registered with the `multi` condition: runs `myquery2` and checks
// the resulting MSet against hard-coded size, bounds, order and weights.
// NOTE(review): listing line 1195 is absent from this extract, so only the
// trailing arguments of the `myquery2` declaration are visible - confirm
// against the upstream file.
1191 DEFINE_TESTCASE(elitesetmulti1, multi) {
1192  Xapian::Database mydb(get_database("apitest_simpledata"));
1193  Xapian::Enquire enquire(mydb);
1194 
1196  "simple", "word");
1197 
1198  enquire.set_query(myquery2);
1199  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1200 
1201  // For a sharded database, the elite set is resolved per shard and can
1202  // select different terms because the max term weights vary with the
1203  // per-shard term statistics. I can't see a feasible way to create
1204  // an equivalent MSet to compare with so for now at least we hard-code
1205  // the expected values.
1206  TEST_EQUAL(mymset2.size(), 3);
1207  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1208  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1209  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
1210  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.1736756775723788948);
1211  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.0464816871772451012);
1212  mset_expect_order(mymset2, 2, 4, 5);
 // The top weight equals the max attained value checked above.
1213  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.0464816871772451012);
1214  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.64098768659591376373);
1215  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.46338869498075929698);
1216 }
1217 
// Compares the MSet for "word" AND "search" against the MSet for `myquery2`,
// which is built from the two sub-queries in `qs` with a final argument of 1,
// and expects the two MSets to be equal.
// NOTE(review): listing line 1229 is absent from this extract, so the start
// of the `myquery2` declaration (including its operator) is not visible -
// confirm against the upstream file.
1220 DEFINE_TESTCASE(eliteset2, backend && !multi) {
1221  Xapian::Database mydb(get_database("apitest_simpledata"));
1222  Xapian::Enquire enquire(mydb);
1223 
1224  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1225 
1226  vector<Xapian::Query> qs;
1227  qs.push_back(query("this"));
1228  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1230  qs.begin(), qs.end(), 1);
1231 
1232  enquire.set_query(myquery1);
1233  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1234 
1235  enquire.set_query(myquery2);
1236  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1237 
1238  TEST_EQUAL(mymset1, mymset2);
1239 }
1240 
// Variant registered with the `multi` condition: runs `myquery2` (built from
// the two sub-queries in `qs`) and checks the resulting MSet against
// hard-coded size, bounds, order and weights.
// NOTE(review): listing line 1251 is absent from this extract, so the start
// of the `myquery2` declaration is not visible - confirm against the upstream
// file.  `myquery1` is built below but not used by any visible statement.
1242 DEFINE_TESTCASE(elitesetmulti2, multi) {
1243  Xapian::Database mydb(get_database("apitest_simpledata"));
1244  Xapian::Enquire enquire(mydb);
1245 
1246  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1247 
1248  vector<Xapian::Query> qs;
1249  qs.push_back(query("this"));
1250  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1252  qs.begin(), qs.end(), 1);
1253 
1254  enquire.set_query(myquery2);
1255  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1256 
1257  // For a sharded database, the elite set is resolved per shard and can
1258  // select different terms because the max term weights vary with the
1259  // per-shard term statistics. I can't see a feasible way to create
1260  // an equivalent MSet to compare with so for now at least we hard-code
1261  // the expected values.
1262  TEST_EQUAL(mymset2.size(), 4);
1263  TEST_EQUAL(mymset2.get_matches_lower_bound(), 4);
1264  TEST_EQUAL(mymset2.get_matches_estimated(), 4);
1265  TEST_EQUAL(mymset2.get_matches_upper_bound(), 4);
1266  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 2.6585705165783908299);
1267  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.9700834242150864206);
1268  mset_expect_order(mymset2, 2, 1, 3, 5);
 // The top weight equals the max attained value checked above.
1269  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.9700834242150864206);
1270  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.051103097360122341775);
1271  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.043131803408968119595);
1272  TEST_EQUAL_DOUBLE(mymset2[3].get_weight(), 0.043131803408968119595);
1273 }
1274 
1275 
1278 DEFINE_TESTCASE(eliteset3, backend) {
1279  Xapian::Database mydb1(get_database("apitest_simpledata"));
1280  Xapian::Enquire enquire1(mydb1);
1281 
1282  Xapian::Database mydb2(get_database("apitest_simpledata"));
1283  Xapian::Enquire enquire2(mydb2);
1284 
1285  // make a query
1286  Xapian::Stem stemmer("english");
1287 
1288  string term1 = stemmer("word");
1289  string term2 = stemmer("rubbish");
1290  string term3 = stemmer("banana");
1291 
1292  vector<string> terms;
1293  terms.push_back(term1);
1294  terms.push_back(term2);
1295  terms.push_back(term3);
1296 
1297  Xapian::Query myquery1(Xapian::Query::OP_OR, terms.begin(), terms.end());
1298  enquire1.set_query(myquery1);
1299 
1300  Xapian::Query myquery2(Xapian::Query::OP_ELITE_SET, terms.begin(), terms.end(), 3);
1301  enquire2.set_query(myquery2);
1302 
1303  // retrieve the results
1304  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1305  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1306 
1307  TEST_EQUAL(mymset1, mymset2);
1308 
1309  TEST_EQUAL(mymset1.get_termfreq(term1),
1310  mymset2.get_termfreq(term1));
1311  TEST_EQUAL(mymset1.get_termweight(term1),
1312  mymset2.get_termweight(term1));
1313  TEST_EQUAL(mymset1.get_termfreq(term2),
1314  mymset2.get_termfreq(term2));
1315  TEST_EQUAL(mymset1.get_termweight(term2),
1316  mymset2.get_termweight(term2));
1317  TEST_EQUAL(mymset1.get_termfreq(term3),
1318  mymset2.get_termfreq(term3));
1319  TEST_EQUAL(mymset1.get_termweight(term3),
1320  mymset2.get_termweight(term3));
1321 }
1322 
// Compares the MSet for the single-term query "rubbish" against the MSet for
// `myquery2`; the latter must be non-empty and equal to the former.
// NOTE(review): listing line 1332 is absent from this extract, so only the
// trailing arguments of the `myquery2` declaration are visible - confirm
// against the upstream file.
1324 DEFINE_TESTCASE(eliteset4, backend && !multi) {
1325  Xapian::Database mydb1(get_database("apitest_simpledata"));
1326  Xapian::Enquire enquire1(mydb1);
1327 
1328  Xapian::Database mydb2(get_database("apitest_simpledata"));
1329  Xapian::Enquire enquire2(mydb2);
1330 
1331  Xapian::Query myquery1 = query("rubbish");
1333  "word", "rubbish", "fibble");
1334  enquire1.set_query(myquery1);
1335  enquire2.set_query(myquery2);
1336 
1337  // retrieve the results
1338  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1339  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1340 
 // Guard against the comparison passing trivially on two empty MSets.
1341  TEST_NOT_EQUAL(mymset2.size(), 0);
1342  TEST_EQUAL(mymset1, mymset2);
1343 }
1344 
// Variant registered with the `multi` condition: runs `myquery2` and checks
// the resulting MSet against hard-coded size, bounds, order and weights.
// NOTE(review): listing line 1350 is absent from this extract, so only the
// trailing arguments of the `myquery2` declaration are visible - confirm
// against the upstream file.
1346 DEFINE_TESTCASE(elitesetmulti4, multi) {
1347  Xapian::Database mydb2(get_database("apitest_simpledata"));
1348  Xapian::Enquire enquire2(mydb2);
1349 
1351  "word", "rubbish", "fibble");
1352  enquire2.set_query(myquery2);
1353 
1354  // retrieve the results
1355  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1356 
1357  // For a sharded database, the elite set is resolved per shard and can
1358  // select different terms because the max term weights vary with the
1359  // per-shard term statistics. I can't see a feasible way to create
1360  // an equivalent MSet to compare with so for now at least we hard-code
1361  // the expected values.
1362  TEST_EQUAL(mymset2.size(), 3);
1363  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1364  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1365  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
 // Here the max possible and max attained weights are the same value.
1366  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.4848948390060121572);
1367  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.4848948390060121572);
1368  mset_expect_order(mymset2, 3, 2, 4);
1369  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.4848948390060121572);
1370  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 1.0464816871772451012);
1371  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.64098768659591376373);
1372 }
1373 
// Regression test: builds a list of 24 terms (three rounds of eight, with
// repeats) and, for each n from 1 up to but excluding the list size, sets a
// query derived from the list and n and runs get_mset().  Nothing is asserted
// about the results; per the comment below, the call used to segfault.
// NOTE(review): listing lines 1393 and 1395 are absent from this extract, so
// the start of the `myquery1` declaration and of the statement that takes
// `myquery1` and 0.004 are not visible - confirm against the upstream file.
1375 DEFINE_TESTCASE(eliteset5, backend) {
1376  Xapian::Database mydb1(get_database("apitest_simpledata"));
1377  Xapian::Enquire enquire1(mydb1);
1378 
1379  vector<string> v;
1380  for (int i = 0; i != 3; ++i) {
1381  v.push_back("simpl");
1382  v.push_back("queri");
1383 
1384  v.push_back("rubbish");
1385  v.push_back("rubbish");
1386  v.push_back("rubbish");
1387  v.push_back("word");
1388  v.push_back("word");
1389  v.push_back("word");
1390  }
1391 
1392  for (Xapian::termcount n = 1; n != v.size(); ++n) {
1394  v.begin(), v.end(), n);
1396  myquery1,
1397  0.004);
1398 
1399  enquire1.set_query(myquery1);
1400  // On architectures with excess precision (or, at least, on x86), the
1401  // following call used to result in a segfault (at least when n=1).
1402  enquire1.get_mset(0, 10);
1403  }
1404 }
1405 
1407 DEFINE_TESTCASE(termlisttermfreq1, backend) {
1408  Xapian::Database mydb(get_database("apitest_simpledata"));
1409  Xapian::Enquire enquire(mydb);
1410  Xapian::Stem stemmer("english");
1411  Xapian::RSet rset1;
1412  Xapian::RSet rset2;
1413  rset1.add_document(5);
1414  rset2.add_document(6);
1415 
1416  Xapian::ESet eset1 = enquire.get_eset(1000, rset1);
1417  Xapian::ESet eset2 = enquire.get_eset(1000, rset2);
1418 
1419  // search for weight of term 'another'
1420  string theterm = stemmer("another");
1421 
1422  double wt1 = 0;
1423  double wt2 = 0;
1424  {
1425  Xapian::ESetIterator i = eset1.begin();
1426  for ( ; i != eset1.end(); ++i) {
1427  if (*i == theterm) {
1428  wt1 = i.get_weight();
1429  break;
1430  }
1431  }
1432  }
1433  {
1434  Xapian::ESetIterator i = eset2.begin();
1435  for ( ; i != eset2.end(); ++i) {
1436  if (*i == theterm) {
1437  wt2 = i.get_weight();
1438  break;
1439  }
1440  }
1441  }
1442 
1443  TEST_NOT_EQUAL(wt1, 0);
1444  TEST_NOT_EQUAL(wt2, 0);
1445  TEST_EQUAL(wt1, wt2);
1446 }
1447 
// Checks MSet::get_termfreq() and MSet::get_termweight() for terms inside and
// outside the query, using the same pair of databases combined in two ways
// (one get_database() call versus add_database()).
// NOTE(review): listing lines 1464, 1466 and 1505-1506 are absent from this
// extract, so the start of the `myquery` declaration, the operator of its
// nested sub-query, and whatever the loop does at the end of each iteration
// are not visible - confirm against the upstream file.
1449 DEFINE_TESTCASE(qterminfo1, backend) {
1450  Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
1451  Xapian::Enquire enquire1(mydb1);
1452 
1453  Xapian::Database mydb2(get_database("apitest_simpledata"));
1454  mydb2.add_database(get_database("apitest_simpledata2"));
1455  Xapian::Enquire enquire2(mydb2);
1456 
1457  // make a query
1458  Xapian::Stem stemmer("english");
1459 
1460  string term1 = stemmer("word");
1461  string term2 = stemmer("inmemory");
1462  string term3 = stemmer("flibble");
1463 
1465  Xapian::Query(term1),
1467  Xapian::Query(term2),
1468  Xapian::Query(term3)));
 // Combine the query so far with the negation of the term "Boolean".
1469  myquery = myquery &~ Xapian::Query("Boolean");
1470  enquire1.set_query(myquery);
1471  enquire2.set_query(myquery);
1472 
1473  for (int i = 1; i <= 2; ++i) {
1474  // Retrieve the results.
 // Zero items are requested; only the term statistics are inspected.
1475  Xapian::MSet mymset1a = enquire1.get_mset(0, 0);
1476  Xapian::MSet mymset2a = enquire2.get_mset(0, 0);
1477 
 // Both ways of combining the databases must report the same frequencies.
1478  TEST_EQUAL(mymset1a.get_termfreq(term1),
1479  mymset2a.get_termfreq(term1));
1480  TEST_EQUAL(mymset1a.get_termfreq(term2),
1481  mymset2a.get_termfreq(term2));
1482  TEST_EQUAL(mymset1a.get_termfreq(term3),
1483  mymset2a.get_termfreq(term3));
1484 
1485  TEST_EQUAL(mymset1a.get_termfreq(term1), 3);
1486  TEST_EQUAL(mymset1a.get_termfreq(term2), 1);
1487  TEST_EQUAL(mymset1a.get_termfreq(term3), 0);
1488 
1489  TEST_NOT_EQUAL(mymset1a.get_termweight(term1), 0);
1490  TEST_NOT_EQUAL(mymset1a.get_termweight(term2), 0);
1491  // Non-existent terms should have zero weight.
1492  TEST_EQUAL(mymset1a.get_termweight(term3), 0);
1493 
 // "banana" is not one of the query terms above: a frequency of 1 and a
 // weight of zero are expected for it.
1494  TEST_EQUAL(mymset1a.get_termfreq(stemmer("banana")), 1);
1495  TEST_EQUAL(mymset1a.get_termweight(stemmer("banana")), 0.0);
1496 
1497  TEST_EQUAL(mymset1a.get_termfreq("sponge"), 0);
1498  TEST_EQUAL(mymset1a.get_termweight(stemmer("sponge")), 0.0);
1499 
1500  TEST_EQUAL(mymset1a.get_termfreq("Boolean"), 0);
1501  TEST_EQUAL(mymset1a.get_termweight("Boolean"), 0.0);
1502 
1503  // Repeat tests with TradWeight. (Regression test to ensure
1504  // non-existent terms get zero weight with TradWeight.)
1507  }
1508 }
1509 
// Checks that get_termweight("paragraph") is non-zero both for a single-term
// query and for a compound query whose last sub-query is term1 AND term2.
// Per the comment below, get_mset() on the compound query used to throw
// "AssertionError" in debug builds.
// NOTE(review): listing line 1526 is absent from this extract, so the start
// of the `query` declaration (including its outer operator and first
// operand) is not visible - confirm against the upstream file.
1511 DEFINE_TESTCASE(qterminfo2, backend) {
1512  Xapian::Database db(get_database("apitest_simpledata"));
1513  Xapian::Enquire enquire(db);
1514 
1515  // make a query
1516  Xapian::Stem stemmer("english");
1517 
1518  string term1 = stemmer("paragraph");
1519  string term2 = stemmer("another");
1520 
1521  enquire.set_query(Xapian::Query(term1));
1522  Xapian::MSet mset0 = enquire.get_mset(0, 10);
1523 
 // NOTE(review): the lookup uses the literal "paragraph" rather than term1
 // - presumably the stemmed form is identical; verify.
1524  TEST_NOT_EQUAL(mset0.get_termweight("paragraph"), 0);
1525 
1527  Xapian::Query(Xapian::Query::OP_AND, term1, term2));
1528  enquire.set_query(query);
1529 
1530  // retrieve the results
1531  // Note: get_mset() used to throw "AssertionError" in debug builds
1532  Xapian::MSet mset = enquire.get_mset(0, 10);
1533 
1534  TEST_NOT_EQUAL(mset.get_termweight("paragraph"), 0);
1535 }
1536 
1537 // tests that when specifying that no items are to be returned, those
1538 // statistics which should be the same are.
1539 DEFINE_TESTCASE(msetzeroitems1, backend) {
1540  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1541  enquire.set_query(query("this"));
1542  Xapian::MSet mymset1 = enquire.get_mset(0, 0);
1543 
1544  Xapian::MSet mymset2 = enquire.get_mset(0, 1);
1545 
1546  TEST_EQUAL(mymset1.get_max_possible(), mymset2.get_max_possible());
1547 }
1548 
1549 // test that the matches_* of a simple query are as expected
// Runs a series of simple queries and, after each get_mset() call, checks the
// lower bound, estimate and upper bound of the number of matches.
// NOTE(review): after every group of three bounds checks the listing's own
// numbering skips three lines (1562-1564, 1572-1574, ... 1661-1663), so
// further checks may be missing from this extract - confirm against the
// upstream file.
1550 DEFINE_TESTCASE(matches1, backend) {
1551  Xapian::Database db = get_database("apitest_simpledata");
1552  Xapian::Enquire enquire(db);
1553  Xapian::Query myquery;
1554  Xapian::MSet mymset;
1555 
 // Single term.
1556  myquery = query("word");
1557  enquire.set_query(myquery);
1558  mymset = enquire.get_mset(0, 10);
1559  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1560  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1561  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1565 
 // OR with a second term: the expected counts stay at 2.
1566  myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
1567  enquire.set_query(myquery);
1568  mymset = enquire.get_mset(0, 10);
1569  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1570  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1571  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1575 
 // AND of the same two terms: no matches expected.
1576  myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
1577  enquire.set_query(myquery);
1578  mymset = enquire.get_mset(0, 10);
1579  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1580  TEST_EQUAL(mymset.get_matches_estimated(), 0);
1581  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
1585 
1586  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1587  enquire.set_query(myquery);
1588  mymset = enquire.get_mset(0, 10);
1589  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1590  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1591  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1595 
 // Same AND query again, but requesting zero items this time.
1596  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1597  enquire.set_query(myquery);
1598  mymset = enquire.get_mset(0, 0);
1599  if (db.size() > 1) {
1600  // We get a tighter lower bound because each shard is handled
1601  // separately and that happens to give us the same tight range for
1602  // both terms in one shard, and no matches for one term in the other.
1603  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1604  } else {
1605  // The matcher can tell at least 1 document must match by taking into
1606  // account the ranges of matched docids.
1607  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1608  }
1609  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1610  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1614 
 // Requesting one or two items gives exact bounds again.
1615  mymset = enquire.get_mset(0, 1);
1616  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1617  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1618  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1622 
1623  mymset = enquire.get_mset(0, 2);
1624  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1625  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1626  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1630 
 // A different AND query, expected to match exactly one document whatever
 // range of results is requested.
1631  myquery = query(Xapian::Query::OP_AND, "paragraph", "another");
1632  enquire.set_query(myquery);
1633  mymset = enquire.get_mset(0, 0);
1634  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1635  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1636  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1640 
1641  mymset = enquire.get_mset(0, 1);
1642  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1643  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1644  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1648 
1649  mymset = enquire.get_mset(0, 2);
1650  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1651  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1652  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1656 
 // Start from the second result (first argument 1).
1657  mymset = enquire.get_mset(1, 20);
1658  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1659  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1660  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1664 }
1665 
1666 // tests that wqf affects the document weights
1667 DEFINE_TESTCASE(wqf1, backend) {
1668  // Both queries have length 2; in q1 word has wqf=2, in q2 word has wqf=1
1669  Xapian::Query q1("word", 2);
1670  Xapian::Query q2("word");
1671  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1672  enquire.set_query(q1);
1673  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1674  enquire.set_query(q2);
1675  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1676  // Check the weights
1677  TEST(mset1.begin().get_weight() > mset2.begin().get_weight());
1678 }
1679 
1680 // tests that query length affects the document weights
1681 DEFINE_TESTCASE(qlen1, backend) {
1682  Xapian::Query q1("word");
1683  Xapian::Query q2("word");
1684  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1685  enquire.set_query(q1);
1686  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1687  enquire.set_query(q2);
1688  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1689  // Check the weights
1690  // TEST(mset1.begin().get_weight() < mset2.begin().get_weight());
1691  TEST(mset1.begin().get_weight() == mset2.begin().get_weight());
1692 }
1693 
1694 // tests that opening a non-existent termlist throws the correct exception
// Opens "apitest_onedoc", opens the termlist of document 1, and then asks for
// the termlist of docid 999999999.
// NOTE(review): listing lines 1697-1700 and 1704 are absent from this
// extract, and the last statement ends with an unmatched ')' - presumably it
// was the final argument of a wrapper whose opening line is among the missing
// ones; confirm against the upstream file.
1695 DEFINE_TESTCASE(termlist1, backend) {
1696  Xapian::Database db(get_database("apitest_onedoc"));
1701  // Cause the database to be used properly, showing up problems
1702  // with the link being in a bad state.
1703  Xapian::TermIterator temp = db.termlist_begin(1);
1705  Xapian::TermIterator t = db.termlist_begin(999999999));
1706 }
1707 
1708 // tests that a Xapian::TermIterator works as an STL iterator
// Copies a TermIterator in two ways, builds a vector<string> from an iterator
// range, and then re-walks the termlist of document 1 checking that it yields
// the same terms as the vector and ends at the same point.
// NOTE(review): listing line 1711 is absent from this extract, so the
// declaration of `t` is not visible - confirm against the upstream file.
1709 DEFINE_TESTCASE(termlist2, backend) {
1710  Xapian::Database db(get_database("apitest_onedoc"));
1712  Xapian::TermIterator tend = db.termlist_end(1);
1713 
1714  // test operator= creates a copy which compares equal
1715  Xapian::TermIterator t_copy = t;
1716  TEST_EQUAL(t, t_copy);
1717 
1718  // test copy constructor creates a copy which compares equal
1719  Xapian::TermIterator t_clone(t);
1720  TEST_EQUAL(t, t_clone);
1721 
 // Construct a container directly from the iterator pair.
1722  vector<string> v(t, tend);
1723 
 // Start over with fresh iterators and compare term by term.
1724  t = db.termlist_begin(1);
1725  tend = db.termlist_end(1);
1726  vector<string>::const_iterator i;
1727  for (i = v.begin(); i != v.end(); ++i) {
1728  TEST_NOT_EQUAL(t, tend);
1729  TEST_EQUAL(*i, *t);
1730  t++;
1731  }
 // The termlist must be exhausted exactly when the vector is.
1732  TEST_EQUAL(t, tend);
1733 }
1734 
// Helper which opens "apitest_onedoc" in a local Database object and returns
// an iterator over the termlist of document 1; the local Database goes out of
// scope when the helper returns.
// NOTE(review): listing line 1736 (the function's name and parameter list) is
// absent from this extract - confirm against the upstream file.
1735 static Xapian::TermIterator
1737 {
1738  Xapian::Database db(get_database("apitest_onedoc"));
1739  return db.termlist_begin(1);
1740 }
1741 
1742 // tests that a Xapian::TermIterator still works when the DB is deleted
// Walks two term iterators, `t` and `u`, in step until `t` reaches the end of
// document 1's termlist, checking that they yield the same terms.
// NOTE(review): listing lines 1744 and 1746 are absent from this extract, so
// the declarations of `u` and `t` are not visible - confirm against the
// upstream file.
1743 DEFINE_TESTCASE(termlist3, backend) {
1745  Xapian::Database db(get_database("apitest_onedoc"));
1747  Xapian::TermIterator tend = db.termlist_end(1);
1748 
1749  while (t != tend) {
1750  TEST_EQUAL(*t, *u);
1751  t++;
1752  u++;
1753  }
1754 }
1755 
1756 // tests skip_to
// Calls skip_to() on `i` with an empty string and then with "\xff"; nothing
// is asserted, so the test only checks that the calls complete.
// NOTE(review): listing line 1759 is absent from this extract, so the
// declaration of `i` is not visible - confirm against the upstream file.
1757 DEFINE_TESTCASE(termlist4, backend) {
1758  Xapian::Database db(get_database("apitest_onedoc"));
1760  i.skip_to("");
1761  i.skip_to("\xff");
1762 }
1763 
1764 // tests punctuation is OK in terms (particularly in remote queries)
1765 DEFINE_TESTCASE(puncterms1, backend) {
1766  Xapian::Database db(get_database("apitest_punc"));
1767  Xapian::Enquire enquire(db);
1768 
1769  Xapian::Query q1("semi;colon");
1770  enquire.set_query(q1);
1771  Xapian::MSet m1 = enquire.get_mset(0, 10);
1772 
1773  Xapian::Query q2("col:on");
1774  enquire.set_query(q2);
1775  Xapian::MSet m2 = enquire.get_mset(0, 10);
1776 
1777  Xapian::Query q3("com,ma");
1778  enquire.set_query(q3);
1779  Xapian::MSet m3 = enquire.get_mset(0, 10);
1780 }
1781 
1782 // test that searching for a term with a space or backslash in it works
// Searches "apitest_space" for three terms containing a space, a tab and a
// backslash respectively; each search must give exactly one match, counted
// both via TEST_MSET_SIZE and by iterating the MSet.  The first two searches
// also check the matching document's data and values.
// NOTE(review): listing line 1787 is absent from this extract, so the
// declaration of the iterator `m` is not visible - confirm against the
// upstream file.
1783 DEFINE_TESTCASE(spaceterms1, backend) {
1784  Xapian::Enquire enquire(get_database("apitest_space"));
1785  Xapian::MSet mymset;
1786  Xapian::doccount count;
1788  Xapian::Stem stemmer("english");
1789 
1790  enquire.set_query(stemmer("space man"));
1791  mymset = enquire.get_mset(0, 10);
1792  TEST_MSET_SIZE(mymset, 1);
1793  count = 0;
1794  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1795  TEST_EQUAL(count, 1);
1796 
 // Values 1 to 6 (value 0 is not checked here) and the data must be
 // non-empty.
1797  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
1798  TEST_NOT_EQUAL(mymset.begin().get_document().get_data(), "");
1799  TEST_NOT_EQUAL(mymset.begin().get_document().get_value(value_no), "");
1800  }
1801 
1802  enquire.set_query(stemmer("tab\tby"));
1803  mymset = enquire.get_mset(0, 10);
1804  TEST_MSET_SIZE(mymset, 1);
1805  count = 0;
1806  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1807  TEST_EQUAL(count, 1);
1808 
 // Values 0 to 6 must be non-empty; value 0 must additionally be longer
 // than 262 bytes with byte 255 at offset 262.
1809  for (Xapian::valueno value_no = 0; value_no < 7; ++value_no) {
1810  string value = mymset.begin().get_document().get_value(value_no);
1811  TEST_NOT_EQUAL(value, "");
1812  if (value_no == 0) {
1813  TEST(value.size() > 262);
1814  TEST_EQUAL(static_cast<unsigned char>(value[262]), 255);
1815  }
1816  }
1817 
1818  enquire.set_query(stemmer("back\\slash"));
1819  mymset = enquire.get_mset(0, 10);
1820  TEST_MSET_SIZE(mymset, 1);
1821  count = 0;
1822  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1823  TEST_EQUAL(count, 1);
1824 }
1825 
1826 // test that XOR queries work
// Builds an OP_XOR query over the stemmed terms "this", "word" and "of" and
// expects the matches 1, 2, 5, 6 in that order.
// NOTE(review): listing line 1837 is absent from this extract, so a statement
// between building the query and setting it may be missing - confirm against
// the upstream file.  The column alignment of the table in the comment below
// was lost in this listing, so it no longer shows which terms index which
// document.
1827 DEFINE_TESTCASE(xor1, backend) {
1828  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1829  Xapian::Stem stemmer("english");
1830 
1831  vector<string> terms;
1832  terms.push_back(stemmer("this"));
1833  terms.push_back(stemmer("word"));
1834  terms.push_back(stemmer("of"));
1835 
1836  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1838  enquire.set_query(query);
1839 
1840  Xapian::MSet mymset = enquire.get_mset(0, 10);
1841  // Docid this word of Match?
1842  // 1 * *
1843  // 2 * * * *
1844  // 3 * *
1845  // 4 * *
1846  // 5 * *
1847  // 6 * *
1848  mset_expect_order(mymset, 1, 2, 5, 6);
1849 }
1850 
1852 DEFINE_TESTCASE(xor2, backend) {
1853  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1854  Xapian::Stem stemmer("english");
1855 
1856  vector<string> terms;
1857  terms.push_back(stemmer("this"));
1858  terms.push_back(stemmer("word"));
1859  terms.push_back(stemmer("of"));
1860 
1861  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1862  enquire.set_query(query);
1863 
1864  Xapian::MSet mymset = enquire.get_mset(0, 10);
1865  // Docid LEN this word of Match?
1866  // 1 28 2 *
1867  // 2 81 5 8 1 *
1868  // 3 15 1 2
1869  // 4 31 1 1
1870  // 5 15 1 *
1871  // 6 15 1 *
1872  mset_expect_order(mymset, 2, 1, 5, 6);
1873 }
1874 
1875 // test Xapian::Database::get_document()
// Fetches document 1 from "apitest_onedoc", copies the Document object, and
// checks that modifying the data through one copy is visible through the
// other.
// NOTE(review): listing lines 1879-1883 are absent from this extract, so
// statements between fetching the document and the handle check may be
// missing - confirm against the upstream file.
1876 DEFINE_TESTCASE(getdoc1, backend) {
1877  Xapian::Database db(get_database("apitest_onedoc"));
1878  Xapian::Document doc(db.get_document(1));
1884  // Check that Document works as a handle on modification
1885  // (this was broken for the first try at Xapian::Document prior to 0.7).
1886  Xapian::Document doc2 = doc;
1887  doc.set_data("modified!");
1888  TEST_EQUAL(doc.get_data(), "modified!");
 // doc2 was copied before the modification yet must see the new data.
1889  TEST_EQUAL(doc.get_data(), doc2.get_data());
1890 }
1891 
1892 // test whether operators with no elements work as a null query
// Builds an OP_XOR query from an empty vector of sub-queries and checks that
// running it gives an MSet with no entries.
// NOTE(review): listing line 1905 is absent from this extract, so the start
// of the final statement (which ends with get_matching_terms_end(1)) is not
// visible - confirm against the upstream file.
1893 DEFINE_TESTCASE(emptyop1, backend) {
1894  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1895  vector<Xapian::Query> nullvec;
1896 
1897  Xapian::Query query1(Xapian::Query::OP_XOR, nullvec.begin(), nullvec.end());
1898 
1899  enquire.set_query(query1);
1900  Xapian::MSet mymset = enquire.get_mset(0, 10);
1901  TEST_MSET_SIZE(mymset, 0);
1902  // In Xapian < 1.3.0, this gave InvalidArgumentError (because
1903  // query1.empty()) but elsewhere we treat an empty query as just not
1904  // matching any documents, so we now do the same here too.
1906  enquire.get_matching_terms_end(1));
1907 }
1908 
1909 // Regression test for check_at_least SEGV when there are no matches.
1910 DEFINE_TESTCASE(checkatleast1, backend) {
1911  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1912  enquire.set_query(Xapian::Query("thom"));
1913  Xapian::MSet mymset = enquire.get_mset(0, 10, 11);
1914  TEST_EQUAL(0, mymset.size());
1915 }
1916 
1917 // Regression test - if check_at_least was set we returned (check_at_least - 1)
1918 // results, rather than the requested msize. Fixed in 1.0.2.
// Queries "apitest_simpledata" for the term "paragraph" twice with different
// (maxitems, checkatleast) pairs and checks the MSet size equals maxitems.
// NOTE(review): listing lines 1926 and 1932-1933 are absent from this extract,
// so further checks after each get_mset() call may exist but are not visible.
1919 DEFINE_TESTCASE(checkatleast2, backend) {
1920  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1921  enquire.set_query(Xapian::Query("paragraph"));
1922 
      // maxitems = 3, checkatleast = 10: exactly 3 results must come back.
1923  Xapian::MSet mymset = enquire.get_mset(0, 3, 10);
1924  TEST_MSET_SIZE(mymset, 3);
1925  TEST_EQUAL(mymset.get_matches_lower_bound(), 5);
1927 
      // maxitems = 2, checkatleast = 4: exactly 2 results must come back.
1928  mymset = enquire.get_mset(0, 2, 4);
1929  TEST_MSET_SIZE(mymset, 2);
      // NOTE(review): the next two checks are textually identical; confirm
      // whether the repetition is intentional.
1930  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1931  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1934 }
1935 
1936 // Feature tests - check_at_least with various sorting options.
// For each of three `order` settings and seven `sort` settings, runs the same
// query with three (maxitems, checkatleast) pairs and checks the MSet size and
// the match-count bounds.
// NOTE(review): listing lines 1944, 1947, 1950, 1976-1978, 1985-1987 and 1992
// are absent from this extract. The bodies of the `order` cases are therefore
// not visible; presumably each selects a docid ordering - confirm against the
// full source.
1937 DEFINE_TESTCASE(checkatleast3, backend) {
1938  Xapian::Enquire enquire(get_database("etext"));
1939  enquire.set_query(Xapian::Query("prussian")); // 60 matches.
1940 
1941  for (int order = 0; order < 3; ++order) {
1942  switch (order) {
1943  case 0:
1945  break;
1946  case 1:
1948  break;
1949  case 2:
1951  break;
1952  }
1953 
1954  for (int sort = 0; sort < 7; ++sort) {
      // `reverse` is true for odd `sort`, so each paired case below runs once
      // with reverse == true and once with reverse == false.
1955  bool reverse = (sort & 1);
1956  switch (sort) {
1957  case 0:
1958  enquire.set_sort_by_relevance();
1959  break;
1960  case 1: case 2:
1961  enquire.set_sort_by_value(0, reverse);
1962  break;
1963  case 3: case 4:
1964  enquire.set_sort_by_value_then_relevance(0, reverse);
1965  break;
1966  case 5: case 6:
1967  enquire.set_sort_by_relevance_then_value(0, reverse);
1968  break;
1969  }
1970 
      // maxitems (100) and checkatleast (500) both exceed the 60 matches:
      // all 60 are returned and every bound is expected to be exactly 60.
1971  Xapian::MSet mset = enquire.get_mset(0, 100, 500);
1972  TEST_MSET_SIZE(mset, 60);
1973  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1974  TEST_EQUAL(mset.get_matches_estimated(), 60);
1975  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
1979 
      // maxitems (50) is below the match count but checkatleast (100) is not:
      // 50 items are returned and the bounds are still expected to be exact.
1980  mset = enquire.get_mset(0, 50, 100);
1981  TEST_MSET_SIZE(mset, 50);
1982  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1983  TEST_EQUAL(mset.get_matches_estimated(), 60);
1984  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
1988 
      // checkatleast (50) is below the match count: only a lower bound of at
      // least 50 is required.
1989  mset = enquire.get_mset(0, 10, 50);
1990  TEST_MSET_SIZE(mset, 10);
1991  TEST_REL(mset.get_matches_lower_bound(),>=,50);
1993  }
1994  }
1995 }
1996 
1997 // tests all document postlists
// Iterates the posting list of the empty term ("") on "apitest_manydocs" and
// checks that the docids are consecutive starting from 1, first with plain
// increments and then with one skip_to() jump.
// NOTE(review): listing line 2000 is absent from this extract; the iterator
// `i` is presumably declared there - confirm against the full source.
1998 DEFINE_TESTCASE(allpostlist1, backend) {
1999  Xapian::Database db(get_database("apitest_manydocs"));
2001  unsigned int j = 1;
2002  while (i != db.postlist_end("")) {
2003  TEST_EQUAL(*i, j);
2004  i++;
2005  j++;
2006  }
      // j ends one past the last docid seen, so 513 means the last docid was 512.
2007  TEST_EQUAL(j, 513);
2008 
2009  i = db.postlist_begin("");
2010  j = 1;
2011  while (i != db.postlist_end("")) {
2012  TEST_EQUAL(*i, j);
2013  i++;
2014  j++;
      // On reaching 50, jump the expected docid to 60 and skip the iterator
      // there; the check at the top of the loop then verifies where it landed.
2015  if (j == 50) {
2016  j += 10;
2017  i.skip_to(j);
2018  }
2019  }
2020  TEST_EQUAL(j, 513);
2021 }
2022 
// Body of test_emptyterm1_helper. The signature line (listing line 2023) is
// absent from this extract; the cross-reference index at the end of this page
// gives it as `static void test_emptyterm1_helper(Xapian::Database &db)`.
// Checks how `db` answers queries about the empty term "".
// NOTE(review): listing lines 2026 and 2029 are also absent, so further checks
// may exist but are not visible here.
2024 {
2025  // Don't bother with postlist_begin() because allpostlist tests cover that.
      // The term frequency of "" is expected to equal the document count.
2027  TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
      // "" is expected to "exist" exactly when the database has documents.
2028  TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
2030 }
2031 
2032 // tests results of passing an empty term to various methods
// Opens three databases in turn - "apitest_manydocs", "apitest_onedoc" and
// the one named "" - and checks the expected document count of each.
// NOTE(review): listing lines 2036, 2040 and 2044 are absent from this
// extract; presumably each calls test_emptyterm1_helper(db) - confirm against
// the full source.
2033 DEFINE_TESTCASE(emptyterm1, backend) {
2034  Xapian::Database db(get_database("apitest_manydocs"));
2035  TEST_EQUAL(db.get_doccount(), 512);
2037 
2038  db = get_database("apitest_onedoc");
2039  TEST_EQUAL(db.get_doccount(), 1);
2041 
      // The database named "" is expected to contain no documents.
2042  db = get_database("");
2043  TEST_EQUAL(db.get_doccount(), 0);
2045 }
2046 
2047 // Test for alldocs postlist with a sparse database.
// The generator lambda stores a single document at docid 5, leaving docids
// 1-4 unused; the test then checks the posting list of the empty term ("").
// NOTE(review): listing line 2057 is absent from this extract; the iterator
// `i` is presumably declared there - confirm against the full source.
2048 DEFINE_TESTCASE(alldocspl1, backend) {
2049  Xapian::Database db = get_database("alldocspl1",
2050  [](Xapian::WritableDatabase& wdb,
2051  const string&) {
2052  Xapian::Document doc;
      // The document has data and a value in slot 0, but no terms are added.
2053  doc.set_data("5");
2054  doc.add_value(0, "5");
2055  wdb.replace_document(5, doc);
2056  });
2058  TEST(i != db.postlist_end(""));
2059  TEST_EQUAL(*i, 5);
2060  TEST_EQUAL(i.get_doclength(), 0);
2061  TEST_EQUAL(i.get_unique_terms(), 0);
2062  TEST_EQUAL(i.get_wdf(), 1);
      // Docid 5 is the only entry, so one increment must reach the end.
2063  ++i;
2064  TEST(i == db.postlist_end(""));
2065 }
2066 
2067 // Test reading and writing a modified alldocspostlist.
// Adds and deletes documents and, after each step, walks the posting list of
// the empty term ("") both before and after commit(), checking the docids and
// the per-document statistics reported by the iterator.
// NOTE(review): listing line 2071 is absent from this extract; `db` is
// presumably declared there as a writable database - confirm against the full
// source.
2068 DEFINE_TESTCASE(alldocspl2, writable) {
      // Declared outside the inner scope so they can still be used after that
      // scope (and everything declared in it) has ended.
2069  Xapian::PostingIterator i, end;
2070  {
2072  Xapian::Document doc;
2073  doc.set_data("5");
2074  doc.add_value(0, "5");
2075  db.replace_document(5, doc);
2076 
2077  // Test iterating before committing the changes.
2078  i = db.postlist_begin("");
2079  end = db.postlist_end("");
2080  TEST(i != end);
2081  TEST_EQUAL(*i, 5);
2082  TEST_EQUAL(i.get_doclength(), 0);
2083  TEST_EQUAL(i.get_unique_terms(), 0);
2084  TEST_EQUAL(i.get_wdf(), 1);
2085  ++i;
2086  TEST(i == end);
2087 
2088  db.commit();
2089 
2090  // Test iterating after committing the changes.
2091  i = db.postlist_begin("");
2092  end = db.postlist_end("");
2093  TEST(i != end);
2094  TEST_EQUAL(*i, 5);
2095  TEST_EQUAL(i.get_doclength(), 0);
2096  TEST_EQUAL(i.get_unique_terms(), 0);
2097  TEST_EQUAL(i.get_wdf(), 1);
2098  ++i;
2099  TEST(i == end);
2100 
2101  // Add another document.
2102  doc = Xapian::Document();
2103  doc.set_data("5");
2104  doc.add_value(0, "7");
2105  db.replace_document(7, doc);
2106 
2107  // Test iterating through before committing the changes.
2108  i = db.postlist_begin("");
2109  end = db.postlist_end("");
2110  TEST(i != end);
2111  TEST_EQUAL(*i, 5);
2112  TEST_EQUAL(i.get_doclength(), 0);
2113  TEST_EQUAL(i.get_unique_terms(), 0);
2114  TEST_EQUAL(i.get_wdf(), 1);
2115  ++i;
2116  TEST(i != end);
2117  TEST_EQUAL(*i, 7);
2118  TEST_EQUAL(i.get_doclength(), 0);
2119  TEST_EQUAL(i.get_unique_terms(), 0);
2120  TEST_EQUAL(i.get_wdf(), 1);
2121  ++i;
2122  TEST(i == end);
2123 
2124  // Delete the first document.
2125  db.delete_document(5);
2126 
2127  // Test iterating through before committing the changes.
2128  i = db.postlist_begin("");
2129  end = db.postlist_end("");
2130  TEST(i != end);
2131  TEST_EQUAL(*i, 7);
2132  TEST_EQUAL(i.get_doclength(), 0);
2133  TEST_EQUAL(i.get_unique_terms(), 0);
2134  TEST_EQUAL(i.get_wdf(), 1);
2135  ++i;
2136  TEST(i == end);
2137 
2138  // Test iterating through after committing the changes, and dropping the
2139  // reference to the main DB.
2140  db.commit();
2141  i = db.postlist_begin("");
2142  end = db.postlist_end("");
2143  }
2144 
      // The iterators obtained inside the scope are used here, after it closed.
2145  TEST(i != end);
2146  TEST_EQUAL(*i, 7);
2147  TEST_EQUAL(i.get_doclength(), 0);
2148  TEST_EQUAL(i.get_unique_terms(), 0);
2149  TEST_EQUAL(i.get_wdf(), 1);
2150  ++i;
2151  TEST(i == end);
2152 }
2153 
2154 // Feature test for Query::OP_SCALE_WEIGHT.
// For each query string and each multiplier, wraps the parsed query in
// OP_SCALE_WEIGHT and compares the resulting MSet with the unscaled one.
// NOTE(review): listing lines 2158, 2182-2183 and 2210 are absent from this
// extract. `qp` is presumably declared on 2158 (a query parser), and the
// statement ending on 2184 presumably checks that a negative factor is
// rejected - confirm both against the full source.
2155 DEFINE_TESTCASE(scaleweight1, backend) {
2156  Xapian::Database db(get_database("apitest_phrase"));
2157  Xapian::Enquire enq(db);
2159 
2160  static const char * const queries[] = {
2161  "pad",
2162  "milk fridge",
2163  "leave milk on fridge",
2164  "ordered milk operator",
2165  "ordered phrase operator",
2166  "leave \"milk on fridge\"",
2167  "notpresent",
2168  "leave \"milk notpresent\"",
2169  };
      // The trailing pair of equal values (0, 0) terminates the list: the loop
      // below stops at the first entry that equals its successor, so neither
      // trailing 0 is itself used as a multiplier.
2170  static const double multipliers[] = {
2171  -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
2172  0, 0
2173  };
2174 
2175  for (auto qstr : queries) {
      // Reset the debug stream so it only holds output for the current query.
2176  tout.str(string());
2177  Xapian::Query query1 = qp.parse_query(qstr);
2178  tout << "query1: " << query1.get_description() << '\n';
2179  for (const double *multp = multipliers; multp[0] != multp[1]; ++multp) {
2180  double mult = *multp;
      // Negative multipliers skip the MSet comparison below.
2181  if (mult < 0) {
2184  query1, mult));
2185  continue;
2186  }
2187  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, mult);
2188  tout << "query2: " << query2.get_description() << '\n';
2189 
2190  enq.set_query(query1);
2191  Xapian::MSet mset1 = enq.get_mset(0, 20);
2192  enq.set_query(query2);
2193  Xapian::MSet mset2 = enq.get_mset(0, 20);
2194 
      // Scaling must not change which documents match.
2195  TEST_EQUAL(mset1.size(), mset2.size());
2196 
2197  Xapian::MSetIterator i1, i2;
2198  if (mult > 0) {
      // Positive factor: same docids in the same order, with each weight
      // multiplied by `mult`.
2199  for (i1 = mset1.begin(), i2 = mset2.begin();
2200  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2201  TEST_EQUAL_DOUBLE(i1.get_weight() * mult, i2.get_weight());
2202  TEST_EQUAL(*i1, *i2);
2203  }
2204  } else {
2205  // Weights in mset2 are 0; so it should be sorted by docid.
2206  vector<Xapian::docid> ids1;
2207  vector<Xapian::docid> ids2;
2208  for (i1 = mset1.begin(), i2 = mset2.begin();
2209  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2211  TEST_EQUAL_DOUBLE(i2.get_weight(), 0);
2212  ids1.push_back(*i1);
2213  ids2.push_back(*i2);
2214  }
      // Only ids1 is sorted: ids2 must already be in ascending docid order
      // for the comparison to pass.
2215  sort(ids1.begin(), ids1.end());
2216  TEST_EQUAL(ids1, ids2);
2217  }
2218  }
2219  }
2220 }
2221 
2222 // Test Query::OP_SCALE_WEIGHT being used to multiply some of the weights of a
2223 // search by zero.
// NOTE(review): listing line 2227 is absent from this extract; the iterator
// `i` used in the loops below is presumably declared there - confirm against
// the full source.
2224 DEFINE_TESTCASE(scaleweight2, backend) {
2225  Xapian::Database db(get_database("apitest_phrase"));
2226  Xapian::Enquire enq(db);
2228 
      // query5 = (query1 scaled by 2.5) OR (query3 scaled by 0).
2229  Xapian::Query query1("fridg");
2230  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, 2.5);
2231  Xapian::Query query3("milk");
2232  Xapian::Query query4(Xapian::Query::OP_SCALE_WEIGHT, query3, 0);
2233  Xapian::Query query5(Xapian::Query::OP_OR, query2, query4);
2234 
2235  // query5 should first return the same results as query1, in the same
2236  // order, and then return the results of query3 which aren't also results
2237  // of query1, in ascending docid order. We test that this happens.
2238 
2239  // First, build a vector of docids matching the first part of the query,
2240  // and append the non-duplicate docids matching the second part of the
2241  // query.
2242  vector<Xapian::docid> ids1;
2243  set<Xapian::docid> idsin1;
2244  vector<Xapian::docid> ids3;
2245 
2246  enq.set_query(query1);
2247  Xapian::MSet mset1 = enq.get_mset(0, 20);
2248  enq.set_query(query3);
2249  Xapian::MSet mset3 = enq.get_mset(0, 20);
      // Both sub-queries must match something, otherwise the ordering check
      // at the end would be vacuous.
2250  TEST_NOT_EQUAL(mset1.size(), 0);
2251  for (i = mset1.begin(); i != mset1.end(); ++i) {
2252  ids1.push_back(*i);
2253  idsin1.insert(*i);
2254  }
2255  TEST_NOT_EQUAL(mset3.size(), 0);
2256  for (i = mset3.begin(); i != mset3.end(); ++i) {
      // Skip docids already collected from query1's results.
2257  if (idsin1.find(*i) != idsin1.end())
2258  continue;
2259  ids3.push_back(*i);
2260  }
      // The remaining query3-only docids are expected in ascending docid order.
2261  sort(ids3.begin(), ids3.end());
2262  ids1.insert(ids1.end(), ids3.begin(), ids3.end());
2263 
2264  // Now, run the combined query and build a vector of the matching docids.
2265  vector<Xapian::docid> ids5;
2266  enq.set_query(query5);
2267  Xapian::MSet mset5 = enq.get_mset(0, 20);
2268  for (i = mset5.begin(); i != mset5.end(); ++i) {
2269  ids5.push_back(*i);
2270  }
2271 
2272  TEST_EQUAL(ids1, ids5);
2273 }
2274 
2275 // Feature test for Database::get_uuid().
// Checks the UUID of a single database, of an empty Database object, and of
// Database objects built up with add_database().
// NOTE(review): listing line 2297 is absent from this extract; presumably it
// adds an in-memory database to db2 before the final check - confirm against
// the full source.
2276 DEFINE_TESTCASE(uuid1, backend && !multi) {
2277  SKIP_TEST_FOR_BACKEND("inmemory");
2278  Xapian::Database db = get_database("apitest_simpledata");
2279  string uuid1 = db.get_uuid();
      // The UUID string is expected to be 36 characters long.
2280  TEST_EQUAL(uuid1.size(), 36);
2281 
2282  // A database with no sub-databases has an empty UUID.
2283  Xapian::Database db2;
2284  TEST(db2.get_uuid().empty());
2285 
      // With exactly one sub-database, db2 reports that sub-database's UUID.
2286  db2.add_database(db);
2287  TEST_EQUAL(uuid1, db2.get_uuid());
2288 
2289  // Multi-database has multiple UUIDs (we don't define the format exactly
2290  // so this assumes something about the implementation).
2291  db2.add_database(db);
2292  TEST_EQUAL(uuid1 + ":" + uuid1, db2.get_uuid());
2293 
2294 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
2295  // This relies on InMemory databases not supporting uuids.
2296  // A multi-database containing a database with no uuid has no uuid.
2298  TEST(db2.get_uuid().empty());
2299 #endif
2300 }
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
static void print_mset_percentages(const Xapian::MSet &mset)
Definition: api_anydb.cc:53
static Xapian::TermIterator test_termlist3_helper()
Definition: api_anydb.cc:1736
DEFINE_TESTCASE(zerodocid1, backend)
Definition: api_anydb.cc:117
static void print_mset_weights(const Xapian::MSet &mset)
Definition: api_anydb.cc:44
static void test_emptyterm1_helper(Xapian::Database &db)
Definition: api_anydb.cc:2023
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:86
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:84
bool operator()(const string &tname) const override
Do we want this term in the ESet?
Definition: api_anydb.cc:546
Class implementing a "boolean" weighting scheme.
Definition: weight.h:678
An indexed database of documents.
Definition: database.h:75
Xapian::doccount get_termfreq(std::string_view term) const
Get the number of documents indexed by a specified term.
Definition: database.cc:262
PostingIterator postlist_begin(std::string_view term) const
Start iterating the postings of a term.
Definition: database.cc:192
TermIterator termlist_begin(Xapian::docid did) const
Start iterating the terms in a document.
Definition: database.cc:200
PositionIterator positionlist_begin(Xapian::docid did, std::string_view term) const
Start iterating positions for a term in a document.
Definition: database.cc:221
size_t size() const
Return number of shards in this Database object.
Definition: database.cc:105
void add_database(const Database &other)
Add shards from another Database.
Definition: database.h:109
bool term_exists(std::string_view term) const
Test if a particular term is present in any document.
Definition: database.cc:378
Xapian::termcount get_collection_freq(std::string_view term) const
Get the total number of occurrences of a specified term.
Definition: database.cc:273
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: database.cc:233
PostingIterator postlist_end(std::string_view) const noexcept
End iterator corresponding to postlist_begin().
Definition: database.h:258
TermIterator termlist_end(Xapian::docid) const noexcept
End iterator corresponding to termlist_begin().
Definition: database.h:271
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
Definition: database.cc:368
std::string get_uuid() const
Get the UUID for the database.
Definition: database.cc:505
Indicates an attempt to access a document not present in the database.
Definition: error.h:662
Class representing a document.
Definition: document.h:64
void set_data(std::string_view data)
Set the document data.
Definition: document.cc:81
std::string get_data() const
Get the document data.
Definition: document.cc:75
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
Definition: document.cc:185
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Definition: document.cc:191
Iterator over a Xapian::ESet.
Definition: eset.h:157
double get_weight() const
Get the weight for the current position.
Class representing a list of search results.
Definition: eset.h:42
Xapian::termcount size() const
Return number of items in this ESet object.
ESetIterator back() const
Return iterator pointing to the last object in this ESet.
Definition: eset.h:341
Xapian::termcount get_ebound() const
Return a bound on the full size of this ESet object.
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Definition: eset.h:329
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
Definition: eset.h:324
Querying session.
Definition: enquire.h:57
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
Definition: enquire.cc:85
static const int USE_EXACT_TERMFREQ
Flag telling get_eset() to always use the exact term frequency.
Definition: enquire.h:479
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
TermIterator get_matching_terms_begin(docid did) const
Iterate query terms matching a document.
Definition: enquire.cc:210
void set_sort_by_value_then_relevance(valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: enquire.cc:123
void set_cutoff(int percent_threshold, double weight_threshold=0)
Set lower bounds on percentage and/or weight.
Definition: enquire.cc:172
void set_expansion_scheme(std::string_view eweightname, double expand_k=1.0) const
Set the weighting scheme to use for expansion.
Definition: enquire.cc:216
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
ESet get_eset(termcount maxitems, const RSet &rset, int flags=0, const ExpandDecider *edecider=NULL, double min_weight=0.0) const
Perform query expansion.
Definition: enquire.cc:231
void set_sort_by_relevance_then_value(valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Definition: enquire.cc:144
void set_sort_by_relevance()
Set the sorting to be by relevance only.
Definition: enquire.cc:97
void set_sort_by_value(valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: enquire.cc:103
void set_collapse_key(valueno collapse_key, doccount collapse_max=1)
Control collapsing of results.
Definition: enquire.cc:165
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: enquire.cc:91
@ DESCENDING
docids sort in descending order.
Definition: enquire.h:134
@ ASCENDING
docids sort in ascending order (default)
Definition: enquire.h:132
@ DONT_CARE
docids sort in whatever order is most efficient for the backend.
Definition: enquire.h:136
static const int INCLUDE_QUERY_TERMS
Flag telling get_eset() to allow query terms in Xapian::ESet.
Definition: enquire.h:469
TermIterator get_matching_terms_end(docid) const noexcept
End iterator corresponding to get_matching_terms_begin().
Definition: enquire.h:435
ExpandDecider subclass which restricts terms to a particular prefix.
Virtual base class for expand decider functor.
Definition: expanddecider.h:38
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
Iterator over a Xapian::MSet.
Definition: mset.h:535
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Definition: mset.h:711
double get_weight() const
Get the weight for the current position.
Definition: msetiterator.cc:55
Xapian::Document get_document() const
Get the Document object for the current position.
Definition: msetiterator.cc:45
Class representing a list of search results.
Definition: mset.h:46
Xapian::doccount get_termfreq(std::string_view term) const
Get the termfreq of a term.
Definition: mset.cc:281
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: mset.cc:368
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: mset.cc:356
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: mset.cc:346
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: mset.cc:340
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: mset.cc:275
double get_termweight(std::string_view term) const
Get the term weight of a term.
Definition: mset.cc:300
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: mset.cc:334
void fetch(const MSetIterator &begin, const MSetIterator &end) const
Prefetch hint a range of items.
Definition: mset.h:774
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Definition: mset.h:803
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:786
double get_max_attained() const
The maximum weight attained by any document.
Definition: mset.cc:362
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: mset.cc:318
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:791
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: mset.cc:324
Class for iterating over a list of terms.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
void skip_to(Xapian::docid did)
Advance the iterator to document did.
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:516
Query parse_query(std::string_view query_string, unsigned flags=FLAG_DEFAULT, std::string_view default_prefix={})
Parse a query.
Definition: queryparser.cc:174
Class representing a query.
Definition: query.h:45
std::string get_description() const
Return a string describing this object.
Definition: query.cc:307
op
Query operators.
Definition: query.h:78
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
Definition: query.h:166
@ OP_XOR
Match documents which an odd number of subqueries match.
Definition: query.h:107
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Class representing a set of documents judged as relevant.
Definition: rset.h:39
void add_document(Xapian::docid did)
Mark a document as relevant.
Definition: rset.cc:55
Class representing a stemming algorithm.
Definition: stem.h:74
Class for iterating over a list of terms.
Definition: termiterator.h:41
void skip_to(std::string_view term)
Advance the iterator to term term.
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:1297
This class provides read/write access to a database.
Definition: database.h:964
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: database.cc:567
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a document in the database.
Definition: database.cc:582
void commit()
Commit pending modifications.
Definition: database.cc:543
string term
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:277
const int DB_BACKEND_INMEMORY
Use the "in memory" backend.
Definition: constants.h:182
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
static Xapian::Stem stemmer
Definition: stemtest.cc:42
#define TEST_REL(A, REL, B)
Test a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:35
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
a generic test suite engine
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
Definition: testsuite.h:298
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:293
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:303
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails.
Definition: testsuite.h:265
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:110
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:224
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:45
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:112
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Definition: testutils.h:77
static int verbose
Definition: xapian-delve.cc:46
Public interfaces for the Xapian library.