xapian-core  1.4.27
api_anydb.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002-2024 Olly Betts
7  * Copyright 2006,2008 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23  * USA
24  */
25 
26 #include <config.h>
27 
28 #include "api_anydb.h"
29 
30 #include <algorithm>
31 #include <string>
32 
33 #define XAPIAN_DEPRECATED(X) X
34 #include <xapian.h>
35 #include "testsuite.h"
36 #include "testutils.h"
37 
38 #include "apitest.h"
39 
40 #include <list>
41 
42 using namespace std;
43 
44 static void
46 {
47  Xapian::MSetIterator i = mset.begin();
48  for ( ; i != mset.end(); ++i) {
49  tout << " " << i.get_weight();
50  }
51 }
52 
53 static void
55 {
56  Xapian::MSetIterator i = mset.begin();
57  for ( ; i != mset.end(); ++i) {
58  tout << " " << mset.convert_to_percent(i);
59  }
60 }
61 
62 static Xapian::Query
64  const string & t1 = string(), const string & t2 = string(),
65  const string & t3 = string(), const string & t4 = string(),
66  const string & t5 = string(), const string & t6 = string(),
67  const string & t7 = string(), const string & t8 = string(),
68  const string & t9 = string(), const string & t10 = string())
69 {
70  vector<string> v;
71  Xapian::Stem stemmer("english");
72  if (!t1.empty()) v.push_back(stemmer(t1));
73  if (!t2.empty()) v.push_back(stemmer(t2));
74  if (!t3.empty()) v.push_back(stemmer(t3));
75  if (!t4.empty()) v.push_back(stemmer(t4));
76  if (!t5.empty()) v.push_back(stemmer(t5));
77  if (!t6.empty()) v.push_back(stemmer(t6));
78  if (!t7.empty()) v.push_back(stemmer(t7));
79  if (!t8.empty()) v.push_back(stemmer(t8));
80  if (!t9.empty()) v.push_back(stemmer(t9));
81  if (!t10.empty()) v.push_back(stemmer(t10));
82  return Xapian::Query(op, v.begin(), v.end());
83 }
84 
85 static Xapian::Query
87  const string & t1 = string(), const string & t2 = string(),
88  const string & t3 = string(), const string & t4 = string(),
89  const string & t5 = string(), const string & t6 = string(),
90  const string & t7 = string(), const string & t8 = string(),
91  const string & t9 = string(), const string & t10 = string())
92 {
93  vector<string> v;
94  Xapian::Stem stemmer("english");
95  if (!t1.empty()) v.push_back(stemmer(t1));
96  if (!t2.empty()) v.push_back(stemmer(t2));
97  if (!t3.empty()) v.push_back(stemmer(t3));
98  if (!t4.empty()) v.push_back(stemmer(t4));
99  if (!t5.empty()) v.push_back(stemmer(t5));
100  if (!t6.empty()) v.push_back(stemmer(t6));
101  if (!t7.empty()) v.push_back(stemmer(t7));
102  if (!t8.empty()) v.push_back(stemmer(t8));
103  if (!t9.empty()) v.push_back(stemmer(t9));
104  if (!t10.empty()) v.push_back(stemmer(t10));
105  return Xapian::Query(op, v.begin(), v.end(), parameter);
106 }
107 
108 static Xapian::Query
109 query(const string &t)
110 {
111  return Xapian::Query(Xapian::Stem("english")(t));
112 }
113 
114 // #######################################################################
115 // # Tests start here
116 
117 // tests that the backend doesn't return zero docids
118 DEFINE_TESTCASE(zerodocid1, backend) {
119  // open the database (in this case a simple text file
120  // we prepared earlier)
121 
122  Xapian::Database mydb(get_database("apitest_onedoc"));
123 
124  Xapian::Enquire enquire(mydb);
125 
126  // make a simple query, with one word in it - "word".
127  enquire.set_query(Xapian::Query("word"));
128 
129  // retrieve the top ten results (we only expect one)
130  Xapian::MSet mymset = enquire.get_mset(0, 10);
131 
132  // We've done the query, now check that the result is what
133  // we expect (1 document, with non-zero docid)
134  TEST_MSET_SIZE(mymset, 1);
135 
136  TEST_AND_EXPLAIN(*(mymset.begin()) != 0,
137  "A query on a database returned a zero docid");
138 }
139 
140 // tests that an empty query returns no matches
141 DEFINE_TESTCASE(emptyquery1, backend) {
142  Xapian::Enquire enquire(get_database("apitest_simpledata"));
143 
144  enquire.set_query(Xapian::Query());
145  Xapian::MSet mymset = enquire.get_mset(0, 10);
146  TEST_MSET_SIZE(mymset, 0);
147  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
148  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
149  TEST_EQUAL(mymset.get_matches_estimated(), 0);
153 
154  vector<Xapian::Query> v;
155  enquire.set_query(Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end()));
156  mymset = enquire.get_mset(0, 10);
157  TEST_MSET_SIZE(mymset, 0);
158  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
159  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
160  TEST_EQUAL(mymset.get_matches_estimated(), 0);
164 }
165 
166 // tests the document count for a simple query
167 DEFINE_TESTCASE(simplequery1, backend) {
168  Xapian::Enquire enquire(get_database("apitest_simpledata"));
169  enquire.set_query(Xapian::Query("word"));
170  Xapian::MSet mymset = enquire.get_mset(0, 10);
171  TEST_MSET_SIZE(mymset, 2);
172 }
173 
174 // tests for the right documents and weights returned with simple query
175 DEFINE_TESTCASE(simplequery2, backend) {
176  // open the database (in this case a simple text file
177  // we prepared earlier)
178  Xapian::Database db = get_database("apitest_simpledata");
179  Xapian::Enquire enquire(db);
180  enquire.set_query(Xapian::Query("word"));
181 
182  // retrieve the top results
183  Xapian::MSet mymset = enquire.get_mset(0, 10);
184 
185  // We've done the query, now check that the result is what
186  // we expect (documents 2 and 4)
187  mset_expect_order(mymset, 2, 4);
188 
189  // Check the weights
190  Xapian::MSetIterator i = mymset.begin();
191  // These weights are for BM25Weight(1,0,1,0.5,0.5)
192  TEST_EQUAL_DOUBLE(i.get_weight(), 1.04648168717725);
193  i++;
194  TEST_EQUAL_DOUBLE(i.get_weight(), 0.640987686595914);
195 }
196 
197 // tests for the right document count for another simple query
198 DEFINE_TESTCASE(simplequery3, backend) {
199  Xapian::Enquire enquire(get_database("apitest_simpledata"));
200  enquire.set_query(query("this"));
201  Xapian::MSet mymset = enquire.get_mset(0, 10);
202 
203  // Check that 6 documents were returned.
204  TEST_MSET_SIZE(mymset, 6);
205 }
206 
207 // test that a multidb with 3 dbs query returns correct docids
208 DEFINE_TESTCASE(multidb2, backend && !multi) {
209  Xapian::Database mydb2(get_database("apitest_simpledata"));
210  mydb2.add_database(get_database("apitest_simpledata2"));
211  mydb2.add_database(get_database("apitest_termorder"));
212  Xapian::Enquire enquire(mydb2);
213 
214  // make a query
215  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
217  enquire.set_query(myquery);
218 
219  // retrieve the top ten results
220  Xapian::MSet mymset = enquire.get_mset(0, 10);
221  mset_expect_order(mymset, 2, 3, 4, 10);
222 }
223 
224 // tests that when specifying maxitems to get_mset, no more than
225 // that are returned.
226 DEFINE_TESTCASE(msetmaxitems1, backend) {
227  Xapian::Enquire enquire(get_database("apitest_simpledata"));
228  enquire.set_query(query("this"));
229  Xapian::MSet mymset = enquire.get_mset(0, 1);
230  TEST_MSET_SIZE(mymset, 1);
231 
232  mymset = enquire.get_mset(0, 5);
233  TEST_MSET_SIZE(mymset, 5);
234 }
235 
236 // tests the returned weights are as expected (regression test for remote
237 // backend which was using the average weight rather than the actual document
238 // weight for computing weights - fixed in 1.0.0).
239 DEFINE_TESTCASE(expandweights1, backend) {
240  Xapian::Enquire enquire(get_database("apitest_simpledata"));
241  enquire.set_query(Xapian::Query("this"));
242 
243  Xapian::MSet mymset = enquire.get_mset(0, 10);
244 
245  Xapian::RSet myrset;
246  Xapian::MSetIterator i = mymset.begin();
247  myrset.add_document(*i);
248  myrset.add_document(*(++i));
249 
250  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
251  TEST_EQUAL(eset.size(), 3);
252  TEST_REL(eset.get_ebound(), >=, eset.size());
253  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
254  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
255  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
256 
257  // Test non-default k too.
258  eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ, 2.0);
259  TEST_EQUAL(eset.size(), 3);
260  TEST_REL(eset.get_ebound(), >=, eset.size());
261  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 5.88109547674955);
262  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 5.88109547674955);
263  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.44473599216144);
264 }
265 
266 // Just like test_expandweights1 but without USE_EXACT_TERMFREQ.
267 DEFINE_TESTCASE(expandweights2, backend) {
268  Xapian::Enquire enquire(get_database("apitest_simpledata"));
269  enquire.set_query(Xapian::Query("this"));
270 
271  Xapian::MSet mymset = enquire.get_mset(0, 10);
272 
273  Xapian::RSet myrset;
274  Xapian::MSetIterator i = mymset.begin();
275  myrset.add_document(*i);
276  myrset.add_document(*(++i));
277 
278  Xapian::ESet eset = enquire.get_eset(3, myrset);
279  TEST_EQUAL(eset.size(), 3);
280  TEST_REL(eset.get_ebound(), >=, eset.size());
281  // With a multi backend, the top three terms all happen to occur in both
282  // shard so their termfreq is exactly known even without
283  // USE_EXACT_TERMFREQ and so the weights should be the same for all
284  // test harness backends.
285  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
286  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
287  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
288 }
289 
290 DEFINE_TESTCASE(expandweights3, backend) {
291  Xapian::Enquire enquire(get_database("apitest_simpledata"));
292  enquire.set_query(Xapian::Query("this"));
293 
294  Xapian::MSet mymset = enquire.get_mset(0, 10);
295 
296  Xapian::RSet myrset;
297  Xapian::MSetIterator i = mymset.begin();
298  myrset.add_document(*i);
299  myrset.add_document(*(++i));
300 
301  // Set min_wt to 6.0
302  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 0, 6.0);
303  TEST_EQUAL(eset.size(), 2);
304  TEST_REL(eset.get_ebound(), >=, eset.size());
305  // With a multi backend, the top two terms all happen to occur in both
306  // shard so their termfreq is exactly known even without
307  // USE_EXACT_TERMFREQ and so the weights should be the same for all
308  // test harness backends.
309  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
310  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
311 }
312 
313 // tests that negative weights are returned
314 DEFINE_TESTCASE(expandweights4, backend) {
315  Xapian::Enquire enquire(get_database("apitest_simpledata"));
316  enquire.set_query(Xapian::Query("paragraph"));
317 
318  Xapian::MSet mymset = enquire.get_mset(0, 10);
319 
320  Xapian::RSet myrset;
321  Xapian::MSetIterator i = mymset.begin();
322  myrset.add_document(*i);
323  myrset.add_document(*(++i));
324 
325  Xapian::ESet eset = enquire.get_eset(37, myrset, 0, 0, -100);
326  // Now include negative weights
327  TEST_EQUAL(eset.size(), 37);
328  TEST_REL(eset.get_ebound(), >=, eset.size());
329  TEST_REL(eset[36].get_weight(), <, 0);
330  TEST_REL(eset[36].get_weight(), >=, -100);
331 }
332 
333 // test for Bo1EWeight
334 DEFINE_TESTCASE(expandweights5, backend) {
335  Xapian::Enquire enquire(get_database("apitest_simpledata"));
336  enquire.set_query(Xapian::Query("this"));
337 
338  Xapian::MSet mymset = enquire.get_mset(0, 10);
339 
340  Xapian::RSet myrset;
341  Xapian::MSetIterator i = mymset.begin();
342  myrset.add_document(*i);
343  myrset.add_document(*(++i));
344 
345  enquire.set_expansion_scheme("bo1");
346  Xapian::ESet eset = enquire.get_eset(3, myrset);
347 
348  TEST_EQUAL(eset.size(), 3);
349  TEST_REL(eset.get_ebound(), >=, eset.size());
350  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 7.21765284821702);
351  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.661623193760022);
352  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.58090119783738);
353 }
354 
355 // test that "prob" and "trad" can be set as the expansion scheme.
356 DEFINE_TESTCASE(expandweights6, backend) {
357  Xapian::Enquire enquire(get_database("apitest_simpledata"));
358  enquire.set_query(Xapian::Query("this"));
359 
360  Xapian::MSet mymset = enquire.get_mset(0, 10);
361 
362  Xapian::RSet myrset;
363  Xapian::MSetIterator i = mymset.begin();
364  myrset.add_document(*i);
365  myrset.add_document(*(++i));
366 
367  enquire.set_expansion_scheme("prob");
368  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
369 
370  TEST_EQUAL(eset.size(), 3);
371  TEST_REL(eset.get_ebound(), >=, eset.size());
372  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
373  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
374  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
375 
376  // Older scheme name "trad" (alias for "prob").
377  enquire.set_expansion_scheme("trad");
378  eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
379 
380  TEST_EQUAL(eset.size(), 3);
381  TEST_REL(eset.get_ebound(), >=, eset.size());
382  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
383  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
384  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
385 }
386 
387 // test that invalid scheme names are not accepted
388 DEFINE_TESTCASE(expandweights7, backend) {
389  Xapian::Enquire enquire(get_database("apitest_simpledata"));
390 
392  enquire.set_expansion_scheme("no_such_scheme"));
393 }
394 
395 // test that "expand_k" can be passed as a parameter to get_eset
396 DEFINE_TESTCASE(expandweights8, backend) {
397  Xapian::Enquire enquire(get_database("apitest_simpledata"));
398  enquire.set_query(Xapian::Query("this"));
399 
400  Xapian::MSet mymset = enquire.get_mset(0, 10);
401 
402  Xapian::RSet myrset;
403  Xapian::MSetIterator i = mymset.begin();
404  myrset.add_document(*i);
405  myrset.add_document(*(++i));
406 
407  // Set expand_k to 1.0 and min_wt to 0
408  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 1.0, 0, 0);
409  // With a multi backend, the top three terms all happen to occur in both
410  // shard so their termfreq is exactly known even without
411  // USE_EXACT_TERMFREQ and so the weights should be the same for all
412  // test harness backends.
413  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
414  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
415  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
416  TEST_REL(eset.back().get_weight(),>=,0);
417 }
418 
419 // tests that when specifying maxitems to get_eset, no more than
420 // that are returned.
421 DEFINE_TESTCASE(expandmaxitems1, backend) {
422  Xapian::Enquire enquire(get_database("apitest_simpledata"));
423  enquire.set_query(Xapian::Query("this"));
424 
425  Xapian::MSet mymset = enquire.get_mset(0, 10);
426  tout << "mymset.size() = " << mymset.size() << '\n';
427  TEST(mymset.size() >= 2);
428 
429  Xapian::RSet myrset;
430  Xapian::MSetIterator i = mymset.begin();
431  myrset.add_document(*i);
432  myrset.add_document(*(++i));
433 
434  Xapian::ESet myeset = enquire.get_eset(1, myrset);
435  TEST_EQUAL(myeset.size(), 1);
436  TEST_REL(myeset.get_ebound(), >=, myeset.size());
437 }
438 
439 // tests that a pure boolean query has all weights set to 0
440 DEFINE_TESTCASE(boolquery1, backend) {
441  Xapian::Query myboolquery(query("this"));
442 
443  // open the database (in this case a simple text file
444  // we prepared earlier)
445  Xapian::Enquire enquire(get_database("apitest_simpledata"));
446  enquire.set_query(myboolquery);
448 
449  // retrieve the top results
450  Xapian::MSet mymset = enquire.get_mset(0, 10);
451 
452  TEST_NOT_EQUAL(mymset.size(), 0);
453  TEST_EQUAL(mymset.get_max_possible(), 0);
454  for (Xapian::MSetIterator i = mymset.begin(); i != mymset.end(); ++i) {
455  TEST_EQUAL(i.get_weight(), 0);
456  }
457 }
458 
459 // tests that get_mset() specifying "this" works as expected
460 DEFINE_TESTCASE(msetfirst1, backend) {
461  Xapian::Enquire enquire(get_database("apitest_simpledata"));
462  enquire.set_query(query("this"));
463  Xapian::MSet mymset1 = enquire.get_mset(0, 6);
464  Xapian::MSet mymset2 = enquire.get_mset(3, 3);
465  TEST(mset_range_is_same(mymset1, 3, mymset2, 0, 3));
466 
467  // Regression test - we weren't adjusting the index into items[] by
468  // firstitem in api/omenquire.cc.
469  TEST_EQUAL(mymset1[5].get_document().get_data(),
470  mymset2[2].get_document().get_data());
471 }
472 
473 // tests the converting-to-percent functions
474 DEFINE_TESTCASE(topercent1, backend) {
475  Xapian::Enquire enquire(get_database("apitest_simpledata"));
476  enquire.set_query(query("this"));
477  Xapian::MSet mymset = enquire.get_mset(0, 20);
478 
479  int last_pct = 100;
480  Xapian::MSetIterator i = mymset.begin();
481  for ( ; i != mymset.end(); ++i) {
482  int pct = mymset.convert_to_percent(i);
483  TEST_AND_EXPLAIN(pct == i.get_percent(),
484  "convert_to_%(msetitor) != convert_to_%(wt)");
486  "convert_to_%(msetitor) != convert_to_%(wt)");
487  TEST_AND_EXPLAIN(pct >= 0 && pct <= 100,
488  "percentage out of range: " << pct);
489  TEST_AND_EXPLAIN(pct <= last_pct, "percentage increased down mset");
490  last_pct = pct;
491  }
492 }
493 
494 // tests the percentage values returned
495 DEFINE_TESTCASE(topercent2, backend) {
496  Xapian::Enquire enquire(get_database("apitest_simpledata"));
497 
498  int pct;
499 
500  // First, test a search in which the top document scores 100%.
501  enquire.set_query(query("this"));
502  Xapian::MSet mymset = enquire.get_mset(0, 20);
503 
504  Xapian::MSetIterator i = mymset.begin();
505  TEST(i != mymset.end());
506  pct = mymset.convert_to_percent(i);
507  TEST_EQUAL(pct, 100);
508 
509  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
510  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
511  TEST_EQUAL(mymset.get_matches_estimated(), 6);
512  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 0.0553904060041786);
513  TEST_EQUAL(mymset.size(), 6);
514  mset_expect_order(mymset, 2, 1, 3, 5, 6, 4);
515 
516  // A search in which the top document doesn't have 100%
518  "this", "line", "paragraph", "rubbish");
519  enquire.set_query(q);
520  mymset = enquire.get_mset(0, 20);
521 
522  i = mymset.begin();
523  TEST(i != mymset.end());
524  pct = mymset.convert_to_percent(i);
525  TEST_REL(pct,>,60);
526  TEST_REL(pct,<,76);
527 
528  ++i;
529 
530  TEST(i != mymset.end());
531  pct = mymset.convert_to_percent(i);
532  TEST_REL(pct,>,40);
533  TEST_REL(pct,<,50);
534 
535  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
536  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
537  TEST_EQUAL(mymset.get_matches_estimated(), 6);
538  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 1.67412192414056);
539  TEST_EQUAL(mymset.size(), 6);
540  mset_expect_order(mymset, 3, 1, 4, 2, 5, 6);
541 }
542 
544  public:
545  bool operator()(const string& tname) const override {
546  unsigned long sum = 0;
547  for (unsigned ch : tname) {
548  sum += ch;
549  }
550 // if (verbose) {
551 // tout << tname << "==> " << sum << "\n";
552 // }
553  return (sum % 2) == 0;
554  }
555 };
556 
557 // tests the expand decision functor
558 DEFINE_TESTCASE(expandfunctor1, backend) {
559  Xapian::Enquire enquire(get_database("apitest_simpledata"));
560  enquire.set_query(Xapian::Query("this"));
561 
562  Xapian::MSet mymset = enquire.get_mset(0, 10);
563  TEST(mymset.size() >= 2);
564 
565  Xapian::RSet myrset;
566  Xapian::MSetIterator i = mymset.begin();
567  myrset.add_document(*i);
568  myrset.add_document(*(++i));
569 
570  EvenParityExpandFunctor myfunctor;
571 
572  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
573  unsigned int neweset_size = 0;
574  Xapian::ESetIterator j = myeset_orig.begin();
575  for ( ; j != myeset_orig.end(); ++j) {
576  if (myfunctor(*j)) neweset_size++;
577  }
578  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
579 
580 #if 0
581  // Compare myeset with the hand-filtered version of myeset_orig.
582  if (verbose) {
583  tout << "orig_eset: ";
584  copy(myeset_orig.begin(), myeset_orig.end(),
585  ostream_iterator<Xapian::ESetItem>(tout, " "));
586  tout << "\n";
587 
588  tout << "new_eset: ";
589  copy(myeset.begin(), myeset.end(),
590  ostream_iterator<Xapian::ESetItem>(tout, " "));
591  tout << "\n";
592  }
593 #endif
594  Xapian::ESetIterator orig = myeset_orig.begin();
595  Xapian::ESetIterator filt = myeset.begin();
596  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
597  // skip over items that shouldn't be in myeset
598  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
599  ++orig;
600  }
601 
602  TEST_AND_EXPLAIN(*orig == *filt &&
603  orig.get_weight() == filt.get_weight(),
604  "Mismatch in items " << *orig << " vs. " << *filt
605  << " after filtering");
606  }
607 
608  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
609  ++orig;
610  }
611 
612  TEST_EQUAL(orig, myeset_orig.end());
613  TEST_AND_EXPLAIN(filt == myeset.end(),
614  "Extra items in the filtered eset.");
615 }
616 
617 DEFINE_TESTCASE(expanddeciderfilterprefix2, backend) {
618  Xapian::Enquire enquire(get_database("apitest_simpledata"));
619  enquire.set_query(Xapian::Query("this"));
620 
621  Xapian::MSet mymset = enquire.get_mset(0, 10);
622  TEST(mymset.size() >= 2);
623 
624  Xapian::RSet myrset;
625  Xapian::MSetIterator i = mymset.begin();
626  myrset.add_document(*i);
627  myrset.add_document(*(++i));
628 
629  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
630  unsigned int neweset_size = 0;
631 
632  // Choose the first char in the first term as prefix.
633  Xapian::ESetIterator j = myeset_orig.begin();
634  TEST(myeset_orig.size() >= 1);
635  string prefix(*j, 0, 1);
636  Xapian::ExpandDeciderFilterPrefix myfunctor(prefix);
637 
638  for ( ; j != myeset_orig.end(); ++j) {
639  if (myfunctor(*j)) neweset_size++;
640  }
641  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
642 
643  Xapian::ESetIterator orig = myeset_orig.begin();
644  Xapian::ESetIterator filt = myeset.begin();
645  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
646  // skip over items that shouldn't be in myeset
647  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
648  ++orig;
649  }
650 
651  TEST_AND_EXPLAIN(*orig == *filt &&
652  orig.get_weight() == filt.get_weight(),
653  "Mismatch in items " << *orig << " vs. " << *filt
654  << " after filtering");
655  }
656 
657  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
658  ++orig;
659  }
660 
661  TEST_EQUAL(orig, myeset_orig.end());
662  TEST_AND_EXPLAIN(filt == myeset.end(),
663  "Extra items in the filtered eset.");
664 }
665 
666 // tests the percent cutoff option
667 DEFINE_TESTCASE(pctcutoff1, backend) {
668  Xapian::Enquire enquire(get_database("apitest_simpledata"));
670  "this", "line", "paragraph", "rubbish"));
671  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
672 
673  if (verbose) {
674  tout << "Original mset pcts:";
675  print_mset_percentages(mymset1);
676  tout << "\n";
677  }
678 
679  unsigned int num_items = 0;
680  int my_pct = 100;
681  int changes = 0;
682  Xapian::MSetIterator i = mymset1.begin();
683  int c = 0;
684  for ( ; i != mymset1.end(); ++i, ++c) {
685  int new_pct = mymset1.convert_to_percent(i);
686  if (new_pct != my_pct) {
687  changes++;
688  if (changes > 3) break;
689  num_items = c;
690  my_pct = new_pct;
691  }
692  }
693 
694  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
695  if (verbose) {
696  tout << "Cutoff percent: " << my_pct << "\n";
697  }
698 
699  enquire.set_cutoff(my_pct);
700  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
701 
702  if (verbose) {
703  tout << "Percentages after cutoff:";
704  print_mset_percentages(mymset2);
705  tout << "\n";
706  }
707 
708  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
709  "Match with % cutoff lost too many items");
710 
711  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
712  (mymset2.convert_to_percent(mymset2[num_items]) == my_pct &&
713  mymset2.convert_to_percent(mymset2.back()) == my_pct),
714  "Match with % cutoff returned too many items");
715 }
716 
717 // Tests the percent cutoff option combined with collapsing
// NOTE(review): the embedded listing numbers skip 720, 725, 741 and 743-746,
// so this copy appears to have lost the statement that sets the query, the
// body of the verbose block, and the opening of the final check.  Restore
// those from the upstream file before relying on this test.
718 DEFINE_TESTCASE(pctcutoff2, backend) {
719  Xapian::Enquire enquire(get_database("apitest_simpledata"));
// NOTE(review): no set_query() call is visible before this match is run.
721  Xapian::MSet mset = enquire.get_mset(0, 100);
722 
723  if (verbose) {
724  tout << "Original mset pcts:";
726  tout << "\n";
727  }
728 
// The top two entries must differ by at least 2 percentage points so that
// a cutoff can be placed between them.
729  TEST(mset.size() >= 2);
730  TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
731 
// Use the midpoint of the top two percentages as the cutoff.
732  int cutoff = mset[0].get_percent() + mset[1].get_percent();
733  cutoff /= 2;
734 
735  enquire.set_cutoff(cutoff);
736  enquire.set_collapse_key(1234); // Value which is always empty.
737 
// Only one entry is requested, and exactly one must be returned.
738  Xapian::MSet mset2 = enquire.get_mset(0, 1);
739  TEST_EQUAL(mset2.size(), 1);
740  TEST_REL(mset2.get_matches_lower_bound(),>=,1);
// NOTE(review): the next line is the tail of a statement whose first line
// is missing from this copy.
742  mset2.get_matches_lower_bound());
747 }
748 
749 // Test that the percent cutoff option returns all the answers it should.
750 DEFINE_TESTCASE(pctcutoff3, backend) {
751  Xapian::Enquire enquire(get_database("apitest_simpledata"));
752  enquire.set_query(Xapian::Query("this"));
753  Xapian::MSet mset1 = enquire.get_mset(0, 10);
754 
755  if (verbose) {
756  tout << "Original mset pcts:";
757  print_mset_percentages(mset1);
758  tout << "\n";
759  }
760 
761  int percent = 100;
762  for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
763  int new_percent = mset1.convert_to_percent(i);
764  if (new_percent != percent) {
765  tout.str(string());
766  tout << "Testing " << percent << "% cutoff\n";
767  enquire.set_cutoff(percent);
768  Xapian::MSet mset2 = enquire.get_mset(0, 10);
769  TEST_EQUAL(mset2.back().get_percent(), percent);
770  TEST_EQUAL(mset2.size(), i.get_rank());
771  percent = new_percent;
772  }
773  }
774 }
775 
776 // tests the cutoff option
777 DEFINE_TESTCASE(cutoff1, backend) {
778  Xapian::Enquire enquire(get_database("apitest_simpledata"));
780  "this", "line", "paragraph", "rubbish"));
781  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
782 
783  if (verbose) {
784  tout << "Original mset weights:";
785  print_mset_weights(mymset1);
786  tout << "\n";
787  }
788 
789  unsigned int num_items = 0;
790  double my_wt = -100;
791  int changes = 0;
792  Xapian::MSetIterator i = mymset1.begin();
793  int c = 0;
794  for ( ; i != mymset1.end(); ++i, ++c) {
795  double new_wt = i.get_weight();
796  if (new_wt != my_wt) {
797  changes++;
798  if (changes > 3) break;
799  num_items = c;
800  my_wt = new_wt;
801  }
802  }
803 
804  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
805  if (verbose) {
806  tout << "Cutoff weight: " << my_wt << "\n";
807  }
808 
809  enquire.set_cutoff(0, my_wt);
810  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
811 
812  if (verbose) {
813  tout << "Weights after cutoff:";
814  print_mset_weights(mymset2);
815  tout << "\n";
816  }
817 
818  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
819  "Match with cutoff lost too many items");
820 
821  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
822  (mymset2[num_items].get_weight() == my_wt &&
823  mymset2.back().get_weight() == my_wt),
824  "Match with cutoff returned too many items");
825 }
826 
827 // tests the allow query terms expand option
828 DEFINE_TESTCASE(allowqterms1, backend) {
829  Xapian::Enquire enquire(get_database("apitest_simpledata"));
830  string term = "paragraph";
831  enquire.set_query(Xapian::Query(term));
832 
833  Xapian::MSet mymset = enquire.get_mset(0, 10);
834  TEST(mymset.size() >= 2);
835 
836  Xapian::RSet myrset;
837  Xapian::MSetIterator i = mymset.begin();
838  myrset.add_document(*i);
839  myrset.add_document(*(++i));
840 
841  Xapian::ESet myeset = enquire.get_eset(1000, myrset);
842  Xapian::ESetIterator j = myeset.begin();
843  for ( ; j != myeset.end(); ++j) {
844  TEST_NOT_EQUAL(*j, term);
845  }
846 
847  Xapian::ESet myeset2 = enquire.get_eset(1000, myrset, Xapian::Enquire::INCLUDE_QUERY_TERMS);
848  j = myeset2.begin();
849  for ( ; j != myeset2.end(); ++j) {
850  if (*j == term) break;
851  }
852  TEST(j != myeset2.end());
853 }
854 
855 // tests that the MSet max_attained works
856 DEFINE_TESTCASE(maxattain1, backend) {
857  Xapian::Enquire enquire(get_database("apitest_simpledata"));
858  enquire.set_query(query("this"));
859  Xapian::MSet mymset = enquire.get_mset(0, 100);
860 
861  double mymax = 0;
862  Xapian::MSetIterator i = mymset.begin();
863  for ( ; i != mymset.end(); ++i) {
864  if (i.get_weight() > mymax) mymax = i.get_weight();
865  }
866  TEST_EQUAL(mymax, mymset.get_max_attained());
867 }
868 
869 // tests a reversed boolean query
870 DEFINE_TESTCASE(reversebool1, backend) {
871  Xapian::Enquire enquire(get_database("apitest_simpledata"));
872  enquire.set_query(Xapian::Query("this"));
874 
875  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
876  TEST_AND_EXPLAIN(mymset1.size() > 1,
877  "Mset was too small to test properly");
878 
880  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
882  Xapian::MSet mymset3 = enquire.get_mset(0, 100);
883 
884  // mymset1 and mymset2 should be identical
885  TEST_EQUAL(mymset1.size(), mymset2.size());
886 
887  {
888  Xapian::MSetIterator i = mymset1.begin();
889  Xapian::MSetIterator j = mymset2.begin();
890  for ( ; i != mymset1.end(); ++i, j++) {
891  TEST(j != mymset2.end());
892  // if this fails, then setting match_sort_forward=true was not
893  // the same as the default.
894  TEST_EQUAL(*i, *j);
895  }
896  TEST(j == mymset2.end());
897  }
898 
899  // mymset1 and mymset3 should be same but reversed
900  TEST_EQUAL(mymset1.size(), mymset3.size());
901 
902  {
903  Xapian::MSetIterator i = mymset1.begin();
904  Xapian::MSetIterator j = mymset3.end();
905  for ( ; i != mymset1.end(); ++i) {
906  --j;
907  // if this fails, then setting match_sort_forward=false didn't
908  // reverse the results.
909  TEST_EQUAL(*i, *j);
910  }
911  }
912 }
913 
914 // tests a reversed boolean query, where the full mset isn't returned
// Runs the single-term query "this" three times: once asking for up to 100
// hits (mymset1) and twice asking for only half as many (mymset2, mymset3),
// then checks mymset2 against the head of mymset1 and mymset3 against the
// tail of mymset1 read backwards.
// NOTE(review): the embedded listing numbers skip 918, 925, 928 and 950, so
// four lines are absent from this extract -- presumably the calls selecting
// the weighting scheme / result ordering and the declaration of `j` used by
// the last loop.  Confirm against the original file.
915 DEFINE_TESTCASE(reversebool2, backend) {
916  Xapian::Enquire enquire(get_database("apitest_simpledata"));
917  enquire.set_query(Xapian::Query("this"));
919 
920  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
921 
922  TEST_AND_EXPLAIN(mymset1.size() > 1,
923  "Mset was too small to test properly");
924 
 // Only half of the full result set is requested from here on.
926  Xapian::doccount msize = mymset1.size() / 2;
927  Xapian::MSet mymset2 = enquire.get_mset(0, msize);
929  Xapian::MSet mymset3 = enquire.get_mset(0, msize);
930 
931  // mymset2 should be first msize items of mymset1
932  TEST_EQUAL(msize, mymset2.size());
933  {
934  Xapian::MSetIterator i = mymset1.begin();
935  Xapian::MSetIterator j = mymset2.begin();
936  for ( ; j != mymset2.end(); ++i, ++j) {
937  TEST(i != mymset1.end());
938  // if this fails, then setting match_sort_forward=true was not
939  // the same as the default.
940  TEST_EQUAL(*i, *j);
941  }
942  // mymset1 should be larger.
943  TEST(i != mymset1.end());
944  }
945 
946  // mymset3 should be last msize items of mymset1, in reverse order
947  TEST_EQUAL(msize, mymset3.size());
948  {
 // Walk mymset1 backwards from its end while walking mymset3 forwards.
949  Xapian::MSetIterator i = mymset1.end();
951  for (j = mymset3.begin(); j != mymset3.end(); ++j) {
952  // if this fails, then setting match_sort_forward=false didn't
953  // reverse the results.
954  --i;
955  TEST_EQUAL(*i, *j);
956  }
957  }
958 }
959 
960 // tests that get_matching_terms() returns the terms in the right order
// Expected sequence is "one", "two", "three", "four".  The query is built
// from the terms in a different textual order (one, three, four, two), each
// with a distinct third constructor argument (1..4) -- presumably the term
// position that defines the expected ordering; TODO confirm.
// NOTE(review): listing numbers 971-972 and 975 are absent from this
// extract, so the declaration of `myquery` and the operators combining the
// four sub-queries are not visible here.
961 DEFINE_TESTCASE(getmterms1, backend) {
962  list<string> answers_list;
963  answers_list.push_back("one");
964  answers_list.push_back("two");
965  answers_list.push_back("three");
966  answers_list.push_back("four");
967 
968  Xapian::Database mydb(get_database("apitest_termorder"));
969  Xapian::Enquire enquire(mydb);
970 
973  Xapian::Query("one", 1, 1),
974  Xapian::Query("three", 1, 3)),
976  Xapian::Query("four", 1, 4),
977  Xapian::Query("two", 1, 2)));
978 
979  enquire.set_query(myquery);
980 
981  Xapian::MSet mymset = enquire.get_mset(0, 10);
982 
 // Exactly one hit is required; its matching terms are copied into a
 // std::list (the local is itself named `list`) for comparison.
983  TEST_MSET_SIZE(mymset, 1);
984  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
985  enquire.get_matching_terms_end(mymset.begin()));
986  TEST(list == answers_list);
987 }
988 
989 // tests that get_matching_terms() returns the terms only once
// Same shape as getmterms1, but the term "one" appears in two of the four
// sub-queries while the expected list contains it a single time.
// NOTE(review): listing numbers 999-1000 and 1003 are absent from this
// extract, so the declaration of `myquery` and the operators combining the
// sub-queries are not visible here.
990 DEFINE_TESTCASE(getmterms2, backend) {
991  list<string> answers_list;
992  answers_list.push_back("one");
993  answers_list.push_back("two");
994  answers_list.push_back("three");
995 
996  Xapian::Database mydb(get_database("apitest_termorder"));
997  Xapian::Enquire enquire(mydb);
998 
1001  Xapian::Query("one", 1, 1),
1002  Xapian::Query("three", 1, 3)),
1004  Xapian::Query("one", 1, 4),
1005  Xapian::Query("two", 1, 2)));
1006 
1007  enquire.set_query(myquery);
1008 
1009  Xapian::MSet mymset = enquire.get_mset(0, 10);
1010 
 // Exactly one hit is required; its matching terms must equal the
 // de-duplicated expectation.
1011  TEST_MSET_SIZE(mymset, 1);
1012  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
1013  enquire.get_matching_terms_end(mymset.begin()));
1014  TEST(list == answers_list);
1015 }
1016 
1017 // test that running a query twice returns the same results
// Calls get_mset(0, 10) twice on the same Enquire object without changing
// anything in between and requires the two MSets to compare equal.
1018 DEFINE_TESTCASE(repeatquery1, backend) {
1019  Xapian::Enquire enquire(get_database("apitest_simpledata"));
 // This first query is replaced by the set_query() call below before any
 // match is run.
1020  enquire.set_query(Xapian::Query("this"));
1021 
1022  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1023 
1024  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1025  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1026  TEST_EQUAL(mymset1, mymset2);
1027 }
1028 
1029 // test that prefetching documents works (at least, gives same results)
// Runs the same query twice, issues every fetch() overload on the second
// MSet, then walks both MSets in step checking that each pair of documents
// has identical, non-empty data and that BOTH iterators finish at the end
// of their own MSet.
1030 DEFINE_TESTCASE(fetchdocs1, backend) {
1031  Xapian::Enquire enquire(get_database("apitest_simpledata"));
 // This first query is replaced by the set_query() call below before any
 // match is run.
1032  enquire.set_query(Xapian::Query("this"));
1033 
1034  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1035 
1036  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1037  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1038  TEST_EQUAL(mymset1, mymset2);
 // Exercise each prefetch form: a range given by two items, a range given
 // by two iterators, a single item, and the whole MSet.
1039  mymset2.fetch(mymset2[0], mymset2[mymset2.size() - 1]);
1040  mymset2.fetch(mymset2.begin(), mymset2.end());
1041  mymset2.fetch(mymset2.begin());
1042  mymset2.fetch();
1043 
1044  Xapian::MSetIterator it1 = mymset1.begin();
1045  Xapian::MSetIterator it2 = mymset2.begin();
1046 
1047  while (it1 != mymset1.end() && it2 != mymset2.end()) {
1048  TEST_EQUAL(it1.get_document().get_data(),
1049  it2.get_document().get_data());
1050  TEST_NOT_EQUAL(it1.get_document().get_data(), "");
1051  TEST_NOT_EQUAL(it2.get_document().get_data(), "");
1052  it1++;
1053  it2++;
1054  }
 // The loop stops as soon as either iterator is exhausted, so each one
 // must be checked against the end of the MSet it belongs to.
1055  TEST_EQUAL(it1, mymset1.end());
1056  TEST_EQUAL(it2, mymset2.end());
1057 }
1058 
1059 // test that searching for a term not in the database fails nicely
// Queries for the single term "frink" and passes the result to
// mset_expect_order() with no docids listed -- presumably asserting an
// empty MSet (the helper is defined outside this extract; confirm).
// NOTE(review): listing number 1062 is absent from this extract, so one
// line between constructing `enquire` and set_query() is not visible.
1060 DEFINE_TESTCASE(absentterm1, backend) {
1061  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1063  enquire.set_query(Xapian::Query("frink"));
1064 
1065  Xapian::MSet mymset = enquire.get_mset(0, 10);
1066  mset_expect_order(mymset);
1067 }
1068 
1069 // as absentterm1, but setting query from a vector of terms
// The query is an OP_OR over a one-element vector holding "frink"; the
// resulting MSet is passed to mset_expect_order() with no docids listed.
1070 DEFINE_TESTCASE(absentterm2, backend) {
1071  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1072  vector<string> terms;
1073  terms.push_back("frink");
1074 
 // Note: this local is named `query`, the same name as the file's query()
 // helper function.
1075  Xapian::Query query(Xapian::Query::OP_OR, terms.begin(), terms.end());
1076  enquire.set_query(query);
1077 
1078  Xapian::MSet mymset = enquire.get_mset(0, 10);
1079  mset_expect_order(mymset);
1080 }
1081 
1082 // test that rsets do sensible things
// Runs "giraffe" OR "tiger" against "apitest_rset" once without and once
// with an RSet containing document 1, and checks that both runs return
// three hits.
1083 DEFINE_TESTCASE(rset1, backend) {
1084  Xapian::Database mydb(get_database("apitest_rset"));
1085  Xapian::Enquire enquire(mydb);
1086  Xapian::Query myquery = query(Xapian::Query::OP_OR, "giraffe", "tiger");
1087  enquire.set_query(myquery);
1088 
1089  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1090 
1091  Xapian::RSet myrset;
1092  myrset.add_document(1);
1093 
1094  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1095 
1096  // We should have the same documents turn up, but 1 and 3 should
1097  // have higher weights with the RSet.
 // NOTE(review): only the MSet sizes are asserted below; the weight
 // change described above is not actually checked in this test.
1098  TEST_MSET_SIZE(mymset1, 3);
1099  TEST_MSET_SIZE(mymset2, 3);
1100 }
1101 
// Checks that adding document 2 to an RSet swaps the ranking of documents 1
// and 2 for the query "cuddly" OR "people" on "apitest_rset".
// NOTE(review): listing numbers 1102 and 1123 are absent from this extract.
// 1102 is presumably the test's original description; 1123 sits at the end
// of the loop body and presumably switches the weighting scheme for the
// second pass (see the loop comment).  As shown here both iterations would
// be identical -- confirm against the original file.
1103 DEFINE_TESTCASE(rset2, backend) {
1104  Xapian::Database mydb(get_database("apitest_rset"));
1105  Xapian::Enquire enquire(mydb);
1106  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
1107  enquire.set_query(myquery);
1108 
1109  // Test with the default BM25Weight, then with TradWeight.
1110  for (int i = 0; i < 2; ++i) {
1111  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1112 
1113  Xapian::RSet myrset;
1114  myrset.add_document(2);
1115 
1116  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1117 
 // Without the RSet: document 1 first, then document 2.
1118  mset_expect_order(mymset1, 1, 2);
1119  // Document 2 should have higher weight than document 1 despite the wdf
1120  // of "people" being 1 because "people" indexes a document in the RSet
1121  // whereas "cuddly" (wdf=2) does not.
1122  mset_expect_order(mymset2, 2, 1);
1124  }
1125 }
1126 
1127 // test that rsets behave correctly with multiDBs
// Builds the same two-database combination in two ways -- via the
// two-argument get_database() (mydb1) and via add_database() (mydb2) -- and
// runs "cuddly" OR "multiple" on each, with and without an RSet.  The RSet
// docid differs between the two (4 vs 2), as do the expected orderings,
// while the weights of the first two hits must agree across the two
// databases.  Skipped for the "multi" backend (see the testcase condition).
1128 DEFINE_TESTCASE(rsetmultidb1, backend && !multi) {
1129  Xapian::Database mydb1(get_database("apitest_rset", "apitest_simpledata2"));
1130  Xapian::Database mydb2(get_database("apitest_rset"));
1131  mydb2.add_database(get_database("apitest_simpledata2"));
1132 
1133  Xapian::Enquire enquire1(mydb1);
1134  Xapian::Enquire enquire2(mydb2);
1135 
1136  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "multiple");
1137 
1138  enquire1.set_query(myquery);
1139  enquire2.set_query(myquery);
1140 
1141  Xapian::RSet myrset1;
1142  Xapian::RSet myrset2;
1143  myrset1.add_document(4);
1144  myrset2.add_document(2);
1145 
 // "a" results are without an RSet, "b" results are with one.
1146  Xapian::MSet mymset1a = enquire1.get_mset(0, 10);
1147  Xapian::MSet mymset1b = enquire1.get_mset(0, 10, &myrset1);
1148  Xapian::MSet mymset2a = enquire2.get_mset(0, 10);
1149  Xapian::MSet mymset2b = enquire2.get_mset(0, 10, &myrset2);
1150 
1151  mset_expect_order(mymset1a, 1, 4);
1152  mset_expect_order(mymset1b, 4, 1);
1153  mset_expect_order(mymset2a, 1, 2);
1154  mset_expect_order(mymset2b, 2, 1);
1155 
 // Weights of the first two hits must match between the two databases,
 // and supplying the RSet must change the result for each database.
1156  TEST(mset_range_is_same_weights(mymset1a, 0, mymset2a, 0, 2));
1157  TEST(mset_range_is_same_weights(mymset1b, 0, mymset2b, 0, 2));
1158  TEST_NOT_EQUAL(mymset1a, mymset1b);
1159  TEST_NOT_EQUAL(mymset2a, mymset2b);
1160 }
1161 
1162 // regression tests - used to cause assertion in stats.h to fail
1163 // Doesn't actually fail for multi but it doesn't make sense to run there.
// The test passes if get_mset() simply returns; the resulting MSet is not
// inspected.
1164 DEFINE_TESTCASE(rsetmultidb3, backend && !multi) {
1165  Xapian::Enquire enquire(get_database("apitest_simpledata2"));
1166  enquire.set_query(query(Xapian::Query::OP_OR, "cuddly", "people"));
1167  Xapian::MSet mset = enquire.get_mset(0, 10); // used to fail assertion
1168 }
1169 
// Compares the MSet for the plain one-term query "word" with the MSet for
// `myquery2` and requires them to be equal.  Skipped for the "multi"
// backend (see the testcase condition).
// NOTE(review): listing numbers 1170 and 1177 are absent from this extract.
// 1170 is presumably the test's description; 1177 is the first line of the
// `myquery2` declaration, so its operator and any leading arguments are not
// visible -- only the trailing terms "simple", "word" are.
1171 DEFINE_TESTCASE(eliteset1, backend && !multi) {
1172  Xapian::Database mydb(get_database("apitest_simpledata"));
1173  Xapian::Enquire enquire(mydb);
1174 
1175  Xapian::Query myquery1 = query(Xapian::Query::OP_OR, "word");
1176 
1178  "simple", "word");
1179 
1180  enquire.set_query(myquery1, 2); // So the query lengths are the same.
1181  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1182 
1183  enquire.set_query(myquery2);
1184  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1185 
1186  TEST_EQUAL(mymset1, mymset2);
1187 }
1188 
// Variant of eliteset1 that runs only for the "multi" backend and checks
// the MSet for `myquery2` against hard-coded sizes, bounds, ordering and
// weights (the inline comment below explains why).
// NOTE(review): listing numbers 1189 and 1194 are absent from this extract.
// 1189 is presumably the test's description; 1194 is the first line of the
// `myquery2` declaration, so its operator is not visible here.
1190 DEFINE_TESTCASE(elitesetmulti1, multi) {
1191  Xapian::Database mydb(get_database("apitest_simpledata"));
1192  Xapian::Enquire enquire(mydb);
1193 
1195  "simple", "word");
1196 
1197  enquire.set_query(myquery2);
1198  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1199 
1200  // For a sharded database, the elite set is resolved per shard and can
1201  // select different terms because the max term weights vary with the
1202  // per-shard term statistics. I can't see a feasible way to create
1203  // an equivalent MSet to compare with so for now at least we hard-code
1204  // the expected values.
1205  TEST_EQUAL(mymset2.size(), 3);
1206  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1207  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1208  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
1209  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.1736756775723788948);
1210  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.0464816871772451012);
1211  mset_expect_order(mymset2, 2, 4, 5);
 // The top hit's weight is the same constant as get_max_attained() above.
1212  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.0464816871772451012);
1213  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.64098768659591376373);
1214  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.46338869498075929698);
1215 }
1216 
// Compares the MSet for "word" AND "search" with the MSet for `myquery2`,
// which is built from a vector holding the term query "this" and that same
// AND query, with a trailing argument of 1.  The two MSets must be equal.
// Skipped for the "multi" backend (see the testcase condition).
// NOTE(review): listing numbers 1217-1218 and 1228 are absent from this
// extract.  1217-1218 are presumably the test's description; 1228 is the
// first line of the `myquery2` declaration, so its operator is not visible.
1219 DEFINE_TESTCASE(eliteset2, backend && !multi) {
1220  Xapian::Database mydb(get_database("apitest_simpledata"));
1221  Xapian::Enquire enquire(mydb);
1222 
1223  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1224 
1225  vector<Xapian::Query> qs;
1226  qs.push_back(query("this"));
1227  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1229  qs.begin(), qs.end(), 1);
1230 
1231  enquire.set_query(myquery1);
1232  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1233 
1234  enquire.set_query(myquery2);
1235  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1236 
1237  TEST_EQUAL(mymset1, mymset2);
1238 }
1239 
// Variant of eliteset2 that runs only for the "multi" backend and checks
// the MSet for `myquery2` against hard-coded sizes, bounds, ordering and
// weights (the inline comment below explains why).
// NOTE(review): listing numbers 1240 and 1250 are absent from this extract.
// 1240 is presumably the test's description; 1250 is the first line of the
// `myquery2` declaration, so its operator is not visible here.
1241 DEFINE_TESTCASE(elitesetmulti2, multi) {
1242  Xapian::Database mydb(get_database("apitest_simpledata"));
1243  Xapian::Enquire enquire(mydb);
1244 
 // NOTE(review): myquery1 is constructed but not used in the lines
 // visible in this extract.
1245  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1246 
1247  vector<Xapian::Query> qs;
1248  qs.push_back(query("this"));
1249  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1251  qs.begin(), qs.end(), 1);
1252 
1253  enquire.set_query(myquery2);
1254  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1255 
1256  // For a sharded database, the elite set is resolved per shard and can
1257  // select different terms because the max term weights vary with the
1258  // per-shard term statistics. I can't see a feasible way to create
1259  // an equivalent MSet to compare with so for now at least we hard-code
1260  // the expected values.
1261  TEST_EQUAL(mymset2.size(), 4);
1262  TEST_EQUAL(mymset2.get_matches_lower_bound(), 4);
1263  TEST_EQUAL(mymset2.get_matches_estimated(), 4);
1264  TEST_EQUAL(mymset2.get_matches_upper_bound(), 4);
1265  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 2.6585705165783908299);
1266  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.9700834242150864206);
1267  mset_expect_order(mymset2, 2, 1, 3, 5);
 // The top hit's weight is the same constant as get_max_attained() above;
 // the last two hits share one expected weight.
1268  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.9700834242150864206);
1269  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.051103097360122341775);
1270  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.043131803408968119595);
1271  TEST_EQUAL_DOUBLE(mymset2[3].get_weight(), 0.043131803408968119595);
1272 }
1273 
1274 
// Checks that an OP_ELITE_SET query over three terms with an elite-set size
// argument of 3 gives the same MSet, term frequencies and term weights as a
// plain OP_OR over the same three terms.  Two separate Database/Enquire
// pairs over the same data are used, one per query.
// NOTE(review): listing numbers 1275-1276 are absent from this extract;
// presumably the test's original description.
1277 DEFINE_TESTCASE(eliteset3, backend) {
1278  Xapian::Database mydb1(get_database("apitest_simpledata"));
1279  Xapian::Enquire enquire1(mydb1);
1280 
1281  Xapian::Database mydb2(get_database("apitest_simpledata"));
1282  Xapian::Enquire enquire2(mydb2);
1283 
1284  // make a query
1285  Xapian::Stem stemmer("english");
1286 
1287  string term1 = stemmer("word");
1288  string term2 = stemmer("rubbish");
1289  string term3 = stemmer("banana");
1290 
1291  vector<string> terms;
1292  terms.push_back(term1);
1293  terms.push_back(term2);
1294  terms.push_back(term3);
1295 
1296  Xapian::Query myquery1(Xapian::Query::OP_OR, terms.begin(), terms.end());
1297  enquire1.set_query(myquery1);
1298 
1299  Xapian::Query myquery2(Xapian::Query::OP_ELITE_SET, terms.begin(), terms.end(), 3);
1300  enquire2.set_query(myquery2);
1301 
1302  // retrieve the results
1303  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1304  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1305 
1306  TEST_EQUAL(mymset1, mymset2);
1307 
 // Per-term statistics reported by the two MSets must agree as well.
1308  TEST_EQUAL(mymset1.get_termfreq(term1),
1309  mymset2.get_termfreq(term1));
1310  TEST_EQUAL(mymset1.get_termweight(term1),
1311  mymset2.get_termweight(term1));
1312  TEST_EQUAL(mymset1.get_termfreq(term2),
1313  mymset2.get_termfreq(term2));
1314  TEST_EQUAL(mymset1.get_termweight(term2),
1315  mymset2.get_termweight(term2));
1316  TEST_EQUAL(mymset1.get_termfreq(term3),
1317  mymset2.get_termfreq(term3));
1318  TEST_EQUAL(mymset1.get_termweight(term3),
1319  mymset2.get_termweight(term3));
1320 }
1321 
// Compares the MSet for the single term "rubbish" with the MSet for
// `myquery2` (built over "word", "rubbish", "fibble"); the latter must be
// non-empty and the two must be equal.  Skipped for the "multi" backend
// (see the testcase condition).
// NOTE(review): listing numbers 1322 and 1331 are absent from this extract.
// 1322 is presumably the test's description; 1331 is the first line of the
// `myquery2` declaration, so its operator and any leading arguments are not
// visible here.
1323 DEFINE_TESTCASE(eliteset4, backend && !multi) {
1324  Xapian::Database mydb1(get_database("apitest_simpledata"));
1325  Xapian::Enquire enquire1(mydb1);
1326 
1327  Xapian::Database mydb2(get_database("apitest_simpledata"));
1328  Xapian::Enquire enquire2(mydb2);
1329 
1330  Xapian::Query myquery1 = query("rubbish");
1332  "word", "rubbish", "fibble");
1333  enquire1.set_query(myquery1);
1334  enquire2.set_query(myquery2);
1335 
1336  // retrieve the results
1337  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1338  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1339 
1340  TEST_NOT_EQUAL(mymset2.size(), 0);
1341  TEST_EQUAL(mymset1, mymset2);
1342 }
1343 
// Variant of eliteset4 that runs only for the "multi" backend and checks
// the MSet for `myquery2` against hard-coded sizes, bounds, ordering and
// weights (the inline comment below explains why).
// NOTE(review): listing numbers 1344 and 1349 are absent from this extract.
// 1344 is presumably the test's description; 1349 is the first line of the
// `myquery2` declaration, so its operator is not visible here.
1345 DEFINE_TESTCASE(elitesetmulti4, multi) {
1346  Xapian::Database mydb2(get_database("apitest_simpledata"));
1347  Xapian::Enquire enquire2(mydb2);
1348 
1350  "word", "rubbish", "fibble");
1351  enquire2.set_query(myquery2);
1352 
1353  // retrieve the results
1354  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1355 
1356  // For a sharded database, the elite set is resolved per shard and can
1357  // select different terms because the max term weights vary with the
1358  // per-shard term statistics. I can't see a feasible way to create
1359  // an equivalent MSet to compare with so for now at least we hard-code
1360  // the expected values.
1361  TEST_EQUAL(mymset2.size(), 3);
1362  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1363  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1364  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
 // Here the expected maximum possible and maximum attained weights are
 // the same constant, which is also the top hit's expected weight.
1365  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.4848948390060121572);
1366  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.4848948390060121572);
1367  mset_expect_order(mymset2, 3, 2, 4);
1368  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.4848948390060121572);
1369  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 1.0464816871772451012);
1370  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.64098768659591376373);
1371 }
1372 
// Crash regression test (see the inline comment in the second loop): builds
// a 24-entry term vector with many repeats, then for each n from 1 to
// v.size() - 1 builds a query from the whole vector with n as the trailing
// argument and runs get_mset().  The MSet itself is discarded; the test
// passes if nothing crashes or throws.
// NOTE(review): listing numbers 1373, 1392 and 1394 are absent from this
// extract.  1373 is presumably the test's description; 1392 and 1394 are
// the opening lines of the two statements that construct and then
// re-assign/wrap `myquery1` (the visible tails show a factor of 0.004), so
// the operators involved are not visible here.
1374 DEFINE_TESTCASE(eliteset5, backend) {
1375  Xapian::Database mydb1(get_database("apitest_simpledata"));
1376  Xapian::Enquire enquire1(mydb1);
1377 
1378  vector<string> v;
 // Three rounds of: simpl, queri, rubbish x3, word x3  (8 entries each).
1379  for (int i = 0; i != 3; ++i) {
1380  v.push_back("simpl");
1381  v.push_back("queri");
1382 
1383  v.push_back("rubbish");
1384  v.push_back("rubbish");
1385  v.push_back("rubbish");
1386  v.push_back("word");
1387  v.push_back("word");
1388  v.push_back("word");
1389  }
1390 
 // n stops one short of v.size() because the condition is `n != v.size()`.
1391  for (Xapian::termcount n = 1; n != v.size(); ++n) {
1393  v.begin(), v.end(), n);
1395  myquery1,
1396  0.004);
1397 
1398  enquire1.set_query(myquery1);
1399  // On architectures with excess precision (or, at least, on x86), the
1400  // following call used to result in a segfault (at least when n=1).
1401  enquire1.get_mset(0, 10);
1402  }
1403 }
1404 
// Builds two expansion sets from two different one-document RSets
// (document 5 and document 6), looks up the stemmed term "another" in each,
// and requires its weight to be non-zero and identical in both.
// NOTE(review): listing number 1405 is absent from this extract; presumably
// the test's original description.
1406 DEFINE_TESTCASE(termlisttermfreq1, backend) {
1407  Xapian::Database mydb(get_database("apitest_simpledata"));
1408  Xapian::Enquire enquire(mydb);
1409  Xapian::Stem stemmer("english");
1410  Xapian::RSet rset1;
1411  Xapian::RSet rset2;
1412  rset1.add_document(5);
1413  rset2.add_document(6);
1414 
 // No query is set on `enquire` in the visible lines; the ESets are
 // derived from the RSets alone.
1415  Xapian::ESet eset1 = enquire.get_eset(1000, rset1);
1416  Xapian::ESet eset2 = enquire.get_eset(1000, rset2);
1417 
1418  // search for weight of term 'another'
1419  string theterm = stemmer("another");
1420 
 // Weights stay 0 if the term is not found, which the checks at the end
 // then report as a failure.
1421  double wt1 = 0;
1422  double wt2 = 0;
1423  {
1424  Xapian::ESetIterator i = eset1.begin();
1425  for ( ; i != eset1.end(); ++i) {
1426  if (*i == theterm) {
1427  wt1 = i.get_weight();
1428  break;
1429  }
1430  }
1431  }
1432  {
1433  Xapian::ESetIterator i = eset2.begin();
1434  for ( ; i != eset2.end(); ++i) {
1435  if (*i == theterm) {
1436  wt2 = i.get_weight();
1437  break;
1438  }
1439  }
1440  }
1441 
1442  TEST_NOT_EQUAL(wt1, 0);
1443  TEST_NOT_EQUAL(wt2, 0);
1444  TEST_EQUAL(wt1, wt2);
1445 }
1446 
// Checks the per-term statistics (get_termfreq / get_termweight) reported by
// an MSet, using the same pair of databases combined in two ways: via the
// two-argument get_database() (mydb1) and via add_database() (mydb2).  The
// checks run inside a two-pass loop.
// NOTE(review): several listing numbers are absent from this extract:
//   1447        presumably the test's description;
//   1463, 1465  opening lines of the `myquery` declaration (operators not
//               visible);
//   1493, 1497  opening lines of two checks whose visible tails take the
//               term weight of stemmed "banana" and of "sponge";
//   1502-1503   end of the loop body, presumably switching the weighting
//               scheme for the second pass (see the comment at 1500-1501).
// Confirm against the original file.
1448 DEFINE_TESTCASE(qterminfo1, backend) {
1449  Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
1450  Xapian::Enquire enquire1(mydb1);
1451 
1452  Xapian::Database mydb2(get_database("apitest_simpledata"));
1453  mydb2.add_database(get_database("apitest_simpledata2"));
1454  Xapian::Enquire enquire2(mydb2);
1455 
1456  // make a query
1457  Xapian::Stem stemmer("english");
1458 
1459  string term1 = stemmer("word");
1460  string term2 = stemmer("inmemory");
1461  string term3 = stemmer("flibble");
1462 
1464  Xapian::Query(term1),
1466  Xapian::Query(term2),
1467  Xapian::Query(term3)));
1468  enquire1.set_query(myquery);
1469  enquire2.set_query(myquery);
1470 
1471  for (int i = 1; i <= 2; ++i) {
1472  // Retrieve the results.
 // A zero-sized MSet is requested: only the statistics are needed.
1473  Xapian::MSet mymset1a = enquire1.get_mset(0, 0);
1474  Xapian::MSet mymset2a = enquire2.get_mset(0, 0);
1475 
 // Both ways of combining the databases must report the same term
 // frequencies.
1476  TEST_EQUAL(mymset1a.get_termfreq(term1),
1477  mymset2a.get_termfreq(term1));
1478  TEST_EQUAL(mymset1a.get_termfreq(term2),
1479  mymset2a.get_termfreq(term2));
1480  TEST_EQUAL(mymset1a.get_termfreq(term3),
1481  mymset2a.get_termfreq(term3));
1482 
1483  TEST_EQUAL(mymset1a.get_termfreq(term1), 3);
1484  TEST_EQUAL(mymset1a.get_termfreq(term2), 1);
1485  TEST_EQUAL(mymset1a.get_termfreq(term3), 0);
1486 
1487  TEST_NOT_EQUAL(mymset1a.get_termweight(term1), 0);
1488  TEST_NOT_EQUAL(mymset1a.get_termweight(term2), 0);
1489  // Non-existent terms should have zero weight.
1490  TEST_EQUAL(mymset1a.get_termweight(term3), 0);
1491 
 // "banana" and "sponge" are not among term1..term3 above.
1492  TEST_EQUAL(mymset1a.get_termfreq(stemmer("banana")), 1);
1494  mymset1a.get_termweight(stemmer("banana")));
1495 
1496  TEST_EQUAL(mymset1a.get_termfreq("sponge"), 0);
1498  mymset1a.get_termweight("sponge"));
1499 
1500  // Repeat tests with TradWeight. (Regression test to ensure
1501  // non-existent terms get zero weight with TradWeight.)
1504  }
1505 }
1506 
// Checks that get_termweight("paragraph") is non-zero both for a plain
// single-term query and for a second, compound query that contains an
// OP_AND of the same term with "another".
// NOTE(review): listing numbers 1507 and 1523 are absent from this extract.
// 1507 is presumably the test's description; 1523 is the first line of the
// `query` declaration, so the outer operator and first operand are not
// visible here.
1508 DEFINE_TESTCASE(qterminfo2, backend) {
1509  Xapian::Database db(get_database("apitest_simpledata"));
1510  Xapian::Enquire enquire(db);
1511 
1512  // make a query
1513  Xapian::Stem stemmer("english");
1514 
1515  string term1 = stemmer("paragraph");
1516  string term2 = stemmer("another");
1517 
1518  enquire.set_query(Xapian::Query(term1));
1519  Xapian::MSet mset0 = enquire.get_mset(0, 10);
1520 
 // The lookup uses the literal "paragraph" rather than term1 --
 // presumably the stemmer leaves this word unchanged; confirm.
1521  TEST_NOT_EQUAL(mset0.get_termweight("paragraph"), 0);
1522 
1524  Xapian::Query(Xapian::Query::OP_AND, term1, term2));
1525  enquire.set_query(query);
1526 
1527  // retrieve the results
1528  // Note: get_mset() used to throw "AssertionError" in debug builds
1529  Xapian::MSet mset = enquire.get_mset(0, 10);
1530 
1531  TEST_NOT_EQUAL(mset.get_termweight("paragraph"), 0);
1532 }
1533 
1534 // tests that when specifying that no items are to be returned, those
1535 // statistics which should be the same are.
// Concretely: get_max_possible() must be identical whether zero items or
// one item is requested for the query "this".
1536 DEFINE_TESTCASE(msetzeroitems1, backend) {
1537  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1538  enquire.set_query(query("this"));
1539  Xapian::MSet mymset1 = enquire.get_mset(0, 0);
1540 
1541  Xapian::MSet mymset2 = enquire.get_mset(0, 1);
1542 
1543  TEST_EQUAL(mymset1.get_max_possible(), mymset2.get_max_possible());
1544 }
1545 
1546 // test that the matches_* of a simple query are as expected
// For a series of single-term, OR and AND queries, and for several
// get_mset() window sizes, checks get_matches_lower_bound(),
// get_matches_estimated() and get_matches_upper_bound(); the later sections
// also check the get_uncollapsed_matches_* counterparts.
// NOTE(review): listing numbers 1561-1563, 1571-1573, 1581-1583, 1591-1593,
// 1604 and 1607-1609 are absent from this extract.  Each gap follows a group
// of get_matches_* checks -- presumably the matching get_uncollapsed_* checks
// -- confirm against the original file.
1547 DEFINE_TESTCASE(matches1, backend) {
 // True when the backend name contains "remote"; only consulted for one
 // set of expectations further down.
1548  bool remote = get_dbtype().find("remote") != string::npos;
1549 
1550  Xapian::Database db = get_database("apitest_simpledata");
1551  Xapian::Enquire enquire(db);
1552  Xapian::Query myquery;
1553  Xapian::MSet mymset;
1554 
1555  myquery = query("word");
1556  enquire.set_query(myquery);
1557  mymset = enquire.get_mset(0, 10);
1558  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1559  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1560  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1564 
1565  myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
1566  enquire.set_query(myquery);
1567  mymset = enquire.get_mset(0, 10);
1568  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1569  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1570  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1574 
1575  myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
1576  enquire.set_query(myquery);
1577  mymset = enquire.get_mset(0, 10);
1578  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1579  TEST_EQUAL(mymset.get_matches_estimated(), 0);
1580  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
1584 
1585  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1586  enquire.set_query(myquery);
1587  mymset = enquire.get_mset(0, 10);
1588  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1589  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1590  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1594 
 // Same query as the previous section, but with zero items requested:
 // the expectations below are bounds/estimates rather than exact counts.
1595  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1596  enquire.set_query(myquery);
1597  mymset = enquire.get_mset(0, 0);
 // db.size() == 1 is used here as the "not sharded" condition (see the
 // comment inside the branch).
1598  if (db.size() == 1) {
1599  // This isn't true for sharded DBs since there one sub-database has 3
1600  // documents and simple and word both have termfreq of 2, so the
1601  // matcher can tell at least one document must match!)
1602  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1603  }
1605  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1606  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1610 
 // Requesting one or two items: all six counts are expected to be 2.
1611  mymset = enquire.get_mset(0, 1);
1612  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1613  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1614  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1615  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1616  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1617  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1618 
1619  mymset = enquire.get_mset(0, 2);
1620  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1621  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1622  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1623  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1624  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1625  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1626 
1627  myquery = query(Xapian::Query::OP_AND, "paragraph", "another");
1628  enquire.set_query(myquery);
1629  mymset = enquire.get_mset(0, 0);
1630  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1631  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1632  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1633  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1634  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1635  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1636 
1637  mymset = enquire.get_mset(0, 1);
1638  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1639  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
 // Expectations for the estimate and upper bound depend on whether this
 // is a multi-database (db.size() > 1) remote backend.
1640  if (db.size() > 1 && remote) {
1641  // The matcher can tell there's only one match in this case.
1642  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1643  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1644  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1645  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1646  } else {
1647  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1648  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1649  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1650  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1651  }
1652 
 // With two or more items requested, all six counts are expected to be 1.
1653  mymset = enquire.get_mset(0, 2);
1654  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1655  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1656  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1657  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1658  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1659  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1660 
 // Same expectations when the window starts at offset 1.
1661  mymset = enquire.get_mset(1, 20);
1662  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1663  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1664  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1665  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1666  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1667  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1668 }
1669 
1670 // tests that wqf affects the document weights
// Runs the term "word" once constructed with a second argument of 2 (q1)
// and once with the default (q2), and requires the top hit of the first
// run to have a strictly greater weight than the top hit of the second.
1671 DEFINE_TESTCASE(wqf1, backend) {
1672  // Both queries have length 2; in q1 word has wqf=2, in q2 word has wqf=1
 // NOTE(review): no query length is passed to set_query() below, so the
 // "both queries have length 2" part of the comment above looks stale --
 // confirm.
1673  Xapian::Query q1("word", 2);
1674  Xapian::Query q2("word");
1675  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1676  enquire.set_query(q1);
1677  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1678  enquire.set_query(q2);
1679  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1680  // Check the weights
1681  TEST(mset1.begin().get_weight() > mset2.begin().get_weight());
1682 }
1683 
1684 // tests that query length affects the document weights
// NOTE(review): as written, q1 and q2 are identical queries and the active
// assertion requires the top-hit weights to be EQUAL; the "less than"
// assertion is commented out.  The description above therefore no longer
// matches what is checked -- presumably a leftover from when a query length
// could be supplied; confirm.
1685 DEFINE_TESTCASE(qlen1, backend) {
1686  Xapian::Query q1("word");
1687  Xapian::Query q2("word");
1688  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1689  enquire.set_query(q1);
1690  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1691  enquire.set_query(q2);
 // Only the first hit of each MSet is compared, although the two runs
 // request different maximum sizes (10 vs 2).
1692  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1693  // Check the weights
1694  // TEST(mset1.begin().get_weight() < mset2.begin().get_weight());
1695  TEST(mset1.begin().get_weight() == mset2.begin().get_weight());
1696 }
1697 
1698 // tests that opening a non-existent termlist throws the correct exception
// Opens the term list of document 1, then attempts to open the term list of
// document 999999999 in "apitest_onedoc".
// NOTE(review): listing numbers 1701-1704 and 1708 are absent from this
// extract.  1708 is the opening line of the statement that ends on 1709
// (note the extra closing parenthesis there) -- presumably an
// exception-expecting test macro; the expected exception type is not
// visible here.  Confirm against the original file.
1699 DEFINE_TESTCASE(termlist1, backend) {
1700  Xapian::Database db(get_database("apitest_onedoc"));
1705  /* Cause the database to be used properly, showing up problems
1706  * with the link being in a bad state. CME */
1707  Xapian::TermIterator temp = db.termlist_begin(1);
1709  Xapian::TermIterator t = db.termlist_begin(999999999));
1710 }
1711 
1712 // tests that a Xapian::TermIterator works as an STL iterator
// Copies the iterator two ways, builds a std::vector<string> from the
// [t, tend) range, then re-opens the term list of document 1 and checks it
// yields the same terms in the same order and ends at the same point.
// NOTE(review): listing number 1715 is absent from this extract; presumably
// the declaration of `t`, which is used below without a visible definition.
1713 DEFINE_TESTCASE(termlist2, backend) {
1714  Xapian::Database db(get_database("apitest_onedoc"));
1716  Xapian::TermIterator tend = db.termlist_end(1);
1717 
1718  // test copy-initialisation (`T x = y;` -- in C++ this invokes the copy
 // constructor, not operator=) creates a copy which compares equal
1719  Xapian::TermIterator t_copy = t;
1720  TEST_EQUAL(t, t_copy);
1721 
1722  // test copy constructor creates a copy which compares equal
1723  Xapian::TermIterator t_clone(t);
1724  TEST_EQUAL(t, t_clone);
1725 
 // Range constructor: requires TermIterator to behave as an input
 // iterator yielding strings.
1726  vector<string> v(t, tend);
1727 
 // Here operator= really is used: both iterators are re-assigned.
1728  t = db.termlist_begin(1);
1729  tend = db.termlist_end(1);
1730  vector<string>::const_iterator i;
1731  for (i = v.begin(); i != v.end(); ++i) {
1732  TEST_NOT_EQUAL(t, tend);
1733  TEST_EQUAL(*i, *t);
1734  t++;
1735  }
1736  TEST_EQUAL(t, tend);
1737 }
1738 
// File-local helper: opens "apitest_onedoc" in a function-local Database
// and returns an iterator over the term list of document 1.  The Database
// object goes out of scope when the function returns, so the caller holds
// an iterator whose originating Database object no longer exists.
// NOTE(review): listing number 1740 (the line carrying the function's name
// and parameter list) is absent from this extract.
1739 static Xapian::TermIterator
1741 {
1742  Xapian::Database db(get_database("apitest_onedoc"));
1743  return db.termlist_begin(1);
1744 }
1745 
1746 // tests that a Xapian::TermIterator still works when the DB is deleted
// Walks two term iterators `t` and `u` in step until `t` reaches `tend`,
// requiring them to yield the same term at every position.
// NOTE(review): listing numbers 1748 and 1750 are absent from this extract;
// presumably the declarations of `u` and `t`, both used below without a
// visible definition -- one of them presumably obtained from the static
// helper defined just before this test.  Confirm.
1747 DEFINE_TESTCASE(termlist3, backend) {
1749  Xapian::Database db(get_database("apitest_onedoc"));
1751  Xapian::TermIterator tend = db.termlist_end(1);
1752 
 // Only `t` is compared against an end iterator; `u` is assumed to have
 // at least as many entries.
1753  while (t != tend) {
1754  TEST_EQUAL(*t, *u);
1755  t++;
1756  u++;
1757  }
1758 }
1759 
1760 // tests skip_to
// Calls skip_to() with the empty string and then with "\xff" on iterator
// `i`; nothing is asserted, so the test passes if neither call crashes or
// throws.
// NOTE(review): listing number 1763 is absent from this extract; presumably
// the declaration of `i`.
1761 DEFINE_TESTCASE(termlist4, backend) {
1762  Xapian::Database db(get_database("apitest_onedoc"));
1764  i.skip_to("");
1765  i.skip_to("\xff");
1766 }
1767 
1768 // tests punctuation is OK in terms (particularly in remote queries)
// Runs three single-term queries whose terms contain ';', ':' and ','
// against "apitest_punc".  The resulting MSets are not inspected; the test
// passes if each get_mset() call completes.
1769 DEFINE_TESTCASE(puncterms1, backend) {
1770  Xapian::Database db(get_database("apitest_punc"));
1771  Xapian::Enquire enquire(db);
1772 
1773  Xapian::Query q1("semi;colon");
1774  enquire.set_query(q1);
1775  Xapian::MSet m1 = enquire.get_mset(0, 10);
1776 
1777  Xapian::Query q2("col:on");
1778  enquire.set_query(q2);
1779  Xapian::MSet m2 = enquire.get_mset(0, 10);
1780 
1781  Xapian::Query q3("com,ma");
1782  enquire.set_query(q3);
1783  Xapian::MSet m3 = enquire.get_mset(0, 10);
1784 }
1785 
1786 // test that searching for a term with a space or backslash in it works
// Three sections, for terms containing a space, a tab and a backslash.
// Each requires exactly one hit, both via TEST_MSET_SIZE and by counting
// iterator steps.  The first two sections also check document data/values.
// NOTE(review): listing number 1791 is absent from this extract; presumably
// the declaration of the iterator `m` used by the counting loops.
1787 DEFINE_TESTCASE(spaceterms1, backend) {
1788  Xapian::Enquire enquire(get_database("apitest_space"));
1789  Xapian::MSet mymset;
1790  Xapian::doccount count;
1792  Xapian::Stem stemmer("english");
1793 
1794  enquire.set_query(stemmer("space man"));
1795  mymset = enquire.get_mset(0, 10);
1796  TEST_MSET_SIZE(mymset, 1);
1797  count = 0;
1798  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1799  TEST_EQUAL(count, 1);
1800 
 // Value slots 1..6 must be non-empty (this loop starts at slot 1, the
 // one in the next section starts at slot 0).
1801  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
1802  TEST_NOT_EQUAL(mymset.begin().get_document().get_data(), "");
1803  TEST_NOT_EQUAL(mymset.begin().get_document().get_value(value_no), "");
1804  }
1805 
1806  enquire.set_query(stemmer("tab\tby"));
1807  mymset = enquire.get_mset(0, 10);
1808  TEST_MSET_SIZE(mymset, 1);
1809  count = 0;
1810  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1811  TEST_EQUAL(count, 1);
1812 
 // Value slots 0..6 must be non-empty; slot 0 must additionally be
 // longer than 262 bytes with byte 262 equal to 255.
1813  for (Xapian::valueno value_no = 0; value_no < 7; ++value_no) {
1814  string value = mymset.begin().get_document().get_value(value_no);
1815  TEST_NOT_EQUAL(value, "");
1816  if (value_no == 0) {
1817  TEST(value.size() > 262);
1818  TEST_EQUAL(static_cast<unsigned char>(value[262]), 255);
1819  }
1820  }
1821 
1822  enquire.set_query(stemmer("back\\slash"));
1823  mymset = enquire.get_mset(0, 10);
1824  TEST_MSET_SIZE(mymset, 1);
1825  count = 0;
1826  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1827  TEST_EQUAL(count, 1);
1828 }
1829 
1830 // test that XOR queries work
// XORs the stemmed terms "this", "word" and "of" and expects documents
// 1, 2, 5, 6 in that order.
// NOTE(review): listing number 1841 is absent from this extract.  It sits
// between constructing the query and set_query(); since xor2 runs the same
// query but expects a different order (2, 1, 5, 6), the missing line
// presumably selects a different weighting scheme -- confirm.
// The table in the body has had its column alignment collapsed in this
// extract, so which term each '*' belongs to cannot be read from it.
1831 DEFINE_TESTCASE(xor1, backend) {
1832  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1833  Xapian::Stem stemmer("english");
1834 
1835  vector<string> terms;
1836  terms.push_back(stemmer("this"));
1837  terms.push_back(stemmer("word"));
1838  terms.push_back(stemmer("of"));
1839 
1840  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1842  enquire.set_query(query);
1843 
1844  Xapian::MSet mymset = enquire.get_mset(0, 10);
1845  // Docid this word of Match?
1846  // 1 * *
1847  // 2 * * * *
1848  // 3 * *
1849  // 4 * *
1850  // 5 * *
1851  // 6 * *
1852  mset_expect_order(mymset, 1, 2, 5, 6);
1853 }
1854 
// Same XOR query as xor1 (stemmed "this", "word", "of") but run with
// whatever weighting the Enquire object has by default; the expected order
// is 2, 1, 5, 6.
// NOTE(review): listing number 1855 is absent from this extract; presumably
// the test's original description.  The table in the body has had its
// column alignment collapsed, so the per-term counts cannot be attributed
// to columns reliably.
1856 DEFINE_TESTCASE(xor2, backend) {
1857  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1858  Xapian::Stem stemmer("english");
1859 
1860  vector<string> terms;
1861  terms.push_back(stemmer("this"));
1862  terms.push_back(stemmer("word"));
1863  terms.push_back(stemmer("of"));
1864 
1865  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1866  enquire.set_query(query);
1867 
1868  Xapian::MSet mymset = enquire.get_mset(0, 10);
1869  // Docid LEN this word of Match?
1870  // 1 28 2 *
1871  // 2 81 5 8 1 *
1872  // 3 15 1 2
1873  // 4 31 1 1
1874  // 5 15 1 *
1875  // 6 15 1 *
1876  mset_expect_order(mymset, 2, 1, 5, 6);
1877 }
1878 
1879 // test Xapian::Database::get_document()
// Fetches document 1 from "apitest_onedoc", then checks that a copy of the
// Document object shares state with the original: data set through `doc`
// must be visible through `doc2`.
// NOTE(review): listing numbers 1883-1887 are absent from this extract, so
// five lines between fetching the document and the handle check are not
// visible here.
1880 DEFINE_TESTCASE(getdoc1, backend) {
1881  Xapian::Database db(get_database("apitest_onedoc"));
1882  Xapian::Document doc(db.get_document(1));
1888  // Check that Document works as a handle on modification
1889  // (this was broken for the first try at Xapian::Document prior to 0.7).
1890  Xapian::Document doc2 = doc;
1891  doc.set_data("modified!");
1892  TEST_EQUAL(doc.get_data(), "modified!");
1893  TEST_EQUAL(doc.get_data(), doc2.get_data());
1894 }
1895 
1896 // Test whether operators with no elements work as a null query: an OP_XOR
     // query built from an empty range of subqueries must match no documents.
1897 DEFINE_TESTCASE(emptyop1, backend) {
1898  Xapian::Enquire enquire(get_database("apitest_simpledata"));
      // Deliberately left empty so the operator query has no subqueries.
1899  vector<Xapian::Query> nullvec;
1900 
1901  Xapian::Query query1(Xapian::Query::OP_XOR, nullvec.begin(), nullvec.end());
1902 
1903  enquire.set_query(query1);
1904  Xapian::MSet mymset = enquire.get_mset(0, 10);
1905  TEST_MSET_SIZE(mymset, 0);
1906  // In Xapian < 1.3.0, this gave InvalidArgumentError (because
1907  // query1.empty()) but elsewhere we treat an empty query as just not
1908  // matching any documents, so we now do the same here too.
      // NOTE(review): listing line 1909 (the start of the statement completed
      // on the next line) is absent from this extract - presumably it compares
      // get_matching_terms_begin(1) with the end iterator; confirm against the
      // original file.
1910  enquire.get_matching_terms_end(1));
1911 }
1912 
1913 // Regression test for check_at_least SEGV when there are no matches: runs a
     // query for the term "thom" with check_at_least (11) larger than the
     // requested MSet size (10) and expects an empty MSet.
1914 DEFINE_TESTCASE(checkatleast1, backend) {
1915  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1916  enquire.set_query(Xapian::Query("thom"));
      // Arguments are (first, maxitems, checkatleast).
1917  Xapian::MSet mymset = enquire.get_mset(0, 10, 11);
1918  TEST_EQUAL(0, mymset.size());
1919 }
1920 
1921 // Regression test - if check_at_least was set we returned (check_at_least - 1)
1922 // results, rather than the requested msize. Fixed in 1.0.2.
     //
     // Each get_mset() call passes (first, maxitems, checkatleast); the MSet must
     // hold exactly `maxitems` entries even though more documents are checked.
1923 DEFINE_TESTCASE(checkatleast2, backend) {
1924  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1925  enquire.set_query(Xapian::Query("paragraph"));
1926 
      // check_at_least (10) is larger than the lower bound asserted below, so
      // that bound is expected to be exactly 5.
1927  Xapian::MSet mymset = enquire.get_mset(0, 3, 10);
1928  TEST_MSET_SIZE(mymset, 3);
1929  TEST_EQUAL(mymset.get_matches_lower_bound(), 5);
      // NOTE(review): listing line 1930 is absent from this extract - confirm
      // against the original file which check is made here.
1931 
      // Only 2 results are requested but at least 4 documents must be checked,
      // so both lower bounds must be at least 4.  Each bound is asserted once;
      // the verbatim duplicate assertions added nothing and were removed.
1932  mymset = enquire.get_mset(0, 2, 4);
1933  TEST_MSET_SIZE(mymset, 2);
1934  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1936  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,4);
1938 }
1939 
1940 // Feature tests - check_at_least with various sorting options.
     //
     // For each of three `order` settings and seven `sort` settings, three
     // get_mset() calls with different (maxitems, checkatleast) pairs are made
     // and the MSet size and match-count bounds are checked.
1941 DEFINE_TESTCASE(checkatleast3, backend) {
1942  Xapian::Enquire enquire(get_database("etext"));
1943  enquire.set_query(Xapian::Query("prussian")); // 60 matches.
1944 
1945  for (int order = 0; order < 3; ++order) {
      // NOTE(review): the statement in each case below (listing lines 1948,
      // 1951 and 1954) is absent from this extract - presumably each selects
      // a different docid ordering; confirm against the original file.
1946  switch (order) {
1947  case 0:
1949  break;
1950  case 1:
1952  break;
1953  case 2:
1955  break;
1956  }
1957 
1958  for (int sort = 0; sort < 7; ++sort) {
      // Odd `sort` values select the reversed variant of each sort mode.
1959  bool reverse = (sort & 1);
1960  switch (sort) {
1961  case 0:
1962  enquire.set_sort_by_relevance();
1963  break;
1964  case 1: case 2:
1965  enquire.set_sort_by_value(0, reverse);
1966  break;
1967  case 3: case 4:
1968  enquire.set_sort_by_value_then_relevance(0, reverse);
1969  break;
1970  case 5: case 6:
1971  enquire.set_sort_by_relevance_then_value(0, reverse);
1972  break;
1973  }
1974 
      // maxitems (100) exceeds the 60 matches: every match is returned and
      // all three bounds are exact.
1975  Xapian::MSet mset = enquire.get_mset(0, 100, 500);
1976  TEST_MSET_SIZE(mset, 60);
1977  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1978  TEST_EQUAL(mset.get_matches_estimated(), 60);
1979  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
      // NOTE(review): listing lines 1980-1982 are absent from this extract -
      // presumably the uncollapsed-bound checks, as in the next group; confirm.
1983 
      // maxitems (50) is below the match count but checkatleast (100) is
      // above it, so the bounds are still expected to be exact.
1984  mset = enquire.get_mset(0, 50, 100);
1985  TEST_MSET_SIZE(mset, 50);
1986  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1987  TEST_EQUAL(mset.get_matches_estimated(), 60);
1988  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
1989  TEST_EQUAL(mset.get_uncollapsed_matches_lower_bound(), 60);
1990  TEST_EQUAL(mset.get_uncollapsed_matches_estimated(), 60);
1991  TEST_EQUAL(mset.get_uncollapsed_matches_upper_bound(), 60);
1992 
      // checkatleast (50) is below the match count, so only a lower bound of
      // at least 50 is required.
1993  mset = enquire.get_mset(0, 10, 50);
1994  TEST_MSET_SIZE(mset, 10);
1995  TEST_REL(mset.get_matches_lower_bound(),>=,50);
1996  TEST_REL(mset.get_uncollapsed_matches_lower_bound(),>=,50);
1997  }
1998  }
1999 }
2000 
2001 // Tests all-documents postlists: iterating the postlist for the empty term
     // of "apitest_manydocs" must yield consecutive docids starting at 1, both
     // with plain iteration and when skip_to() is used part-way through.
2002 DEFINE_TESTCASE(allpostlist1, backend) {
2003  Xapian::Database db(get_database("apitest_manydocs"));
      // NOTE(review): listing line 2004 (which declares the iterator `i`) is
      // absent from this extract - confirm against the original file.
2005  unsigned int j = 1;
2006  while (i != db.postlist_end("")) {
2007  TEST_EQUAL(*i, j);
      // Pre-increment: the result is unused, so avoid the temporary that
      // post-increment creates.
2008  ++i;
2009  ++j;
2010  }
      // `j` ends one past the last docid seen.
2011  TEST_EQUAL(j, 513);
2012 
      // Second pass: on reaching docid 50, jump ahead to docid 60 with skip_to().
2013  i = db.postlist_begin("");
2014  j = 1;
2015  while (i != db.postlist_end("")) {
2016  TEST_EQUAL(*i, j);
2017  ++i;
2018  ++j;
2019  if (j == 50) {
2020  j += 10;
2021  i.skip_to(j);
2022  }
2023  }
2024  TEST_EQUAL(j, 513);
2025 }
2026 
2028 {
      // Checks on how `db` treats the empty term: its term frequency must equal
      // the document count, and term_exists("") must be true exactly when the
      // database contains at least one document.
      // NOTE(review): the function header (listing line 2027) and listing
      // lines 2030 and 2033 are absent from this extract - confirm against the
      // original file.
2029  // Don't bother with postlist_begin() because allpostlist tests cover that.
2031  TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
2032  TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
2034 }
2035 
2036 // Tests results of passing an empty term to various methods, using three
     // databases holding 512, 1 and 0 documents respectively.
2037 DEFINE_TESTCASE(emptyterm1, backend) {
      // NOTE(review): listing lines 2040, 2044 and 2048 (one after each
      // document-count check) are absent from this extract - presumably each
      // runs the empty-term checks on `db`; confirm against the original file.
2038  Xapian::Database db(get_database("apitest_manydocs"));
2039  TEST_EQUAL(db.get_doccount(), 512);
2041 
2042  db = get_database("apitest_onedoc");
2043  TEST_EQUAL(db.get_doccount(), 1);
2045 
      // An empty database name is expected to give a database with no documents.
2046  db = get_database("");
2047  TEST_EQUAL(db.get_doccount(), 0);
2049 }
2050 
2051 // Test for alldocs postlist with a sparse database: the database holds a
     // single document, stored at docid 5, which has data and a value but no
     // terms.
2052 DEFINE_TESTCASE(alldocspl1, backend) {
      // The lambda passed to get_database() populates the database.
2053  Xapian::Database db = get_database("alldocspl1",
2054  [](Xapian::WritableDatabase& wdb,
2055  const string&) {
2056  Xapian::Document doc;
2057  doc.set_data("5");
2058  doc.add_value(0, "5");
2059  wdb.replace_document(5, doc);
2060  });
      // NOTE(review): listing line 2061 (which declares the iterator `i`) is
      // absent from this extract - confirm against the original file.
2062  TEST(i != db.postlist_end(""));
2063  TEST_EQUAL(*i, 5);
      // The document has no terms, so its length and unique-term count are 0;
      // the wdf reported for the all-documents postlist entry is 1.
2064  TEST_EQUAL(i.get_doclength(), 0);
2065  TEST_EQUAL(i.get_unique_terms(), 0);
2066  TEST_EQUAL(i.get_wdf(), 1);
2067  ++i;
      // Docid 5 must be the only entry.
2068  TEST(i == db.postlist_end(""));
2069 }
2070 
2071 // Test reading and writing a modified alldocspostlist: documents are added
     // at docids 5 and 7 and docid 5 is then deleted, with the all-documents
     // postlist (empty term) iterated after each step.
2072 DEFINE_TESTCASE(alldocspl2, writable) {
      // Declared outside the inner scope so they can still be used after `db`
      // has gone out of scope.
2073  Xapian::PostingIterator i, end;
2074  {
      // NOTE(review): listing line 2075 (which declares `db`) is absent from
      // this extract - presumably a writable database; confirm against the
      // original file.
2076  Xapian::Document doc;
2077  doc.set_data("5");
2078  doc.add_value(0, "5");
2079  db.replace_document(5, doc);
2080 
2081  // Test iterating before committing the changes.
2082  i = db.postlist_begin("");
2083  end = db.postlist_end("");
2084  TEST(i != end);
2085  TEST_EQUAL(*i, 5);
2086  TEST_EQUAL(i.get_doclength(), 0);
2087  TEST_EQUAL(i.get_unique_terms(), 0);
2088  TEST_EQUAL(i.get_wdf(), 1);
2089  ++i;
2090  TEST(i == end);
2091 
2092  db.commit();
2093 
2094  // Test iterating after committing the changes.
2095  i = db.postlist_begin("");
2096  end = db.postlist_end("");
2097  TEST(i != end);
2098  TEST_EQUAL(*i, 5);
2099  TEST_EQUAL(i.get_doclength(), 0);
2100  TEST_EQUAL(i.get_unique_terms(), 0);
2101  TEST_EQUAL(i.get_wdf(), 1);
2102  ++i;
2103  TEST(i == end);
2104 
2105  // Add another document.
2106  doc = Xapian::Document();
2107  doc.set_data("5");
2108  doc.add_value(0, "7");
2109  db.replace_document(7, doc);
2110 
2111  // Test iterating through before committing the changes.
2112  i = db.postlist_begin("");
2113  end = db.postlist_end("");
2114  TEST(i != end);
2115  TEST_EQUAL(*i, 5);
2116  TEST_EQUAL(i.get_doclength(), 0);
2117  TEST_EQUAL(i.get_unique_terms(), 0);
2118  TEST_EQUAL(i.get_wdf(), 1);
2119  ++i;
2120  TEST(i != end);
2121  TEST_EQUAL(*i, 7);
2122  TEST_EQUAL(i.get_doclength(), 0);
2123  TEST_EQUAL(i.get_unique_terms(), 0);
2124  TEST_EQUAL(i.get_wdf(), 1);
2125  ++i;
2126  TEST(i == end);
2127 
2128  // Delete the first document.
2129  db.delete_document(5);
2130 
2131  // Test iterating through before committing the changes.
2132  i = db.postlist_begin("");
2133  end = db.postlist_end("");
2134  TEST(i != end);
2135  TEST_EQUAL(*i, 7);
2136  TEST_EQUAL(i.get_doclength(), 0);
2137  TEST_EQUAL(i.get_unique_terms(), 0);
2138  TEST_EQUAL(i.get_wdf(), 1);
2139  ++i;
2140  TEST(i == end);
2141 
2142  // Test iterating through after committing the changes, and dropping the
2143  // reference to the main DB.
2144  db.commit();
2145  i = db.postlist_begin("");
2146  end = db.postlist_end("");
2147  }
2148 
      // `db` is out of scope here; only docid 7 should remain in the postlist.
2149  TEST(i != end);
2150  TEST_EQUAL(*i, 7);
2151  TEST_EQUAL(i.get_doclength(), 0);
2152  TEST_EQUAL(i.get_unique_terms(), 0);
2153  TEST_EQUAL(i.get_wdf(), 1);
2154  ++i;
2155  TEST(i == end);
2156 }
2157 
2158 // Feature test for Query::OP_SCALE_WEIGHT: each parsed query is wrapped in
     // OP_SCALE_WEIGHT with a range of multipliers and the results are compared
     // with those of the unscaled query.
2159 DEFINE_TESTCASE(scaleweight1, backend) {
2160  Xapian::Database db(get_database("apitest_phrase"));
2161  Xapian::Enquire enq(db);
      // NOTE(review): listing line 2162 (which declares the parser `qp` used
      // below) is absent from this extract - confirm against the original file.
2163 
2164  static const char * const queries[] = {
2165  "pad",
2166  "milk fridge",
2167  "leave milk on fridge",
2168  "ordered milk operator",
2169  "ordered phrase operator",
2170  "leave \"milk on fridge\"",
2171  "notpresent",
2172  "leave \"milk notpresent\"",
2173  };
      // The list is terminated by two equal adjacent values (the trailing 0, 0):
      // the loop below stops on reaching that pair, so neither of those two
      // entries is used as a multiplier.
2174  static const double multipliers[] = {
2175  -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
2176  0, 0
2177  };
2178 
2179  for (auto qstr : queries) {
      // Reset the debug output stream for each query string.
2180  tout.str(string());
2181  Xapian::Query query1 = qp.parse_query(qstr);
2182  tout << "query1: " << query1.get_description() << '\n';
2183  for (const double *multp = multipliers; multp[0] != multp[1]; ++multp) {
2184  double mult = *multp;
2185  if (mult < 0) {
      // NOTE(review): listing lines 2186-2187 (the start of the statement
      // completed on the next line) are absent from this extract - presumably
      // a check that a negative scale factor is rejected; confirm against the
      // original file.
2188  query1, mult));
2189  continue;
2190  }
2191  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, mult);
2192  tout << "query2: " << query2.get_description() << '\n';
2193 
2194  enq.set_query(query1);
2195  Xapian::MSet mset1 = enq.get_mset(0, 20);
2196  enq.set_query(query2);
2197  Xapian::MSet mset2 = enq.get_mset(0, 20);
2198 
      // Scaling must not change which documents match.
2199  TEST_EQUAL(mset1.size(), mset2.size());
2200 
2201  Xapian::MSetIterator i1, i2;
2202  if (mult > 0) {
      // Positive factor: same documents in the same order, with each weight
      // multiplied by `mult`.
2203  for (i1 = mset1.begin(), i2 = mset2.begin();
2204  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2205  TEST_EQUAL_DOUBLE(i1.get_weight() * mult, i2.get_weight());
2206  TEST_EQUAL(*i1, *i2);
2207  }
2208  } else {
2209  // Weights in mset2 are 0; so it should be sorted by docid.
2210  vector<Xapian::docid> ids1;
2211  vector<Xapian::docid> ids2;
2212  for (i1 = mset1.begin(), i2 = mset2.begin();
2213  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2214  TEST_NOT_EQUAL_DOUBLE(i1.get_weight(), 0);
2215  TEST_EQUAL_DOUBLE(i2.get_weight(), 0);
2216  ids1.push_back(*i1);
2217  ids2.push_back(*i2);
2218  }
      // Only ids1 is sorted: ids2 must already be in ascending docid order
      // for the comparison to succeed.
2219  sort(ids1.begin(), ids1.end());
2220  TEST_EQUAL(ids1, ids2);
2221  }
2222  }
2223  }
2224 }
2225 
2226 // Test Query::OP_SCALE_WEIGHT being used to multiply some of the weights of a
2227 // search by zero.
2228 DEFINE_TESTCASE(scaleweight2, backend) {
2229  Xapian::Database db(get_database("apitest_phrase"));
2230  Xapian::Enquire enq(db);
      // NOTE(review): listing line 2231 (which declares the iterator `i` used
      // in the loops below) is absent from this extract - confirm against the
      // original file.
2232 
      // query5 = (query1 scaled by 2.5) OR (query3 scaled by 0).
2233  Xapian::Query query1("fridg");
2234  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, 2.5);
2235  Xapian::Query query3("milk");
2236  Xapian::Query query4(Xapian::Query::OP_SCALE_WEIGHT, query3, 0);
2237  Xapian::Query query5(Xapian::Query::OP_OR, query2, query4);
2238 
2239  // query5 should first return the same results as query1, in the same
2240  // order, and then return the results of query3 which aren't also results
2241  // of query1, in ascending docid order. We test that this happens.
2242 
2243  // First, build a vector of docids matching the first part of the query,
2244  // and append the non-duplicate docids matching the second part of the
2245  // query.
2246  vector<Xapian::docid> ids1;
      // NOTE(review): <set> is not among the includes visible at the top of the
      // file - presumably it is pulled in indirectly; verify.
2247  set<Xapian::docid> idsin1;
2248  vector<Xapian::docid> ids3;
2249 
2250  enq.set_query(query1);
2251  Xapian::MSet mset1 = enq.get_mset(0, 20);
2252  enq.set_query(query3);
2253  Xapian::MSet mset3 = enq.get_mset(0, 20);
2254  TEST_NOT_EQUAL(mset1.size(), 0);
2255  for (i = mset1.begin(); i != mset1.end(); ++i) {
2256  ids1.push_back(*i);
2257  idsin1.insert(*i);
2258  }
2259  TEST_NOT_EQUAL(mset3.size(), 0);
2260  for (i = mset3.begin(); i != mset3.end(); ++i) {
      // Skip docids which query1 already returned.
2261  if (idsin1.find(*i) != idsin1.end())
2262  continue;
2263  ids3.push_back(*i);
2264  }
      // The zero-weighted matches are expected in ascending docid order.
2265  sort(ids3.begin(), ids3.end());
2266  ids1.insert(ids1.end(), ids3.begin(), ids3.end());
2267 
2268  // Now, run the combined query and build a vector of the matching docids.
2269  vector<Xapian::docid> ids5;
2270  enq.set_query(query5);
2271  Xapian::MSet mset5 = enq.get_mset(0, 20);
2272  for (i = mset5.begin(); i != mset5.end(); ++i) {
2273  ids5.push_back(*i);
2274  }
2275 
2276  TEST_EQUAL(ids1, ids5);
2277 }
2278 
2279 // Feature test for Database::get_uuid(): checks the UUID of a single
     // database, of an empty Database object, and of Database objects to which
     // one or more databases have been added.
2280 DEFINE_TESTCASE(uuid1, backend && !multi) {
2281  SKIP_TEST_FOR_BACKEND("inmemory");
2282  Xapian::Database db = get_database("apitest_simpledata");
2283  string uuid1 = db.get_uuid();
      // The UUID string is expected to be 36 characters long.
2284  TEST_EQUAL(uuid1.size(), 36);
2285 
2286  // A database with no sub-databases has an empty UUID.
2287  Xapian::Database db2;
2288  TEST(db2.get_uuid().empty());
2289 
      // With a single sub-database, the UUID is that of the sub-database.
2290  db2.add_database(db);
2291  TEST_EQUAL(uuid1, db2.get_uuid());
2292 
2293  // Multi-database has multiple UUIDs (we don't define the format exactly
2294  // so this assumes something about the implementation).
2295  db2.add_database(db);
2296  TEST_EQUAL(uuid1 + ":" + uuid1, db2.get_uuid());
2297 
2298 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
2299  // This relies on InMemory databases not supporting uuids.
2300  // A multi-database containing a database with no uuid has no uuid.
2301  db2.add_database(Xapian::Database(string(), Xapian::DB_BACKEND_INMEMORY));
2302  TEST(db2.get_uuid().empty());
2303 #endif
2304 }
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Definition: testutils.h:78
const int DB_BACKEND_INMEMORY
Use the "in memory" backend.
Definition: constants.h:195
ExpandDecider subclass which restricts terms to a particular prefix.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
Definition: omenquire.cc:829
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: omenquire.cc:884
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
Definition: omdatabase.cc:198
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_sort_by_relevance()
Set the sorting to be by relevance only.
Definition: omenquire.cc:869
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:856
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
static void print_mset_percentages(const Xapian::MSet &mset)
Definition: api_anydb.cc:54
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: omenquire.cc:198
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:875
ESetIterator back() const
Return iterator pointing to the last object in this ESet.
Definition: eset.h:362
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
Definition: enquire.h:717
Match documents which an odd number of subqueries match.
Definition: query.h:107
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:862
Class representing a stemming algorithm.
Definition: stem.h:62
PositionIterator positionlist_begin(Xapian::docid did, const std::string &tname) const
An iterator pointing to the start of the position list for a given term in a given document...
Definition: omdatabase.cc:250
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:46
op
Query operators.
Definition: query.h:78
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails...
Definition: testsuite.h:267
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: omenquire.cc:246
static void test_emptyterm1_helper(Xapian::Database &db)
Definition: api_anydb.cc:2027
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
Definition: testsuite.h:300
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
double get_max_attained() const
The maximum weight attained by any document.
Definition: omenquire.cc:297
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
a generic test suite engine
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
Definition: enquire.h:605
Class representing a list of search results.
Definition: mset.h:44
void skip_to(const std::string &term)
Advance the iterator to term term.
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:938
Virtual base class for expand decider functor.
Definition: expanddecider.h:37
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
static Xapian::Stem stemmer
Definition: stemtest.cc:41
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
Definition: enquire.h:595
double get_weight() const
Get the weight for the current position.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:962
test functionality of the Xapian API
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: omenquire.cc:262
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_REL(A, REL, B)
Test a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
Definition: eset.h:345
Class for iterating over a list of terms.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Definition: omenquire.cc:947
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
Xapian::doccount size() const
Return number of items in this ESet object.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: omenquire.cc:276
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Class implementing a "boolean" weighting scheme.
Definition: weight.h:433
docids sort in whatever order is most efficient for the backend.
Definition: enquire.h:333
static int verbose
Definition: xapian-delve.cc:47
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
This class provides read/write access to a database.
Definition: database.h:789
DEFINE_TESTCASE(zerodocid1, backend)
Definition: api_anydb.cc:118
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
Iterator over a Xapian::MSet.
Definition: mset.h:368
Scale the weight contributed by a subquery.
Definition: query.h:166
bool operator()(const string &tname) const override
Do we want this term in the ESet?
Definition: api_anydb.cc:545
Public interfaces for the Xapian library.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Definition: omenquire.cc:893
docids sort in ascending order (default)
Definition: enquire.h:328
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
std::string get_dbtype()
Definition: apitest.cc:42
void fetch(const MSetIterator &begin, const MSetIterator &end) const
Prefetch hint a range of items.
Definition: mset.h:612
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629
Xapian::termcount get_ebound() const
Return a bound on the full size of this ESet object.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Definition: omenquire.cc:222
int percent
The percentage score for a document in an MSet.
Definition: types.h:66
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:774
static void print_mset_weights(const Xapian::MSet &mset)
Definition: api_anydb.cc:45
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
Definition: queryparser.cc:162
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Definition: mset.h:531
Iterator over a Xapian::ESet.
Definition: eset.h:160
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
Definition: database.h:240
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:75
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Definition: omdatabase.cc:148
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
size_t size() const
Return number of shards in this Database object.
Definition: database.h:93
bool term_exists(const std::string &tname) const
Check if a given term exists in the database.
Definition: omdatabase.cc:524
double get_weight() const
Get the weight for the current position.
Definition: omenquire.cc:460
void add_document(Xapian::docid did)
Add a document to the relevance set.
Definition: omenquire.cc:104
Match only documents which all subqueries match.
Definition: query.h:84
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
static Xapian::TermIterator test_termlist3_helper()
Definition: api_anydb.cc:1740
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:283
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Match documents which at least one subquery matches.
Definition: query.h:92
void skip_to(Xapian::docid did)
Advance the iterator to document did.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:111
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Definition: omenquire.cc:206
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
Class representing a list of search results.
Definition: eset.h:43
Xapian::Document get_document() const
Get the Document object for the current position.
Definition: omenquire.cc:450
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Definition: mset.h:641
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
Definition: omenquire.cc:848
std::string get_value(Xapian::valueno slot) const
Get value by number.
Definition: omdocument.cc:64
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Definition: eset.h:350
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
A relevance set (R-Set).
Definition: enquire.h:60
std::string get_uuid() const
Get a UUID for the database.
Definition: omdatabase.cc:776
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:269
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Definition: omdatabase.cc:339
docids sort in descending order.
Definition: enquire.h:330