xapian-core  1.4.25
api_anydb.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2015,2016,2017,2020 Olly Betts
7  * Copyright 2006,2008 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23  * USA
24  */
25 
26 #include <config.h>
27 
28 #include "api_anydb.h"
29 
30 #include <algorithm>
31 #include <string>
32 
33 #define XAPIAN_DEPRECATED(X) X
34 #include <xapian.h>
35 #include "testsuite.h"
36 #include "testutils.h"
37 
38 #include "apitest.h"
39 
40 #include <list>
41 
42 using namespace std;
43 
// Write the weight of every entry in mset to tout, each one preceded by a
// single space.  No newline is written.
// NOTE(review): mset is presumably this function's parameter - its
// declaration is not visible here; confirm.
44 static void
46 {
47  Xapian::MSetIterator i = mset.begin();
// Visit each entry in order, from the first to the last.
48  for ( ; i != mset.end(); ++i) {
49  tout << " " << i.get_weight();
50  }
51 }
52 
// Write the percentage score of every entry in mset to tout, each one
// preceded by a single space.  The percentages are obtained from
// mset.convert_to_percent().  No newline is written.
// NOTE(review): mset is presumably this function's parameter - its
// declaration is not visible here; confirm.
53 static void
55 {
56  Xapian::MSetIterator i = mset.begin();
// Visit each entry in order, from the first to the last.
57  for ( ; i != mset.end(); ++i) {
58  tout << " " << mset.convert_to_percent(i);
59  }
60 }
61 
// Build a query which combines up to ten terms using the operator op.
//
// Each non-empty term is stemmed with the "english" stemmer before being
// added; empty terms (the default for every tN argument) are skipped, so
// callers only need to pass as many terms as they want.
// NOTE(review): op is presumably the first parameter - the line declaring
// it is not visible here; confirm.
62 static Xapian::Query
64  const string & t1 = string(), const string & t2 = string(),
65  const string & t3 = string(), const string & t4 = string(),
66  const string & t5 = string(), const string & t6 = string(),
67  const string & t7 = string(), const string & t8 = string(),
68  const string & t9 = string(), const string & t10 = string())
69 {
70  vector<string> v;
71  Xapian::Stem stemmer("english");
// Collect the stemmed form of each term which was actually supplied.
72  if (!t1.empty()) v.push_back(stemmer(t1));
73  if (!t2.empty()) v.push_back(stemmer(t2));
74  if (!t3.empty()) v.push_back(stemmer(t3));
75  if (!t4.empty()) v.push_back(stemmer(t4));
76  if (!t5.empty()) v.push_back(stemmer(t5));
77  if (!t6.empty()) v.push_back(stemmer(t6));
78  if (!t7.empty()) v.push_back(stemmer(t7));
79  if (!t8.empty()) v.push_back(stemmer(t8));
80  if (!t9.empty()) v.push_back(stemmer(t9));
81  if (!t10.empty()) v.push_back(stemmer(t10));
82  return Xapian::Query(op, v.begin(), v.end());
83 }
84 
// Build a query which combines up to ten terms using the operator op, with
// parameter passed through as the final argument of the Xapian::Query
// constructor.
//
// Each non-empty term is stemmed with the "english" stemmer before being
// added; empty terms (the default for every tN argument) are skipped.
// NOTE(review): op and parameter are presumably the leading parameters - the
// line declaring them is not visible here; confirm.
85 static Xapian::Query
87  const string & t1 = string(), const string & t2 = string(),
88  const string & t3 = string(), const string & t4 = string(),
89  const string & t5 = string(), const string & t6 = string(),
90  const string & t7 = string(), const string & t8 = string(),
91  const string & t9 = string(), const string & t10 = string())
92 {
93  vector<string> v;
94  Xapian::Stem stemmer("english");
// Collect the stemmed form of each term which was actually supplied.
95  if (!t1.empty()) v.push_back(stemmer(t1));
96  if (!t2.empty()) v.push_back(stemmer(t2));
97  if (!t3.empty()) v.push_back(stemmer(t3));
98  if (!t4.empty()) v.push_back(stemmer(t4));
99  if (!t5.empty()) v.push_back(stemmer(t5));
100  if (!t6.empty()) v.push_back(stemmer(t6));
101  if (!t7.empty()) v.push_back(stemmer(t7));
102  if (!t8.empty()) v.push_back(stemmer(t8));
103  if (!t9.empty()) v.push_back(stemmer(t9));
104  if (!t10.empty()) v.push_back(stemmer(t10));
105  return Xapian::Query(op, v.begin(), v.end(), parameter);
106 }
107 
108 static Xapian::Query
109 query(const string &t)
110 {
111  return Xapian::Query(Xapian::Stem("english")(t));
112 }
113 
114 // #######################################################################
115 // # Tests start here
116 
117 // tests that the backend doesn't return zero docids
118 DEFINE_TESTCASE(zerodocid1, backend) {
119  // open the database (in this case a simple text file
120  // we prepared earlier)
121 
122  Xapian::Database mydb(get_database("apitest_onedoc"));
123 
124  Xapian::Enquire enquire(mydb);
125 
126  // make a simple query, with one word in it - "word".
127  enquire.set_query(Xapian::Query("word"));
128 
129  // retrieve the top ten results (we only expect one)
130  Xapian::MSet mymset = enquire.get_mset(0, 10);
131 
132  // We've done the query, now check that the result is what
133  // we expect (1 document, with non-zero docid)
134  TEST_MSET_SIZE(mymset, 1);
135 
136  TEST_AND_EXPLAIN(*(mymset.begin()) != 0,
137  "A query on a database returned a zero docid");
138 }
139 
140 // tests that an empty query returns no matches
// Two kinds of empty query are tried: a default-constructed Xapian::Query
// and an OP_AND query built from an empty range of subqueries.  In both
// cases the MSet must be empty and every match-count bound must be 0.
141 DEFINE_TESTCASE(emptyquery1, backend) {
142  Xapian::Enquire enquire(get_database("apitest_simpledata"));
143 
// Case 1: default-constructed query.
144  enquire.set_query(Xapian::Query());
145  Xapian::MSet mymset = enquire.get_mset(0, 10);
146  TEST_MSET_SIZE(mymset, 0);
147  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
148  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
149  TEST_EQUAL(mymset.get_matches_estimated(), 0);
153 
// Case 2: OP_AND over an empty vector of subqueries.
154  vector<Xapian::Query> v;
155  enquire.set_query(Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end()));
156  mymset = enquire.get_mset(0, 10);
157  TEST_MSET_SIZE(mymset, 0);
158  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
159  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
160  TEST_EQUAL(mymset.get_matches_estimated(), 0);
164 }
165 
166 // tests the document count for a simple query
167 DEFINE_TESTCASE(simplequery1, backend) {
168  Xapian::Enquire enquire(get_database("apitest_simpledata"));
169  enquire.set_query(Xapian::Query("word"));
170  Xapian::MSet mymset = enquire.get_mset(0, 10);
171  TEST_MSET_SIZE(mymset, 2);
172 }
173 
174 // tests for the right documents and weights returned with simple query
175 DEFINE_TESTCASE(simplequery2, backend) {
176  // open the database (in this case a simple text file
177  // we prepared earlier)
178  Xapian::Database db = get_database("apitest_simpledata");
179  Xapian::Enquire enquire(db);
180  enquire.set_query(Xapian::Query("word"));
181 
182  // retrieve the top results
183  Xapian::MSet mymset = enquire.get_mset(0, 10);
184 
185  // We've done the query, now check that the result is what
186  // we expect (documents 2 and 4)
187  mset_expect_order(mymset, 2, 4);
188 
189  // Check the weights
190  Xapian::MSetIterator i = mymset.begin();
191  // These weights are for BM25Weight(1,0,1,0.5,0.5)
192  TEST_EQUAL_DOUBLE(i.get_weight(), 1.04648168717725);
193  i++;
194  TEST_EQUAL_DOUBLE(i.get_weight(), 0.640987686595914);
195 }
196 
197 // tests for the right document count for another simple query
198 DEFINE_TESTCASE(simplequery3, backend) {
199  Xapian::Enquire enquire(get_database("apitest_simpledata"));
200  enquire.set_query(query("this"));
201  Xapian::MSet mymset = enquire.get_mset(0, 10);
202 
203  // Check that 6 documents were returned.
204  TEST_MSET_SIZE(mymset, 6);
205 }
206 
207 // test that a multidb with 3 dbs query returns correct docids
// The combined database is built by adding "apitest_simpledata2" and
// "apitest_termorder" to a handle opened on "apitest_simpledata".
208 DEFINE_TESTCASE(multidb2, backend && !multi) {
209  Xapian::Database mydb2(get_database("apitest_simpledata"));
210  mydb2.add_database(get_database("apitest_simpledata2"));
211  mydb2.add_database(get_database("apitest_termorder"));
212  Xapian::Enquire enquire(mydb2);
213 
214  // make a query
// The query ORs together the stemmed terms "inmemory" and "word".
215  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
217  enquire.set_query(myquery);
218 
219  // retrieve the top ten results
// The docids expected here are those of the combined database.
220  Xapian::MSet mymset = enquire.get_mset(0, 10);
221  mset_expect_order(mymset, 2, 3, 4, 10);
222 }
223 
224 // tests that when specifying maxitems to get_mset, no more than
225 // that are returned.
226 DEFINE_TESTCASE(msetmaxitems1, backend) {
227  Xapian::Enquire enquire(get_database("apitest_simpledata"));
228  enquire.set_query(query("this"));
229  Xapian::MSet mymset = enquire.get_mset(0, 1);
230  TEST_MSET_SIZE(mymset, 1);
231 
232  mymset = enquire.get_mset(0, 5);
233  TEST_MSET_SIZE(mymset, 5);
234 }
235 
236 // tests the returned weights are as expected (regression test for remote
237 // backend which was using the average weight rather than the actual document
238 // weight for computing weights - fixed in 1.0.0).
239 DEFINE_TESTCASE(expandweights1, backend) {
240  Xapian::Enquire enquire(get_database("apitest_simpledata"));
241  enquire.set_query(Xapian::Query("this"));
242 
243  Xapian::MSet mymset = enquire.get_mset(0, 10);
244 
245  Xapian::RSet myrset;
246  Xapian::MSetIterator i = mymset.begin();
247  myrset.add_document(*i);
248  myrset.add_document(*(++i));
249 
250  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
251  TEST_EQUAL(eset.size(), 3);
252  TEST_REL(eset.get_ebound(), >=, eset.size());
253  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
254  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
255  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
256 
257  // Test non-default k too.
258  eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ, 2.0);
259  TEST_EQUAL(eset.size(), 3);
260  TEST_REL(eset.get_ebound(), >=, eset.size());
261  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 5.88109547674955);
262  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 5.88109547674955);
263  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.44473599216144);
264 }
265 
266 // Just like test_expandweights1 but without USE_EXACT_TERMFREQ.
267 DEFINE_TESTCASE(expandweights2, backend) {
268  Xapian::Enquire enquire(get_database("apitest_simpledata"));
269  enquire.set_query(Xapian::Query("this"));
270 
271  Xapian::MSet mymset = enquire.get_mset(0, 10);
272 
273  Xapian::RSet myrset;
274  Xapian::MSetIterator i = mymset.begin();
275  myrset.add_document(*i);
276  myrset.add_document(*(++i));
277 
278  Xapian::ESet eset = enquire.get_eset(3, myrset);
279  TEST_EQUAL(eset.size(), 3);
280  TEST_REL(eset.get_ebound(), >=, eset.size());
281  // With a multi backend, the top three terms all happen to occur in both
282  // shard so their termfreq is exactly known even without
283  // USE_EXACT_TERMFREQ and so the weights should be the same for all
284  // test harness backends.
285  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
286  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
287  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
288 }
289 
290 DEFINE_TESTCASE(expandweights3, backend) {
291  Xapian::Enquire enquire(get_database("apitest_simpledata"));
292  enquire.set_query(Xapian::Query("this"));
293 
294  Xapian::MSet mymset = enquire.get_mset(0, 10);
295 
296  Xapian::RSet myrset;
297  Xapian::MSetIterator i = mymset.begin();
298  myrset.add_document(*i);
299  myrset.add_document(*(++i));
300 
301  // Set min_wt to 6.0
302  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 0, 6.0);
303  TEST_EQUAL(eset.size(), 2);
304  TEST_REL(eset.get_ebound(), >=, eset.size());
305  // With a multi backend, the top two terms all happen to occur in both
306  // shard so their termfreq is exactly known even without
307  // USE_EXACT_TERMFREQ and so the weights should be the same for all
308  // test harness backends.
309  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
310  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
311 }
312 
313 // tests that negative weights are returned
314 DEFINE_TESTCASE(expandweights4, backend) {
315  Xapian::Enquire enquire(get_database("apitest_simpledata"));
316  enquire.set_query(Xapian::Query("paragraph"));
317 
318  Xapian::MSet mymset = enquire.get_mset(0, 10);
319 
320  Xapian::RSet myrset;
321  Xapian::MSetIterator i = mymset.begin();
322  myrset.add_document(*i);
323  myrset.add_document(*(++i));
324 
325  Xapian::ESet eset = enquire.get_eset(37, myrset, 0, 0, -100);
326  // Now include negative weights
327  TEST_EQUAL(eset.size(), 37);
328  TEST_REL(eset.get_ebound(), >=, eset.size());
329  TEST_REL(eset[36].get_weight(), <, 0);
330  TEST_REL(eset[36].get_weight(), >=, -100);
331 }
332 
333 // test for Bo1EWeight
334 DEFINE_TESTCASE(expandweights5, backend) {
335  Xapian::Enquire enquire(get_database("apitest_simpledata"));
336  enquire.set_query(Xapian::Query("this"));
337 
338  Xapian::MSet mymset = enquire.get_mset(0, 10);
339 
340  Xapian::RSet myrset;
341  Xapian::MSetIterator i = mymset.begin();
342  myrset.add_document(*i);
343  myrset.add_document(*(++i));
344 
345  enquire.set_expansion_scheme("bo1");
346  Xapian::ESet eset = enquire.get_eset(3, myrset);
347 
348  TEST_EQUAL(eset.size(), 3);
349  TEST_REL(eset.get_ebound(), >=, eset.size());
350  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 7.21765284821702);
351  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.661623193760022);
352  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.58090119783738);
353 }
354 
355 // test that "trad" can be set as an expansion scheme.
356 DEFINE_TESTCASE(expandweights6, backend) {
357  Xapian::Enquire enquire(get_database("apitest_simpledata"));
358  enquire.set_query(Xapian::Query("this"));
359 
360  Xapian::MSet mymset = enquire.get_mset(0, 10);
361 
362  Xapian::RSet myrset;
363  Xapian::MSetIterator i = mymset.begin();
364  myrset.add_document(*i);
365  myrset.add_document(*(++i));
366 
367  enquire.set_expansion_scheme("trad");
368  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
369 
370  TEST_EQUAL(eset.size(), 3);
371  TEST_REL(eset.get_ebound(), >=, eset.size());
372  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
373  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
374  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
375 }
376 
377 // test that invalid scheme names are not accepted
// The scheme name passed below, "no_such_scheme", is the invalid name under
// test.
// NOTE(review): the unbalanced ')' on the set_expansion_scheme() line
// suggests the call is wrapped in a macro (presumably an exception check)
// whose opening line is not visible here - confirm.
378 DEFINE_TESTCASE(expandweights7, backend) {
379  Xapian::Enquire enquire(get_database("apitest_simpledata"));
380 
382  enquire.set_expansion_scheme("no_such_scheme"));
383 }
384 
385 // test that "expand_k" can be passed as a parameter to get_eset
386 DEFINE_TESTCASE(expandweights8, backend) {
387  Xapian::Enquire enquire(get_database("apitest_simpledata"));
388  enquire.set_query(Xapian::Query("this"));
389 
390  Xapian::MSet mymset = enquire.get_mset(0, 10);
391 
392  Xapian::RSet myrset;
393  Xapian::MSetIterator i = mymset.begin();
394  myrset.add_document(*i);
395  myrset.add_document(*(++i));
396 
397  // Set expand_k to 1.0 and min_wt to 0
398  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 1.0, 0, 0);
399  // With a multi backend, the top three terms all happen to occur in both
400  // shard so their termfreq is exactly known even without
401  // USE_EXACT_TERMFREQ and so the weights should be the same for all
402  // test harness backends.
403  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
404  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
405  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
406  TEST_REL(eset.back().get_weight(),>=,0);
407 }
408 
409 // tests that when specifying maxitems to get_eset, no more than
410 // that are returned.
411 DEFINE_TESTCASE(expandmaxitems1, backend) {
412  Xapian::Enquire enquire(get_database("apitest_simpledata"));
413  enquire.set_query(Xapian::Query("this"));
414 
415  Xapian::MSet mymset = enquire.get_mset(0, 10);
416  tout << "mymset.size() = " << mymset.size() << '\n';
417  TEST(mymset.size() >= 2);
418 
419  Xapian::RSet myrset;
420  Xapian::MSetIterator i = mymset.begin();
421  myrset.add_document(*i);
422  myrset.add_document(*(++i));
423 
424  Xapian::ESet myeset = enquire.get_eset(1, myrset);
425  TEST_EQUAL(myeset.size(), 1);
426  TEST_REL(myeset.get_ebound(), >=, myeset.size());
427 }
428 
429 // tests that a pure boolean query has all weights set to 0
// The checks below require a non-empty MSet whose maximum possible weight
// and every individual weight are all 0.
// NOTE(review): nothing visible here makes the query boolean - presumably a
// weighting-scheme setting belongs after set_query(); confirm.
430 DEFINE_TESTCASE(boolquery1, backend) {
431  Xapian::Query myboolquery(query("this"));
432 
433  // open the database (in this case a simple text file
434  // we prepared earlier)
435  Xapian::Enquire enquire(get_database("apitest_simpledata"));
436  enquire.set_query(myboolquery);
438 
439  // retrieve the top results
440  Xapian::MSet mymset = enquire.get_mset(0, 10);
441 
// There must be some matches, or the weight checks would be vacuous.
442  TEST_NOT_EQUAL(mymset.size(), 0);
443  TEST_EQUAL(mymset.get_max_possible(), 0);
444  for (Xapian::MSetIterator i = mymset.begin(); i != mymset.end(); ++i) {
445  TEST_EQUAL(i.get_weight(), 0);
446  }
447 }
448 
449 // tests that get_mset() specifying "this" works as expected
450 DEFINE_TESTCASE(msetfirst1, backend) {
451  Xapian::Enquire enquire(get_database("apitest_simpledata"));
452  enquire.set_query(query("this"));
453  Xapian::MSet mymset1 = enquire.get_mset(0, 6);
454  Xapian::MSet mymset2 = enquire.get_mset(3, 3);
455  TEST(mset_range_is_same(mymset1, 3, mymset2, 0, 3));
456 
457  // Regression test - we weren't adjusting the index into items[] by
458  // firstitem in api/omenquire.cc.
459  TEST_EQUAL(mymset1[5].get_document().get_data(),
460  mymset2[2].get_document().get_data());
461 }
462 
463 // tests the converting-to-percent functions
// For every entry in the MSet this checks that convert_to_percent() agrees
// with the iterator's get_percent(), that the percentage lies in 0..100, and
// that percentages never increase going down the MSet.
// NOTE(review): the message string is repeated on two consecutive lines
// below, which suggests the start of a second check is not visible here -
// confirm.
464 DEFINE_TESTCASE(topercent1, backend) {
465  Xapian::Enquire enquire(get_database("apitest_simpledata"));
466  enquire.set_query(query("this"));
467  Xapian::MSet mymset = enquire.get_mset(0, 20);
468 
// Percentage of the previous entry; starts at the maximum of 100.
469  int last_pct = 100;
470  Xapian::MSetIterator i = mymset.begin();
471  for ( ; i != mymset.end(); ++i) {
472  int pct = mymset.convert_to_percent(i);
473  TEST_AND_EXPLAIN(pct == i.get_percent(),
474  "convert_to_%(msetitor) != convert_to_%(wt)");
476  "convert_to_%(msetitor) != convert_to_%(wt)");
477  TEST_AND_EXPLAIN(pct >= 0 && pct <= 100,
478  "percentage out of range: " << pct);
479  TEST_AND_EXPLAIN(pct <= last_pct, "percentage increased down mset");
480  last_pct = pct;
481  }
482 }
483 
484 // tests the percentage values returned
// Two searches are run: one where the top document is expected to score
// 100%, and one where it is expected to score between 60% and 76%.  Match
// counts, the maximum attained weight and the document order are checked
// for both.
485 DEFINE_TESTCASE(topercent2, backend) {
486  Xapian::Enquire enquire(get_database("apitest_simpledata"));
487 
488  int pct;
489 
490  // First, test a search in which the top document scores 100%.
491  enquire.set_query(query("this"));
492  Xapian::MSet mymset = enquire.get_mset(0, 20);
493 
494  Xapian::MSetIterator i = mymset.begin();
495  TEST(i != mymset.end());
496  pct = mymset.convert_to_percent(i);
497  TEST_EQUAL(pct, 100);
498 
499  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
500  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
501  TEST_EQUAL(mymset.get_matches_estimated(), 6);
502  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 0.0553904060041786);
503  TEST_EQUAL(mymset.size(), 6);
504  mset_expect_order(mymset, 2, 1, 3, 5, 6, 4);
505 
506  // A search in which the top document doesn't have 100%
// NOTE(review): q is presumably declared on a line not visible here, built
// from the four terms listed below - confirm.
508  "this", "line", "paragraph", "rubbish");
509  enquire.set_query(q);
510  mymset = enquire.get_mset(0, 20);
511 
// Top document: percentage must be strictly between 60 and 76.
512  i = mymset.begin();
513  TEST(i != mymset.end());
514  pct = mymset.convert_to_percent(i);
515  TEST_REL(pct,>,60);
516  TEST_REL(pct,<,76);
517 
518  ++i;
519 
// Second document: percentage must be strictly between 40 and 50.
520  TEST(i != mymset.end());
521  pct = mymset.convert_to_percent(i);
522  TEST_REL(pct,>,40);
523  TEST_REL(pct,<,50);
524 
525  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
526  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
527  TEST_EQUAL(mymset.get_matches_estimated(), 6);
528  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 1.67412192414056);
529  TEST_EQUAL(mymset.size(), 6);
530  mset_expect_order(mymset, 3, 1, 4, 2, 5, 6);
531 }
532 
// Functor which accepts a term when the sum of its character values is even.
// NOTE(review): the class header is not visible here; the tests use this
// type under the name EvenParityExpandFunctor and pass its address to
// get_eset(), so it presumably derives from Xapian::ExpandDecider - confirm.
534  public:
// Return true if the character values of tname (each converted to
// unsigned) add up to an even number, false otherwise.
535  bool operator()(const string & tname) const {
536  unsigned long sum = 0;
537  for (unsigned ch : tname) {
538  sum += ch;
539  }
540 // if (verbose) {
541 // tout << tname << "==> " << sum << "\n";
542 // }
543  return (sum % 2) == 0;
544  }
545 };
546 
547 // tests the expand decision functor
548 DEFINE_TESTCASE(expandfunctor1, backend) {
549  Xapian::Enquire enquire(get_database("apitest_simpledata"));
550  enquire.set_query(Xapian::Query("this"));
551 
552  Xapian::MSet mymset = enquire.get_mset(0, 10);
553  TEST(mymset.size() >= 2);
554 
555  Xapian::RSet myrset;
556  Xapian::MSetIterator i = mymset.begin();
557  myrset.add_document(*i);
558  myrset.add_document(*(++i));
559 
560  EvenParityExpandFunctor myfunctor;
561 
562  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
563  unsigned int neweset_size = 0;
564  Xapian::ESetIterator j = myeset_orig.begin();
565  for ( ; j != myeset_orig.end(); ++j) {
566  if (myfunctor(*j)) neweset_size++;
567  }
568  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
569 
570 #if 0
571  // Compare myeset with the hand-filtered version of myeset_orig.
572  if (verbose) {
573  tout << "orig_eset: ";
574  copy(myeset_orig.begin(), myeset_orig.end(),
575  ostream_iterator<Xapian::ESetItem>(tout, " "));
576  tout << "\n";
577 
578  tout << "new_eset: ";
579  copy(myeset.begin(), myeset.end(),
580  ostream_iterator<Xapian::ESetItem>(tout, " "));
581  tout << "\n";
582  }
583 #endif
584  Xapian::ESetIterator orig = myeset_orig.begin();
585  Xapian::ESetIterator filt = myeset.begin();
586  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
587  // skip over items that shouldn't be in myeset
588  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
589  ++orig;
590  }
591 
592  TEST_AND_EXPLAIN(*orig == *filt &&
593  orig.get_weight() == filt.get_weight(),
594  "Mismatch in items " << *orig << " vs. " << *filt
595  << " after filtering");
596  }
597 
598  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
599  ++orig;
600  }
601 
602  TEST_EQUAL(orig, myeset_orig.end());
603  TEST_AND_EXPLAIN(filt == myeset.end(),
604  "Extra items in the filtered eset.");
605 }
606 
607 DEFINE_TESTCASE(expanddeciderfilterprefix2, backend) {
608  Xapian::Enquire enquire(get_database("apitest_simpledata"));
609  enquire.set_query(Xapian::Query("this"));
610 
611  Xapian::MSet mymset = enquire.get_mset(0, 10);
612  TEST(mymset.size() >= 2);
613 
614  Xapian::RSet myrset;
615  Xapian::MSetIterator i = mymset.begin();
616  myrset.add_document(*i);
617  myrset.add_document(*(++i));
618 
619  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
620  unsigned int neweset_size = 0;
621 
622  // Choose the first char in the first term as prefix.
623  Xapian::ESetIterator j = myeset_orig.begin();
624  TEST(myeset_orig.size() >= 1);
625  string prefix(*j, 0, 1);
626  Xapian::ExpandDeciderFilterPrefix myfunctor(prefix);
627 
628  for ( ; j != myeset_orig.end(); ++j) {
629  if (myfunctor(*j)) neweset_size++;
630  }
631  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
632 
633  Xapian::ESetIterator orig = myeset_orig.begin();
634  Xapian::ESetIterator filt = myeset.begin();
635  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
636  // skip over items that shouldn't be in myeset
637  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
638  ++orig;
639  }
640 
641  TEST_AND_EXPLAIN(*orig == *filt &&
642  orig.get_weight() == filt.get_weight(),
643  "Mismatch in items " << *orig << " vs. " << *filt
644  << " after filtering");
645  }
646 
647  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
648  ++orig;
649  }
650 
651  TEST_EQUAL(orig, myeset_orig.end());
652  TEST_AND_EXPLAIN(filt == myeset.end(),
653  "Extra items in the filtered eset.");
654 }
655 
656 // tests the percent cutoff option
// A cutoff percentage is picked from an uncut MSet, the search is rerun with
// that cutoff, and the size of the new MSet is checked from both sides.
657 DEFINE_TESTCASE(pctcutoff1, backend) {
658  Xapian::Enquire enquire(get_database("apitest_simpledata"));
// NOTE(review): the line below is the tail of a statement whose start is
// not visible here - presumably a set_query() call using these four terms;
// confirm.
660  "this", "line", "paragraph", "rubbish"));
661  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
662 
663  if (verbose) {
664  tout << "Original mset pcts:";
665  print_mset_percentages(mymset1);
666  tout << "\n";
667  }
668 
// Scan down the MSet counting changes in percentage, stopping at the fourth
// change.  This leaves my_pct as the percentage reached at the third change
// and num_items as the index of the first item with that percentage.
669  unsigned int num_items = 0;
670  int my_pct = 100;
671  int changes = 0;
672  Xapian::MSetIterator i = mymset1.begin();
673  int c = 0;
674  for ( ; i != mymset1.end(); ++i, ++c) {
675  int new_pct = mymset1.convert_to_percent(i);
676  if (new_pct != my_pct) {
677  changes++;
678  if (changes > 3) break;
679  num_items = c;
680  my_pct = new_pct;
681  }
682  }
683 
684  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
685  if (verbose) {
686  tout << "Cutoff percent: " << my_pct << "\n";
687  }
688 
689  enquire.set_cutoff(my_pct);
690  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
691 
692  if (verbose) {
693  tout << "Percentages after cutoff:";
694  print_mset_percentages(mymset2);
695  tout << "\n";
696  }
697 
698  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
699  "Match with % cutoff lost too many items");
700 
// Entries beyond num_items are only acceptable if they are all at exactly
// the cutoff percentage.
701  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
702  (mymset2.convert_to_percent(mymset2[num_items]) == my_pct &&
703  mymset2.convert_to_percent(mymset2.back()) == my_pct),
704  "Match with % cutoff returned too many items");
705 }
706 
707 // Tests the percent cutoff option combined with collapsing
// NOTE(review): no set_query() call is visible before the first get_mset(),
// and the last visible line is the tail of a statement whose start is not
// shown - confirm the missing lines against the full file.
708 DEFINE_TESTCASE(pctcutoff2, backend) {
709  Xapian::Enquire enquire(get_database("apitest_simpledata"));
711  Xapian::MSet mset = enquire.get_mset(0, 100);
712 
713  if (verbose) {
714  tout << "Original mset pcts:";
716  tout << "\n";
717  }
718 
// The top two matches must differ by at least 2 percentage points so that
// the midpoint computed below lies strictly between their percentages.
719  TEST(mset.size() >= 2);
720  TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
721 
722  int cutoff = mset[0].get_percent() + mset[1].get_percent();
723  cutoff /= 2;
724 
725  enquire.set_cutoff(cutoff);
726  enquire.set_collapse_key(1234); // Value which is always empty.
727 
// Rerun with the cutoff and collapsing enabled, asking for a single match.
728  Xapian::MSet mset2 = enquire.get_mset(0, 1);
729  TEST_EQUAL(mset2.size(), 1);
730  TEST_REL(mset2.get_matches_lower_bound(),>=,1);
732  mset2.get_matches_lower_bound());
737 }
738 
739 // Test that the percent cutoff option returns all the answers it should.
740 DEFINE_TESTCASE(pctcutoff3, backend) {
741  Xapian::Enquire enquire(get_database("apitest_simpledata"));
742  enquire.set_query(Xapian::Query("this"));
743  Xapian::MSet mset1 = enquire.get_mset(0, 10);
744 
745  if (verbose) {
746  tout << "Original mset pcts:";
747  print_mset_percentages(mset1);
748  tout << "\n";
749  }
750 
751  int percent = 100;
752  for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
753  int new_percent = mset1.convert_to_percent(i);
754  if (new_percent != percent) {
755  tout.str(string());
756  tout << "Testing " << percent << "% cutoff\n";
757  enquire.set_cutoff(percent);
758  Xapian::MSet mset2 = enquire.get_mset(0, 10);
759  TEST_EQUAL(mset2.back().get_percent(), percent);
760  TEST_EQUAL(mset2.size(), i.get_rank());
761  percent = new_percent;
762  }
763  }
764 }
765 
766 // tests the cutoff option
// A cutoff weight is picked from an uncut MSet, the search is rerun with
// that weight cutoff, and the size of the new MSet is checked from both
// sides.
767 DEFINE_TESTCASE(cutoff1, backend) {
768  Xapian::Enquire enquire(get_database("apitest_simpledata"));
// NOTE(review): the line below is the tail of a statement whose start is
// not visible here - presumably a set_query() call using these four terms;
// confirm.
770  "this", "line", "paragraph", "rubbish"));
771  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
772 
773  if (verbose) {
774  tout << "Original mset weights:";
775  print_mset_weights(mymset1);
776  tout << "\n";
777  }
778 
// Scan down the MSet counting changes in weight, stopping at the fourth
// change.  This leaves my_wt as the weight reached at the third change and
// num_items as the index of the first item with that weight.
779  unsigned int num_items = 0;
780  double my_wt = -100;
781  int changes = 0;
782  Xapian::MSetIterator i = mymset1.begin();
783  int c = 0;
784  for ( ; i != mymset1.end(); ++i, ++c) {
785  double new_wt = i.get_weight();
786  if (new_wt != my_wt) {
787  changes++;
788  if (changes > 3) break;
789  num_items = c;
790  my_wt = new_wt;
791  }
792  }
793 
794  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
795  if (verbose) {
796  tout << "Cutoff weight: " << my_wt << "\n";
797  }
798 
// Percentage cutoff of 0, weight cutoff of my_wt.
799  enquire.set_cutoff(0, my_wt);
800  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
801 
802  if (verbose) {
803  tout << "Weights after cutoff:";
804  print_mset_weights(mymset2);
805  tout << "\n";
806  }
807 
808  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
809  "Match with cutoff lost too many items");
810 
// Entries beyond num_items are only acceptable if they are all at exactly
// the cutoff weight.
811  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
812  (mymset2[num_items].get_weight() == my_wt &&
813  mymset2.back().get_weight() == my_wt),
814  "Match with cutoff returned too many items");
815 }
816 
817 // tests the allow query terms expand option
818 DEFINE_TESTCASE(allowqterms1, backend) {
819  Xapian::Enquire enquire(get_database("apitest_simpledata"));
820  string term = "paragraph";
821  enquire.set_query(Xapian::Query(term));
822 
823  Xapian::MSet mymset = enquire.get_mset(0, 10);
824  TEST(mymset.size() >= 2);
825 
826  Xapian::RSet myrset;
827  Xapian::MSetIterator i = mymset.begin();
828  myrset.add_document(*i);
829  myrset.add_document(*(++i));
830 
831  Xapian::ESet myeset = enquire.get_eset(1000, myrset);
832  Xapian::ESetIterator j = myeset.begin();
833  for ( ; j != myeset.end(); ++j) {
834  TEST_NOT_EQUAL(*j, term);
835  }
836 
837  Xapian::ESet myeset2 = enquire.get_eset(1000, myrset, Xapian::Enquire::INCLUDE_QUERY_TERMS);
838  j = myeset2.begin();
839  for ( ; j != myeset2.end(); ++j) {
840  if (*j == term) break;
841  }
842  TEST(j != myeset2.end());
843 }
844 
845 // tests that the MSet max_attained works
846 DEFINE_TESTCASE(maxattain1, backend) {
847  Xapian::Enquire enquire(get_database("apitest_simpledata"));
848  enquire.set_query(query("this"));
849  Xapian::MSet mymset = enquire.get_mset(0, 100);
850 
851  double mymax = 0;
852  Xapian::MSetIterator i = mymset.begin();
853  for ( ; i != mymset.end(); ++i) {
854  if (i.get_weight() > mymax) mymax = i.get_weight();
855  }
856  TEST_EQUAL(mymax, mymset.get_max_attained());
857 }
858 
859 // tests a reversed boolean query
// Three MSets are fetched for the same query; the first two are compared
// entry by entry, and the third must be the first in reverse order.
// NOTE(review): the settings changed between the three get_mset() calls
// (the comments below mention match_sort_forward) are not visible here -
// confirm against the full file.
860 DEFINE_TESTCASE(reversebool1, backend) {
861  Xapian::Enquire enquire(get_database("apitest_simpledata"));
862  enquire.set_query(Xapian::Query("this"));
864 
865  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
866  TEST_AND_EXPLAIN(mymset1.size() > 1,
867  "Mset was too small to test properly");
868 
870  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
872  Xapian::MSet mymset3 = enquire.get_mset(0, 100);
873 
874  // mymset1 and mymset2 should be identical
875  TEST_EQUAL(mymset1.size(), mymset2.size());
876 
877  {
// Walk both MSets forwards in step, comparing docids.
878  Xapian::MSetIterator i = mymset1.begin();
879  Xapian::MSetIterator j = mymset2.begin();
880  for ( ; i != mymset1.end(); ++i, j++) {
881  TEST(j != mymset2.end());
882  // if this fails, then setting match_sort_forward=true was not
883  // the same as the default.
884  TEST_EQUAL(*i, *j);
885  }
886  TEST(j == mymset2.end());
887  }
888 
889  // mymset1 and mymset3 should be same but reversed
890  TEST_EQUAL(mymset1.size(), mymset3.size());
891 
892  {
// Walk mymset1 forwards and mymset3 backwards from its end; the sizes were
// checked equal above, so j is never decremented past mymset3's start.
893  Xapian::MSetIterator i = mymset1.begin();
894  Xapian::MSetIterator j = mymset3.end();
895  for ( ; i != mymset1.end(); ++i) {
896  --j;
897  // if this fails, then setting match_sort_forward=false didn't
898  // reverse the results.
899  TEST_EQUAL(*i, *j);
900  }
901  }
902 }
903 
904 // tests a reversed boolean query, where the full mset isn't returned
// A full MSet is fetched first, then two MSets limited to half its size;
// these are compared against the start and the end of the full MSet.
// NOTE(review): the settings changed between the get_mset() calls and the
// declaration of j used in the final loop are not visible here - confirm
// against the full file.
905 DEFINE_TESTCASE(reversebool2, backend) {
906  Xapian::Enquire enquire(get_database("apitest_simpledata"));
907  enquire.set_query(Xapian::Query("this"));
909 
910  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
911 
912  TEST_AND_EXPLAIN(mymset1.size() > 1,
913  "Mset was too small to test properly");
914 
// Ask for only half as many items as the full MSet holds.
916  Xapian::doccount msize = mymset1.size() / 2;
917  Xapian::MSet mymset2 = enquire.get_mset(0, msize);
919  Xapian::MSet mymset3 = enquire.get_mset(0, msize);
920 
921  // mymset2 should be first msize items of mymset1
922  TEST_EQUAL(msize, mymset2.size());
923  {
924  Xapian::MSetIterator i = mymset1.begin();
925  Xapian::MSetIterator j = mymset2.begin();
926  for ( ; j != mymset2.end(); ++i, ++j) {
927  TEST(i != mymset1.end());
928  // if this fails, then setting match_sort_forward=true was not
929  // the same as the default.
930  TEST_EQUAL(*i, *j);
931  }
932  // mymset1 should be larger.
933  TEST(i != mymset1.end());
934  }
935 
936  // mymset3 should be last msize items of mymset1, in reverse order
937  TEST_EQUAL(msize, mymset3.size());
938  {
// Walk mymset3 forwards while stepping backwards from the end of mymset1.
939  Xapian::MSetIterator i = mymset1.end();
941  for (j = mymset3.begin(); j != mymset3.end(); ++j) {
942  // if this fails, then setting match_sort_forward=false didn't
943  // reverse the results.
944  --i;
945  TEST_EQUAL(*i, *j);
946  }
947  }
948 }
949 
950 // tests that get_matching_terms() returns the terms in the right order
// The expected order is "one", "two", "three", "four", which matches the
// third constructor argument (1, 2, 3, 4) given to each term's subquery
// below rather than the order in which the subqueries are listed.
951 DEFINE_TESTCASE(getmterms1, backend) {
952  list<string> answers_list;
953  answers_list.push_back("one");
954  answers_list.push_back("two");
955  answers_list.push_back("three");
956  answers_list.push_back("four");
957 
958  Xapian::Database mydb(get_database("apitest_termorder"));
959  Xapian::Enquire enquire(mydb);
960 
// NOTE(review): the lines declaring myquery and naming the operators which
// combine these four subqueries are not visible here - confirm against the
// full file.
963  Xapian::Query("one", 1, 1),
964  Xapian::Query("three", 1, 3)),
966  Xapian::Query("four", 1, 4),
967  Xapian::Query("two", 1, 2)));
968 
969  enquire.set_query(myquery);
970 
971  Xapian::MSet mymset = enquire.get_mset(0, 10);
972 
// Collect the matching terms of the single match and compare with the
// expected list.
973  TEST_MSET_SIZE(mymset, 1);
974  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
975  enquire.get_matching_terms_end(mymset.begin()));
976  TEST(list == answers_list);
977 }
978 
979 // tests that get_matching_terms() returns the terms only once
980 DEFINE_TESTCASE(getmterms2, backend) {
981  list<string> answers_list;
982  answers_list.push_back("one");
983  answers_list.push_back("two");
984  answers_list.push_back("three");
985 
986  Xapian::Database mydb(get_database("apitest_termorder"));
987  Xapian::Enquire enquire(mydb);
988 
991  Xapian::Query("one", 1, 1),
992  Xapian::Query("three", 1, 3)),
994  Xapian::Query("one", 1, 4),
995  Xapian::Query("two", 1, 2)));
996 
997  enquire.set_query(myquery);
998 
999  Xapian::MSet mymset = enquire.get_mset(0, 10);
1000 
1001  TEST_MSET_SIZE(mymset, 1);
1002  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
1003  enquire.get_matching_terms_end(mymset.begin()));
1004  TEST(list == answers_list);
1005 }
1006 
1007 // test that running a query twice returns the same results
1008 DEFINE_TESTCASE(repeatquery1, backend) {
1009  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1010  enquire.set_query(Xapian::Query("this"));
1011 
1012  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1013 
1014  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1015  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1016  TEST_EQUAL(mymset1, mymset2);
1017 }
1018 
1019 // test that prefetching documents works (at least, gives same results)
1020 DEFINE_TESTCASE(fetchdocs1, backend) {
1021  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1022  enquire.set_query(Xapian::Query("this"));
1023 
1024  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1025 
1026  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1027  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1028  TEST_EQUAL(mymset1, mymset2);
1029  mymset2.fetch(mymset2[0], mymset2[mymset2.size() - 1]);
1030  mymset2.fetch(mymset2.begin(), mymset2.end());
1031  mymset2.fetch(mymset2.begin());
1032  mymset2.fetch();
1033 
1034  Xapian::MSetIterator it1 = mymset1.begin();
1035  Xapian::MSetIterator it2 = mymset2.begin();
1036 
1037  while (it1 != mymset1.end() && it2 != mymset2.end()) {
1039  it2.get_document().get_data());
1040  TEST_NOT_EQUAL(it1.get_document().get_data(), "");
1041  TEST_NOT_EQUAL(it2.get_document().get_data(), "");
1042  it1++;
1043  it2++;
1044  }
1045  TEST_EQUAL(it1, mymset1.end());
1046  TEST_EQUAL(it1, mymset2.end());
1047 }
1048 
1049 // test that searching for a term not in the database fails nicely
// The query is the single term "frink" run against apitest_simpledata.
// NOTE(review): the embedded listing numbers jump from 1051 to 1053, so one
// line of this test is not visible in this listing - confirm against upstream.
1050 DEFINE_TESTCASE(absentterm1, backend) {
1051  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1053  enquire.set_query(Xapian::Query("frink"));
1054 
1055  Xapian::MSet mymset = enquire.get_mset(0, 10);
// No docids are passed here - presumably this asserts that the MSet is
// empty; verify against mset_expect_order() in the test utilities.
1056  mset_expect_order(mymset);
1057 }
1058 
1059 // as absentterm1, but setting query from a vector of terms
1060 DEFINE_TESTCASE(absentterm2, backend) {
1061  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1062  vector<string> terms;
1063  terms.push_back("frink");
1064 
1065  Xapian::Query query(Xapian::Query::OP_OR, terms.begin(), terms.end());
1066  enquire.set_query(query);
1067 
1068  Xapian::MSet mymset = enquire.get_mset(0, 10);
1069  mset_expect_order(mymset);
1070 }
1071 
1072 // test that rsets do sensible things
1073 DEFINE_TESTCASE(rset1, backend) {
1074  Xapian::Database mydb(get_database("apitest_rset"));
1075  Xapian::Enquire enquire(mydb);
1076  Xapian::Query myquery = query(Xapian::Query::OP_OR, "giraffe", "tiger");
1077  enquire.set_query(myquery);
1078 
1079  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1080 
1081  Xapian::RSet myrset;
1082  myrset.add_document(1);
1083 
1084  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1085 
1086  // We should have the same documents turn up, but 1 and 3 should
1087  // have higher weights with the RSet.
1088  TEST_MSET_SIZE(mymset1, 3);
1089  TEST_MSET_SIZE(mymset2, 3);
1090 }
1091 
1092 // test that rsets do more sensible things
1093 DEFINE_TESTCASE(rset2, backend) {
1094  Xapian::Database mydb(get_database("apitest_rset"));
1095  Xapian::Enquire enquire(mydb);
1096  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
1097  enquire.set_query(myquery);
1098 
1099  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1100 
1101  Xapian::RSet myrset;
1102  myrset.add_document(2);
1103 
1104  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1105 
1106  mset_expect_order(mymset1, 1, 2);
1107  mset_expect_order(mymset2, 2, 1);
1108 }
1109 
1110 // test that rsets behave correctly with multiDBs
1111 DEFINE_TESTCASE(rsetmultidb1, backend && !multi) {
1112  Xapian::Database mydb1(get_database("apitest_rset", "apitest_simpledata2"));
1113  Xapian::Database mydb2(get_database("apitest_rset"));
1114  mydb2.add_database(get_database("apitest_simpledata2"));
1115 
1116  Xapian::Enquire enquire1(mydb1);
1117  Xapian::Enquire enquire2(mydb2);
1118 
1119  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "multiple");
1120 
1121  enquire1.set_query(myquery);
1122  enquire2.set_query(myquery);
1123 
1124  Xapian::RSet myrset1;
1125  Xapian::RSet myrset2;
1126  myrset1.add_document(4);
1127  myrset2.add_document(2);
1128 
1129  Xapian::MSet mymset1a = enquire1.get_mset(0, 10);
1130  Xapian::MSet mymset1b = enquire1.get_mset(0, 10, &myrset1);
1131  Xapian::MSet mymset2a = enquire2.get_mset(0, 10);
1132  Xapian::MSet mymset2b = enquire2.get_mset(0, 10, &myrset2);
1133 
1134  mset_expect_order(mymset1a, 1, 4);
1135  mset_expect_order(mymset1b, 4, 1);
1136  mset_expect_order(mymset2a, 1, 2);
1137  mset_expect_order(mymset2b, 2, 1);
1138 
1139  TEST(mset_range_is_same_weights(mymset1a, 0, mymset2a, 0, 2));
1140  TEST(mset_range_is_same_weights(mymset1b, 0, mymset2b, 0, 2));
1141  TEST_NOT_EQUAL(mymset1a, mymset1b);
1142  TEST_NOT_EQUAL(mymset2a, mymset2b);
1143 }
1144 
1145 // regression tests - used to cause assertion in stats.h to fail
1146 // Doesn't actually fail for multi but it doesn't make sense to run there.
1147 DEFINE_TESTCASE(rsetmultidb3, backend && !multi) {
1148  Xapian::Enquire enquire(get_database("apitest_simpledata2"));
1149  enquire.set_query(query(Xapian::Query::OP_OR, "cuddly", "people"));
1150  Xapian::MSet mset = enquire.get_mset(0, 10); // used to fail assertion
1151 }
1152 
1154 DEFINE_TESTCASE(eliteset1, backend && !multi) {
1155  Xapian::Database mydb(get_database("apitest_simpledata"));
1156  Xapian::Enquire enquire(mydb);
1157 
1158  Xapian::Query myquery1 = query(Xapian::Query::OP_OR, "word");
1159 
1161  "simple", "word");
1162 
1163  enquire.set_query(myquery1, 2); // So the query lengths are the same.
1164  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1165 
1166  enquire.set_query(myquery2);
1167  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1168 
1169  TEST_EQUAL(mymset1, mymset2);
1170 }
1171 
1173 DEFINE_TESTCASE(elitesetmulti1, multi) {
1174  Xapian::Database mydb(get_database("apitest_simpledata"));
1175  Xapian::Enquire enquire(mydb);
1176 
1178  "simple", "word");
1179 
1180  enquire.set_query(myquery2);
1181  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1182 
1183  // For a sharded database, the elite set is resolved per shard and can
1184  // select different terms because the max term weights vary with the
1185  // per-shard term statistics. I can't see a feasible way to create
1186  // an equivalent MSet to compare with so for now at least we hard-code
1187  // the expected values.
1188  TEST_EQUAL(mymset2.size(), 3);
1189  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1190  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1191  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
1192  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.1736756775723788948);
1193  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.0464816871772451012);
1194  mset_expect_order(mymset2, 2, 4, 5);
1195  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.0464816871772451012);
1196  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.64098768659591376373);
1197  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.46338869498075929698);
1198 }
1199 
1202 DEFINE_TESTCASE(eliteset2, backend && !multi) {
1203  Xapian::Database mydb(get_database("apitest_simpledata"));
1204  Xapian::Enquire enquire(mydb);
1205 
1206  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1207 
1208  vector<Xapian::Query> qs;
1209  qs.push_back(query("this"));
1210  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1212  qs.begin(), qs.end(), 1);
1213 
1214  enquire.set_query(myquery1);
1215  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1216 
1217  enquire.set_query(myquery2);
1218  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1219 
1220  TEST_EQUAL(mymset1, mymset2);
1221 }
1222 
1224 DEFINE_TESTCASE(elitesetmulti2, multi) {
1225  Xapian::Database mydb(get_database("apitest_simpledata"));
1226  Xapian::Enquire enquire(mydb);
1227 
1228  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1229 
1230  vector<Xapian::Query> qs;
1231  qs.push_back(query("this"));
1232  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1234  qs.begin(), qs.end(), 1);
1235 
1236  enquire.set_query(myquery2);
1237  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1238 
1239  // For a sharded database, the elite set is resolved per shard and can
1240  // select different terms because the max term weights vary with the
1241  // per-shard term statistics. I can't see a feasible way to create
1242  // an equivalent MSet to compare with so for now at least we hard-code
1243  // the expected values.
1244  TEST_EQUAL(mymset2.size(), 4);
1245  TEST_EQUAL(mymset2.get_matches_lower_bound(), 4);
1246  TEST_EQUAL(mymset2.get_matches_estimated(), 4);
1247  TEST_EQUAL(mymset2.get_matches_upper_bound(), 4);
1248  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 2.6585705165783908299);
1249  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.9700834242150864206);
1250  mset_expect_order(mymset2, 2, 1, 3, 5);
1251  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.9700834242150864206);
1252  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.051103097360122341775);
1253  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.043131803408968119595);
1254  TEST_EQUAL_DOUBLE(mymset2[3].get_weight(), 0.043131803408968119595);
1255 }
1256 
1257 
1260 DEFINE_TESTCASE(eliteset3, backend) {
1261  Xapian::Database mydb1(get_database("apitest_simpledata"));
1262  Xapian::Enquire enquire1(mydb1);
1263 
1264  Xapian::Database mydb2(get_database("apitest_simpledata"));
1265  Xapian::Enquire enquire2(mydb2);
1266 
1267  // make a query
1268  Xapian::Stem stemmer("english");
1269 
1270  string term1 = stemmer("word");
1271  string term2 = stemmer("rubbish");
1272  string term3 = stemmer("banana");
1273 
1274  vector<string> terms;
1275  terms.push_back(term1);
1276  terms.push_back(term2);
1277  terms.push_back(term3);
1278 
1279  Xapian::Query myquery1(Xapian::Query::OP_OR, terms.begin(), terms.end());
1280  enquire1.set_query(myquery1);
1281 
1282  Xapian::Query myquery2(Xapian::Query::OP_ELITE_SET, terms.begin(), terms.end(), 3);
1283  enquire2.set_query(myquery2);
1284 
1285  // retrieve the results
1286  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1287  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1288 
1289  TEST_EQUAL(mymset1, mymset2);
1290 
1291  TEST_EQUAL(mymset1.get_termfreq(term1),
1292  mymset2.get_termfreq(term1));
1293  TEST_EQUAL(mymset1.get_termweight(term1),
1294  mymset2.get_termweight(term1));
1295  TEST_EQUAL(mymset1.get_termfreq(term2),
1296  mymset2.get_termfreq(term2));
1297  TEST_EQUAL(mymset1.get_termweight(term2),
1298  mymset2.get_termweight(term2));
1299  TEST_EQUAL(mymset1.get_termfreq(term3),
1300  mymset2.get_termfreq(term3));
1301  TEST_EQUAL(mymset1.get_termweight(term3),
1302  mymset2.get_termweight(term3));
1303 }
1304 
1306 DEFINE_TESTCASE(eliteset4, backend && !multi) {
1307  Xapian::Database mydb1(get_database("apitest_simpledata"));
1308  Xapian::Enquire enquire1(mydb1);
1309 
1310  Xapian::Database mydb2(get_database("apitest_simpledata"));
1311  Xapian::Enquire enquire2(mydb2);
1312 
1313  Xapian::Query myquery1 = query("rubbish");
1315  "word", "rubbish", "fibble");
1316  enquire1.set_query(myquery1);
1317  enquire2.set_query(myquery2);
1318 
1319  // retrieve the results
1320  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1321  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1322 
1323  TEST_NOT_EQUAL(mymset2.size(), 0);
1324  TEST_EQUAL(mymset1, mymset2);
1325 }
1326 
1328 DEFINE_TESTCASE(elitesetmulti4, multi) {
1329  Xapian::Database mydb2(get_database("apitest_simpledata"));
1330  Xapian::Enquire enquire2(mydb2);
1331 
1333  "word", "rubbish", "fibble");
1334  enquire2.set_query(myquery2);
1335 
1336  // retrieve the results
1337  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1338 
1339  // For a sharded database, the elite set is resolved per shard and can
1340  // select different terms because the max term weights vary with the
1341  // per-shard term statistics. I can't see a feasible way to create
1342  // an equivalent MSet to compare with so for now at least we hard-code
1343  // the expected values.
1344  TEST_EQUAL(mymset2.size(), 3);
1345  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1346  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1347  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
1348  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.4848948390060121572);
1349  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.4848948390060121572);
1350  mset_expect_order(mymset2, 3, 2, 4);
1351  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.4848948390060121572);
1352  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 1.0464816871772451012);
1353  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.64098768659591376373);
1354 }
1355 
1357 DEFINE_TESTCASE(eliteset5, backend) {
1358  Xapian::Database mydb1(get_database("apitest_simpledata"));
1359  Xapian::Enquire enquire1(mydb1);
1360 
1361  vector<string> v;
1362  for (int i = 0; i != 3; ++i) {
1363  v.push_back("simpl");
1364  v.push_back("queri");
1365 
1366  v.push_back("rubbish");
1367  v.push_back("rubbish");
1368  v.push_back("rubbish");
1369  v.push_back("word");
1370  v.push_back("word");
1371  v.push_back("word");
1372  }
1373 
1374  for (Xapian::termcount n = 1; n != v.size(); ++n) {
1376  v.begin(), v.end(), n);
1378  myquery1,
1379  0.004);
1380 
1381  enquire1.set_query(myquery1);
1382  // On architectures with excess precision (or, at least, on x86), the
1383  // following call used to result in a segfault (at least when n=1).
1384  enquire1.get_mset(0, 10);
1385  }
1386 }
1387 
1389 DEFINE_TESTCASE(termlisttermfreq1, backend) {
1390  Xapian::Database mydb(get_database("apitest_simpledata"));
1391  Xapian::Enquire enquire(mydb);
1392  Xapian::Stem stemmer("english");
1393  Xapian::RSet rset1;
1394  Xapian::RSet rset2;
1395  rset1.add_document(5);
1396  rset2.add_document(6);
1397 
1398  Xapian::ESet eset1 = enquire.get_eset(1000, rset1);
1399  Xapian::ESet eset2 = enquire.get_eset(1000, rset2);
1400 
1401  // search for weight of term 'another'
1402  string theterm = stemmer("another");
1403 
1404  double wt1 = 0;
1405  double wt2 = 0;
1406  {
1407  Xapian::ESetIterator i = eset1.begin();
1408  for ( ; i != eset1.end(); ++i) {
1409  if (*i == theterm) {
1410  wt1 = i.get_weight();
1411  break;
1412  }
1413  }
1414  }
1415  {
1416  Xapian::ESetIterator i = eset2.begin();
1417  for ( ; i != eset2.end(); ++i) {
1418  if (*i == theterm) {
1419  wt2 = i.get_weight();
1420  break;
1421  }
1422  }
1423  }
1424 
1425  TEST_NOT_EQUAL(wt1, 0);
1426  TEST_NOT_EQUAL(wt2, 0);
1427  TEST_EQUAL(wt1, wt2);
1428 }
1429 
1431 DEFINE_TESTCASE(qterminfo1, backend) {
1432  Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
1433  Xapian::Enquire enquire1(mydb1);
1434 
1435  Xapian::Database mydb2(get_database("apitest_simpledata"));
1436  mydb2.add_database(get_database("apitest_simpledata2"));
1437  Xapian::Enquire enquire2(mydb2);
1438 
1439  // make a query
1440  Xapian::Stem stemmer("english");
1441 
1442  string term1 = stemmer("word");
1443  string term2 = stemmer("inmemory");
1444  string term3 = stemmer("flibble");
1445 
1447  Xapian::Query(term1),
1449  Xapian::Query(term2),
1450  Xapian::Query(term3)));
1451  enquire1.set_query(myquery);
1452  enquire2.set_query(myquery);
1453 
1454  // retrieve the results
1455  Xapian::MSet mymset1a = enquire1.get_mset(0, 0);
1456  Xapian::MSet mymset2a = enquire2.get_mset(0, 0);
1457 
1458  TEST_EQUAL(mymset1a.get_termfreq(term1),
1459  mymset2a.get_termfreq(term1));
1460  TEST_EQUAL(mymset1a.get_termfreq(term2),
1461  mymset2a.get_termfreq(term2));
1462  TEST_EQUAL(mymset1a.get_termfreq(term3),
1463  mymset2a.get_termfreq(term3));
1464 
1465  TEST_EQUAL(mymset1a.get_termfreq(term1), 3);
1466  TEST_EQUAL(mymset1a.get_termfreq(term2), 1);
1467  TEST_EQUAL(mymset1a.get_termfreq(term3), 0);
1468 
1469  TEST_NOT_EQUAL(mymset1a.get_termweight(term1), 0);
1470  TEST_NOT_EQUAL(mymset1a.get_termweight(term2), 0);
1471  // non-existent terms should have 0 weight.
1472  TEST_EQUAL(mymset1a.get_termweight(term3), 0);
1473 
1474  TEST_EQUAL(mymset1a.get_termfreq(stemmer("banana")), 1);
1476  mymset1a.get_termweight(stemmer("banana")));
1477 
1478  TEST_EQUAL(mymset1a.get_termfreq("sponge"), 0);
1480  mymset1a.get_termweight("sponge"));
1481 }
1482 
1484 DEFINE_TESTCASE(qterminfo2, backend) {
1485  Xapian::Database db(get_database("apitest_simpledata"));
1486  Xapian::Enquire enquire(db);
1487 
1488  // make a query
1489  Xapian::Stem stemmer("english");
1490 
1491  string term1 = stemmer("paragraph");
1492  string term2 = stemmer("another");
1493 
1494  enquire.set_query(Xapian::Query(term1));
1495  Xapian::MSet mset0 = enquire.get_mset(0, 10);
1496 
1497  TEST_NOT_EQUAL(mset0.get_termweight("paragraph"), 0);
1498 
1500  Xapian::Query(Xapian::Query::OP_AND, term1, term2));
1501  enquire.set_query(query);
1502 
1503  // retrieve the results
1504  // Note: get_mset() used to throw "AssertionError" in debug builds
1505  Xapian::MSet mset = enquire.get_mset(0, 10);
1506 
1507  TEST_NOT_EQUAL(mset.get_termweight("paragraph"), 0);
1508 }
1509 
1510 // tests that when specifying that no items are to be returned, those
1511 // statistics which should be the same are.
1512 DEFINE_TESTCASE(msetzeroitems1, backend) {
1513  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1514  enquire.set_query(query("this"));
1515  Xapian::MSet mymset1 = enquire.get_mset(0, 0);
1516 
1517  Xapian::MSet mymset2 = enquire.get_mset(0, 1);
1518 
1519  TEST_EQUAL(mymset1.get_max_possible(), mymset2.get_max_possible());
1520 }
1521 
1522 // test that the matches_* of a simple query are as expected
// Each section below sets a query, runs get_mset() with a particular first
// and msize, and checks the lower bound, estimate and upper bound on the
// number of matches (and, in the later sections, the uncollapsed variants).
// NOTE(review): the embedded listing numbers skip at several points in this
// block (1536->1540, 1546->1550, 1556->1560, 1566->1570, 1579->1581 and
// 1582->1586), so some lines of the test are not visible in this listing -
// confirm against upstream before changing any code here.
1523 DEFINE_TESTCASE(matches1, backend) {
// True when the name of the backend under test contains "remote".
1524  bool remote = get_dbtype().find("remote") != string::npos;
1525 
1526  Xapian::Database db = get_database("apitest_simpledata");
1527  Xapian::Enquire enquire(db);
1528  Xapian::Query myquery;
1529  Xapian::MSet mymset;
1530 
// Single term query.
1531  myquery = query("word");
1532  enquire.set_query(myquery);
1533  mymset = enquire.get_mset(0, 10);
1534  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1535  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1536  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1540 
// OR of two terms.
1541  myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
1542  enquire.set_query(myquery);
1543  mymset = enquire.get_mset(0, 10);
1544  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1545  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1546  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1550 
// AND of two terms which is expected to match nothing.
1551  myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
1552  enquire.set_query(myquery);
1553  mymset = enquire.get_mset(0, 10);
1554  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1555  TEST_EQUAL(mymset.get_matches_estimated(), 0);
1556  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
1560 
// AND of two terms, with more items requested than there are matches.
1561  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1562  enquire.set_query(myquery);
1563  mymset = enquire.get_mset(0, 10);
1564  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1565  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1566  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1570 
// The same AND query, but with no items requested.
1571  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1572  enquire.set_query(myquery);
1573  mymset = enquire.get_mset(0, 0);
1574  if (db.size() == 1) {
1575  // This isn't true for sharded DBs since there one sub-database has 3
1576  // documents and simple and word both have termfreq of 2, so the
1577  // matcher can tell at least one document must match!)
1578  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1579  }
1581  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1582  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1586 
// The same AND query with one item requested.
1587  mymset = enquire.get_mset(0, 1);
1588  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1589  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1590  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1591  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1592  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1593  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1594 
// The same AND query with two items requested.
1595  mymset = enquire.get_mset(0, 2);
1596  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1597  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1598  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1599  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1600  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1601  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1602 
// A different AND query, first with no items requested.
1603  myquery = query(Xapian::Query::OP_AND, "paragraph", "another");
1604  enquire.set_query(myquery);
1605  mymset = enquire.get_mset(0, 0);
1606  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1607  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1608  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1609  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1610  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1611  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1612 
// One item requested: the expected estimate and upper bound depend on
// whether the database is both sharded and remote.
1613  mymset = enquire.get_mset(0, 1);
1614  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1615  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1616  if (db.size() > 1 && remote) {
1617  // The matcher can tell there's only one match in this case.
1618  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1619  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1620  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1621  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1622  } else {
1623  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1624  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1625  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1626  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1627  }
1628 
// Two items requested: all three figures are expected to be exactly 1.
1629  mymset = enquire.get_mset(0, 2);
1630  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1631  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1632  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1633  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1634  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1635  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1636 
// Starting from the second hit (first == 1) gives the same figures.
1637  mymset = enquire.get_mset(1, 20);
1638  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1639  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1640  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1641  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1642  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1643  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1644 }
1645 
1646 // tests that wqf affects the document weights
1647 DEFINE_TESTCASE(wqf1, backend) {
1648  // Both queries have length 2; in q1 word has wqf=2, in q2 word has wqf=1
1649  Xapian::Query q1("word", 2);
1650  Xapian::Query q2("word");
1651  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1652  enquire.set_query(q1);
1653  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1654  enquire.set_query(q2);
1655  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1656  // Check the weights
1657  TEST(mset1.begin().get_weight() > mset2.begin().get_weight());
1658 }
1659 
1660 // tests that query length affects the document weights
1661 DEFINE_TESTCASE(qlen1, backend) {
1662  Xapian::Query q1("word");
1663  Xapian::Query q2("word");
1664  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1665  enquire.set_query(q1);
1666  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1667  enquire.set_query(q2);
1668  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1669  // Check the weights
1670  // TEST(mset1.begin().get_weight() < mset2.begin().get_weight());
1671  TEST(mset1.begin().get_weight() == mset2.begin().get_weight());
1672 }
1673 
1674 // tests that opening a non-existent termlist throws the correct exception
1675 DEFINE_TESTCASE(termlist1, backend) {
1676  Xapian::Database db(get_database("apitest_onedoc"));
1681  /* Cause the database to be used properly, showing up problems
1682  * with the link being in a bad state. CME */
1683  Xapian::TermIterator temp = db.termlist_begin(1);
1685  Xapian::TermIterator t = db.termlist_begin(999999999));
1686 }
1687 
1688 // tests that a Xapian::TermIterator works as an STL iterator
1689 DEFINE_TESTCASE(termlist2, backend) {
1690  Xapian::Database db(get_database("apitest_onedoc"));
1692  Xapian::TermIterator tend = db.termlist_end(1);
1693 
1694  // test operator= creates a copy which compares equal
1695  Xapian::TermIterator t_copy = t;
1696  TEST_EQUAL(t, t_copy);
1697 
1698  // test copy constructor creates a copy which compares equal
1699  Xapian::TermIterator t_clone(t);
1700  TEST_EQUAL(t, t_clone);
1701 
1702  vector<string> v(t, tend);
1703 
1704  t = db.termlist_begin(1);
1705  tend = db.termlist_end(1);
1706  vector<string>::const_iterator i;
1707  for (i = v.begin(); i != v.end(); ++i) {
1708  TEST_NOT_EQUAL(t, tend);
1709  TEST_EQUAL(*i, *t);
1710  t++;
1711  }
1712  TEST_EQUAL(t, tend);
1713 }
1714 
1715 static Xapian::TermIterator
1717 {
1718  Xapian::Database db(get_database("apitest_onedoc"));
1719  return db.termlist_begin(1);
1720 }
1721 
1722 // tests that a Xapian::TermIterator still works when the DB is deleted
1723 DEFINE_TESTCASE(termlist3, backend) {
1725  Xapian::Database db(get_database("apitest_onedoc"));
1727  Xapian::TermIterator tend = db.termlist_end(1);
1728 
1729  while (t != tend) {
1730  TEST_EQUAL(*t, *u);
1731  t++;
1732  u++;
1733  }
1734 }
1735 
1736 // tests skip_to
1737 DEFINE_TESTCASE(termlist4, backend) {
1738  Xapian::Database db(get_database("apitest_onedoc"));
1740  i.skip_to("");
1741  i.skip_to("\xff");
1742 }
1743 
1744 // tests punctuation is OK in terms (particularly in remote queries)
1745 DEFINE_TESTCASE(puncterms1, backend) {
1746  Xapian::Database db(get_database("apitest_punc"));
1747  Xapian::Enquire enquire(db);
1748 
1749  Xapian::Query q1("semi;colon");
1750  enquire.set_query(q1);
1751  Xapian::MSet m1 = enquire.get_mset(0, 10);
1752 
1753  Xapian::Query q2("col:on");
1754  enquire.set_query(q2);
1755  Xapian::MSet m2 = enquire.get_mset(0, 10);
1756 
1757  Xapian::Query q3("com,ma");
1758  enquire.set_query(q3);
1759  Xapian::MSet m3 = enquire.get_mset(0, 10);
1760 }
1761 
1762 // test that searching for a term with a space or backslash in it works
1763 DEFINE_TESTCASE(spaceterms1, backend) {
1764  Xapian::Enquire enquire(get_database("apitest_space"));
1765  Xapian::MSet mymset;
1766  Xapian::doccount count;
1768  Xapian::Stem stemmer("english");
1769 
1770  enquire.set_query(stemmer("space man"));
1771  mymset = enquire.get_mset(0, 10);
1772  TEST_MSET_SIZE(mymset, 1);
1773  count = 0;
1774  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1775  TEST_EQUAL(count, 1);
1776 
1777  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
1778  TEST_NOT_EQUAL(mymset.begin().get_document().get_data(), "");
1779  TEST_NOT_EQUAL(mymset.begin().get_document().get_value(value_no), "");
1780  }
1781 
1782  enquire.set_query(stemmer("tab\tby"));
1783  mymset = enquire.get_mset(0, 10);
1784  TEST_MSET_SIZE(mymset, 1);
1785  count = 0;
1786  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1787  TEST_EQUAL(count, 1);
1788 
1789  for (Xapian::valueno value_no = 0; value_no < 7; ++value_no) {
1790  string value = mymset.begin().get_document().get_value(value_no);
1791  TEST_NOT_EQUAL(value, "");
1792  if (value_no == 0) {
1793  TEST(value.size() > 262);
1794  TEST_EQUAL(static_cast<unsigned char>(value[262]), 255);
1795  }
1796  }
1797 
1798  enquire.set_query(stemmer("back\\slash"));
1799  mymset = enquire.get_mset(0, 10);
1800  TEST_MSET_SIZE(mymset, 1);
1801  count = 0;
1802  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1803  TEST_EQUAL(count, 1);
1804 }
1805 
1806 // test that XOR queries work
1807 DEFINE_TESTCASE(xor1, backend) {
1808  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1809  Xapian::Stem stemmer("english");
1810 
1811  vector<string> terms;
1812  terms.push_back(stemmer("this"));
1813  terms.push_back(stemmer("word"));
1814  terms.push_back(stemmer("of"));
1815 
1816  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1818  enquire.set_query(query);
1819 
1820  Xapian::MSet mymset = enquire.get_mset(0, 10);
1821  // Docid this word of Match?
1822  // 1 * *
1823  // 2 * * * *
1824  // 3 * *
1825  // 4 * *
1826  // 5 * *
1827  // 6 * *
1828  mset_expect_order(mymset, 1, 2, 5, 6);
1829 }
1830 
1832 DEFINE_TESTCASE(xor2, backend) {
1833  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1834  Xapian::Stem stemmer("english");
1835 
1836  vector<string> terms;
1837  terms.push_back(stemmer("this"));
1838  terms.push_back(stemmer("word"));
1839  terms.push_back(stemmer("of"));
1840 
1841  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1842  enquire.set_query(query);
1843 
1844  Xapian::MSet mymset = enquire.get_mset(0, 10);
1845  // Docid LEN this word of Match?
1846  // 1 28 2 *
1847  // 2 81 5 8 1 *
1848  // 3 15 1 2
1849  // 4 31 1 1
1850  // 5 15 1 *
1851  // 6 15 1 *
1852  mset_expect_order(mymset, 2, 1, 5, 6);
1853 }
1854 
1855 // test Xapian::Database::get_document()
// NOTE(review): the embedded listing numbers jump from 1858 to 1864, so five
// lines of this test are not visible in this listing - confirm against
// upstream.
1856 DEFINE_TESTCASE(getdoc1, backend) {
1857  Xapian::Database db(get_database("apitest_onedoc"));
1858  Xapian::Document doc(db.get_document(1));
1864  // Check that Document works as a handle on modification
1865  // (this was broken for the first try at Xapian::Document prior to 0.7).
// doc2 is copied from doc before the data is changed; the last check
// expects the change made through doc to be visible through doc2 too.
1866  Xapian::Document doc2 = doc;
1867  doc.set_data("modified!");
1868  TEST_EQUAL(doc.get_data(), "modified!");
1869  TEST_EQUAL(doc.get_data(), doc2.get_data());
1870 }
1871 
1872 // test whether operators with no elements work as a null query
// Builds an OP_XOR query from an empty vector of subqueries and checks that
// it yields an empty MSet.
1873 DEFINE_TESTCASE(emptyop1, backend) {
1874  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1875  vector<Xapian::Query> nullvec;
1876 
1877  Xapian::Query query1(Xapian::Query::OP_XOR, nullvec.begin(), nullvec.end());
1878 
1879  enquire.set_query(query1);
1880  Xapian::MSet mymset = enquire.get_mset(0, 10);
1881  TEST_MSET_SIZE(mymset, 0);
1882  // In Xapian < 1.3.0, this gave InvalidArgumentError (because
1883  // query1.empty()) but elsewhere we treat an empty query as just not
1884  // matching any documents, so we now do the same here too.
 // NOTE(review): listing line 1885 (the start of the statement that the
 // next line continues) is absent from this extract.
1886  enquire.get_matching_terms_end(1));
1887 }
1888 
1889 // Regression test for check_at_least SEGV when there are no matches.
// Requests up to 10 results with a third get_mset() argument of 11 for the
// query "thom", and expects an empty MSet.
1890 DEFINE_TESTCASE(checkatleast1, backend) {
1891  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1892  enquire.set_query(Xapian::Query("thom"));
 // Arguments are (first, maxitems, checkatleast).
1893  Xapian::MSet mymset = enquire.get_mset(0, 10, 11);
1894  TEST_EQUAL(0, mymset.size());
1895 }
1896 
1897 // Regression test - if check_at_least was set we returned (check_at_least - 1)
1898 // results, rather than the requested msize. Fixed in 1.0.2.
1899 DEFINE_TESTCASE(checkatleast2, backend) {
1900  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1901  enquire.set_query(Xapian::Query("paragraph"));
1902 
 // maxitems=3 with checkatleast=10: the MSet must still hold exactly 3
 // items, and the lower bound is expected to be exactly 5.
1903  Xapian::MSet mymset = enquire.get_mset(0, 3, 10);
1904  TEST_MSET_SIZE(mymset, 3);
1905  TEST_EQUAL(mymset.get_matches_lower_bound(), 5);
 // NOTE(review): listing line 1906 is absent from this extract.
1907 
 // maxitems=2 with checkatleast=4: only a ">= 4" lower bound is required.
1908  mymset = enquire.get_mset(0, 2, 4);
1909  TEST_MSET_SIZE(mymset, 2);
 // NOTE(review): each of the two checks below appears twice verbatim; the
 // duplicates look redundant - confirm whether a different accessor was
 // intended.
1910  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1911  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1912  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,4);
1913  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,4);
1914 }
1915 
1916 // Feature tests - check_at_least with various sorting options.
// For each of 3 outer settings and 7 sort modes, runs the same query with
// three (maxitems, checkatleast) combinations and checks the MSet size and
// the match-count bounds.
1917 DEFINE_TESTCASE(checkatleast3, backend) {
1918  Xapian::Enquire enquire(get_database("etext"));
1919  enquire.set_query(Xapian::Query("prussian")); // 60 matches.
1920 
1921  for (int order = 0; order < 3; ++order) {
 // NOTE(review): the statement in each case below (listing lines 1924,
 // 1927 and 1930) is absent from this extract; presumably each selects a
 // docid ordering - confirm against the original file.
1922  switch (order) {
1923  case 0:
1925  break;
1926  case 1:
1928  break;
1929  case 2:
1931  break;
1932  }
1933 
1934  for (int sort = 0; sort < 7; ++sort) {
 // Odd sort modes use reverse ordering; mode 0 (relevance) ignores it.
1935  bool reverse = (sort & 1);
1936  switch (sort) {
1937  case 0:
1938  enquire.set_sort_by_relevance();
1939  break;
1940  case 1: case 2:
1941  enquire.set_sort_by_value(0, reverse);
1942  break;
1943  case 3: case 4:
1944  enquire.set_sort_by_value_then_relevance(0, reverse);
1945  break;
1946  case 5: case 6:
1947  enquire.set_sort_by_relevance_then_value(0, reverse);
1948  break;
1949  }
1950 
 // maxitems and checkatleast both exceed the 60 matches: every match
 // is returned and all bounds are exact.
1951  Xapian::MSet mset = enquire.get_mset(0, 100, 500);
1952  TEST_MSET_SIZE(mset, 60);
1953  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1954  TEST_EQUAL(mset.get_matches_estimated(), 60);
1955  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
 // NOTE(review): listing lines 1956-1958 are absent from this extract.
1959 
 // maxitems (50) is below the match count but checkatleast (100) is
 // above it: 50 items returned, bounds still exactly 60.
1960  mset = enquire.get_mset(0, 50, 100);
1961  TEST_MSET_SIZE(mset, 50);
1962  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1963  TEST_EQUAL(mset.get_matches_estimated(), 60);
1964  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
1965  TEST_EQUAL(mset.get_uncollapsed_matches_lower_bound(), 60);
1966  TEST_EQUAL(mset.get_uncollapsed_matches_estimated(), 60);
1967  TEST_EQUAL(mset.get_uncollapsed_matches_upper_bound(), 60);
1968 
 // checkatleast (50) is below the match count: only the lower bounds
 // are checked, and they must be at least 50.
1969  mset = enquire.get_mset(0, 10, 50);
1970  TEST_MSET_SIZE(mset, 10);
1971  TEST_REL(mset.get_matches_lower_bound(),>=,50);
1972  TEST_REL(mset.get_uncollapsed_matches_lower_bound(),>=,50);
1973  }
1974  }
1975 }
1976 
1977 // tests all document postlists
// Iterates the postlist of the empty term over "apitest_manydocs", first
// sequentially and then with one skip_to() jump.
1978 DEFINE_TESTCASE(allpostlist1, backend) {
1979  Xapian::Database db(get_database("apitest_manydocs"));
 // NOTE(review): listing line 1980, which presumably declares the iterator
 // `i` used below, is absent from this extract.
1981  unsigned int j = 1;
 // First pass: docids must be consecutive starting at 1.
1982  while (i != db.postlist_end("")) {
1983  TEST_EQUAL(*i, j);
1984  i++;
1985  j++;
1986  }
 // j ends one past the last docid, so 512 entries were seen.
1987  TEST_EQUAL(j, 513);
1988 
 // Second pass: same walk, but on reaching 50 jump ahead to docid 60 with
 // skip_to(); iteration must continue consecutively from there.
1989  i = db.postlist_begin("");
1990  j = 1;
1991  while (i != db.postlist_end("")) {
1992  TEST_EQUAL(*i, j);
1993  i++;
1994  j++;
1995  if (j == 50) {
1996  j += 10;
1997  i.skip_to(j);
1998  }
1999  }
2000  TEST_EQUAL(j, 513);
2001 }
2002 
2004 {
 // Body of the emptyterm1 helper: checks how the database `db` treats the
 // empty term.
 // NOTE(review): the signature (listing line 2003) and listing lines 2006
 // and 2009 are absent from this extract.
2005  // Don't bother with postlist_begin() because allpostlist tests cover that.
 // The empty term's termfreq must equal the document count.
2007  TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
 // The empty term must exist exactly when the database has documents.
2008  TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
2010 }
2011 
2012 // tests results of passing an empty term to various methods
// Opens three databases in turn (512 documents, 1 document, none) and checks
// each document count.
// NOTE(review): listing lines 2016, 2020 and 2024 are absent from this
// extract; presumably each runs the empty-term checks on `db` - confirm
// against the original file.
2013 DEFINE_TESTCASE(emptyterm1, backend) {
2014  Xapian::Database db(get_database("apitest_manydocs"));
2015  TEST_EQUAL(db.get_doccount(), 512);
2017 
2018  db = get_database("apitest_onedoc");
2019  TEST_EQUAL(db.get_doccount(), 1);
2021 
 // An empty name is expected to give a database with no documents.
2022  db = get_database("");
2023  TEST_EQUAL(db.get_doccount(), 0);
2025 }
2026 
2027 // Test for alldocs postlist with a sparse database.
// The database holds a single document at docid 5; the postlist for the
// empty term must contain exactly that one entry.
2028 DEFINE_TESTCASE(alldocspl1, backend) {
 // The generator callback stores one document with data "5" and value
 // slot 0 set to "5"; it adds no terms.
2029  Xapian::Database db = get_database("alldocspl1",
2030  [](Xapian::WritableDatabase& wdb,
2031  const string&) {
2032  Xapian::Document doc;
2033  doc.set_data("5");
2034  doc.add_value(0, "5");
2035  wdb.replace_document(5, doc);
2036  });
 // NOTE(review): listing line 2037, which presumably declares the iterator
 // `i` used below, is absent from this extract.
2038  TEST(i != db.postlist_end(""));
2039  TEST_EQUAL(*i, 5);
 // No terms were added, so length and unique-term count are expected to be
 // 0, while the wdf reported for the empty term is expected to be 1.
2040  TEST_EQUAL(i.get_doclength(), 0);
2041  TEST_EQUAL(i.get_unique_terms(), 0);
2042  TEST_EQUAL(i.get_wdf(), 1);
2043  ++i;
2044  TEST(i == db.postlist_end(""));
2045 }
2046 
2047 // Test reading and writing a modified alldocspostlist.
// Walks the empty-term postlist after each of: adding docid 5, committing,
// adding docid 7, deleting docid 5, and committing again. The final walk
// happens after the inner scope holding `db` has ended.
2048 DEFINE_TESTCASE(alldocspl2, writable) {
 // Declared outside the inner scope so they outlive `db`.
2049  Xapian::PostingIterator i, end;
2050  {
 // NOTE(review): listing line 2051, which presumably declares the writable
 // database `db`, is absent from this extract.
2052  Xapian::Document doc;
2053  doc.set_data("5");
2054  doc.add_value(0, "5");
2055  db.replace_document(5, doc);
2056 
2057  // Test iterating before committing the changes.
2058  i = db.postlist_begin("");
2059  end = db.postlist_end("");
2060  TEST(i != end);
2061  TEST_EQUAL(*i, 5);
2062  TEST_EQUAL(i.get_doclength(), 0);
2063  TEST_EQUAL(i.get_unique_terms(), 0);
2064  TEST_EQUAL(i.get_wdf(), 1);
2065  ++i;
2066  TEST(i == end);
2067 
2068  db.commit();
2069 
2070  // Test iterating after committing the changes.
2071  i = db.postlist_begin("");
2072  end = db.postlist_end("");
2073  TEST(i != end);
2074  TEST_EQUAL(*i, 5);
2075  TEST_EQUAL(i.get_doclength(), 0);
2076  TEST_EQUAL(i.get_unique_terms(), 0);
2077  TEST_EQUAL(i.get_wdf(), 1);
2078  ++i;
2079  TEST(i == end);
2080 
2081  // Add another document.
 // Note that the data is still "5" while value slot 0 is "7"; neither is
 // checked by this test.
2082  doc = Xapian::Document();
2083  doc.set_data("5");
2084  doc.add_value(0, "7");
2085  db.replace_document(7, doc);
2086 
2087  // Test iterating through before committing the changes.
2088  i = db.postlist_begin("");
2089  end = db.postlist_end("");
2090  TEST(i != end);
2091  TEST_EQUAL(*i, 5);
2092  TEST_EQUAL(i.get_doclength(), 0);
2093  TEST_EQUAL(i.get_unique_terms(), 0);
2094  TEST_EQUAL(i.get_wdf(), 1);
2095  ++i;
2096  TEST(i != end);
2097  TEST_EQUAL(*i, 7);
2098  TEST_EQUAL(i.get_doclength(), 0);
2099  TEST_EQUAL(i.get_unique_terms(), 0);
2100  TEST_EQUAL(i.get_wdf(), 1);
2101  ++i;
2102  TEST(i == end);
2103 
2104  // Delete the first document.
2105  db.delete_document(5);
2106 
2107  // Test iterating through before committing the changes.
2108  i = db.postlist_begin("");
2109  end = db.postlist_end("");
2110  TEST(i != end);
2111  TEST_EQUAL(*i, 7);
2112  TEST_EQUAL(i.get_doclength(), 0);
2113  TEST_EQUAL(i.get_unique_terms(), 0);
2114  TEST_EQUAL(i.get_wdf(), 1);
2115  ++i;
2116  TEST(i == end);
2117 
2118  // Test iterating through after committing the changes, and dropping the
2119  // reference to the main DB.
2120  db.commit();
2121  i = db.postlist_begin("");
2122  end = db.postlist_end("");
2123  }
2124 
 // `db` is out of scope here; the iterators obtained above must still work.
2125  TEST(i != end);
2126  TEST_EQUAL(*i, 7);
2127  TEST_EQUAL(i.get_doclength(), 0);
2128  TEST_EQUAL(i.get_unique_terms(), 0);
2129  TEST_EQUAL(i.get_wdf(), 1);
2130  ++i;
2131  TEST(i == end);
2132 }
2133 
2134 // Feature test for Query::OP_SCALE_WEIGHT.
// For every query string and every multiplier, compares the MSet of the
// plain query with the MSet of the same query wrapped in OP_SCALE_WEIGHT.
2135 DEFINE_TESTCASE(scaleweight1, backend) {
2136  Xapian::Database db(get_database("apitest_phrase"));
2137  Xapian::Enquire enq(db);
 // NOTE(review): listing line 2138, which presumably declares the query
 // parser `qp` used below, is absent from this extract.
2139 
2140  static const char * const queries[] = {
2141  "pad",
2142  "milk fridge",
2143  "leave milk on fridge",
2144  "ordered milk operator",
2145  "ordered phrase operator",
2146  "leave \"milk on fridge\"",
2147  "notpresent",
2148  "leave \"milk notpresent\"",
2149  };
 // The list is terminated by two equal consecutive entries (the trailing
 // "0, 0"); the single 0 in the middle is followed by 0.5 and so is
 // processed as a real multiplier.
2150  static const double multipliers[] = {
2151  -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
2152  0, 0
2153  };
2154 
2155  for (auto qstr : queries) {
 // Reset the debug stream so a failure only shows output for this query.
2156  tout.str(string());
2157  Xapian::Query query1 = qp.parse_query(qstr);
2158  tout << "query1: " << query1.get_description() << '\n';
2159  for (const double *multp = multipliers; multp[0] != multp[1]; ++multp) {
2160  double mult = *multp;
2161  if (mult < 0) {
 // NOTE(review): listing lines 2162-2163 (the start of the statement
 // the next line continues) are absent from this extract.
2164  query1, mult));
2165  continue;
2166  }
2167  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, mult);
2168  tout << "query2: " << query2.get_description() << '\n';
2169 
2170  enq.set_query(query1);
2171  Xapian::MSet mset1 = enq.get_mset(0, 20);
2172  enq.set_query(query2);
2173  Xapian::MSet mset2 = enq.get_mset(0, 20);
2174 
 // Scaling must not change which documents match.
2175  TEST_EQUAL(mset1.size(), mset2.size());
2176 
2177  Xapian::MSetIterator i1, i2;
2178  if (mult > 0) {
 // Positive factor: same documents in the same order, with each
 // weight multiplied by the factor.
2179  for (i1 = mset1.begin(), i2 = mset2.begin();
2180  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2181  TEST_EQUAL_DOUBLE(i1.get_weight() * mult, i2.get_weight());
2182  TEST_EQUAL(*i1, *i2);
2183  }
2184  } else {
2185  // Weights in mset2 are 0; so it should be sorted by docid.
2186  vector<Xapian::docid> ids1;
2187  vector<Xapian::docid> ids2;
2188  for (i1 = mset1.begin(), i2 = mset2.begin();
2189  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2190  TEST_NOT_EQUAL_DOUBLE(i1.get_weight(), 0);
2191  TEST_EQUAL_DOUBLE(i2.get_weight(), 0);
2192  ids1.push_back(*i1);
2193  ids2.push_back(*i2);
2194  }
 // Only ids1 is sorted: ids2 must already be in ascending docid order.
2195  sort(ids1.begin(), ids1.end());
2196  TEST_EQUAL(ids1, ids2);
2197  }
2198  }
2199  }
2200 }
2201 
2202 // Test Query::OP_SCALE_WEIGHT being used to multiply some of the weights of a
2203 // search by zero.
2204 DEFINE_TESTCASE(scaleweight2, backend) {
2205  Xapian::Database db(get_database("apitest_phrase"));
2206  Xapian::Enquire enq(db);
 // NOTE(review): listing line 2207, which presumably declares the MSet
 // iterator `i` used below, is absent from this extract.
2208 
 // query5 = ("fridg" scaled by 2.5) OR ("milk" scaled by 0).
2209  Xapian::Query query1("fridg");
2210  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, 2.5);
2211  Xapian::Query query3("milk");
2212  Xapian::Query query4(Xapian::Query::OP_SCALE_WEIGHT, query3, 0);
2213  Xapian::Query query5(Xapian::Query::OP_OR, query2, query4);
2214 
2215  // query5 should first return the same results as query1, in the same
2216  // order, and then return the results of query3 which aren't also results
2217  // of query1, in ascending docid order. We test that this happens.
2218 
2219  // First, build a vector of docids matching the first part of the query,
2220  // and append the non-duplicate docids matching the second part of the
2221  // query.
2222  vector<Xapian::docid> ids1;
2223  set<Xapian::docid> idsin1;
2224  vector<Xapian::docid> ids3;
2225 
2226  enq.set_query(query1);
2227  Xapian::MSet mset1 = enq.get_mset(0, 20);
2228  enq.set_query(query3);
2229  Xapian::MSet mset3 = enq.get_mset(0, 20);
 // Both component queries must match something, otherwise the comparison
 // at the end would be trivial.
2230  TEST_NOT_EQUAL(mset1.size(), 0);
2231  for (i = mset1.begin(); i != mset1.end(); ++i) {
2232  ids1.push_back(*i);
2233  idsin1.insert(*i);
2234  }
2235  TEST_NOT_EQUAL(mset3.size(), 0);
2236  for (i = mset3.begin(); i != mset3.end(); ++i) {
 // Skip documents already contributed by query1.
2237  if (idsin1.find(*i) != idsin1.end())
2238  continue;
2239  ids3.push_back(*i);
2240  }
 // ids1 becomes the expected order: query1's results, then the remaining
 // query3 results in ascending docid order.
2241  sort(ids3.begin(), ids3.end());
2242  ids1.insert(ids1.end(), ids3.begin(), ids3.end());
2243 
2244  // Now, run the combined query and build a vector of the matching docids.
2245  vector<Xapian::docid> ids5;
2246  enq.set_query(query5);
2247  Xapian::MSet mset5 = enq.get_mset(0, 20);
2248  for (i = mset5.begin(); i != mset5.end(); ++i) {
2249  ids5.push_back(*i);
2250  }
2251 
2252  TEST_EQUAL(ids1, ids5);
2253 }
2254 
2255 // Regression test for bug fixed in 1.0.5 - this test would fail under
2256 // valgrind because it used an uninitialised value.
2257 DEFINE_TESTCASE(bm25weight1, backend) {
2258  Xapian::Enquire enquire(get_database("apitest_simpledata"));
2259  enquire.set_weighting_scheme(Xapian::BM25Weight(1, 25, 1, 0.01, 0.5));
2260  enquire.set_query(Xapian::Query("word"));
2261 
 // The result is deliberately unchecked: the test only needs the match to
 // run with these BM25 parameters.
2262  Xapian::MSet mset = enquire.get_mset(0, 25);
2263 }
2264 
2265 // Feature test for TradWeight.
// Runs two single-term queries and checks the result counts, then checks
// that the first and last hits of the second query have equal weight.
// NOTE(review): listing lines 2268 and 2274 are absent from this extract;
// presumably they set the weighting scheme before each query - confirm
// against the original file.
2266 DEFINE_TESTCASE(tradweight1, backend) {
2267  Xapian::Enquire enquire(get_database("apitest_simpledata"));
2269  enquire.set_query(Xapian::Query("word"));
2270 
2271  Xapian::MSet mset = enquire.get_mset(0, 25);
2272  TEST_EQUAL(mset.size(), 2);
2273 
2275  enquire.set_query(Xapian::Query("this"));
2276 
2277  mset = enquire.get_mset(0, 25);
2278  TEST_EQUAL(mset.size(), 6);
2279 
2280  // Check that TradWeight(0) means wdf and doc length really don't affect
2281  // the weights as stated in the documentation.
2282  TEST_EQUAL(mset[0].get_weight(), mset[5].get_weight());
2283 }
2284 
2285 // Test TradWeight when weighting documents using an RSet.
2286 // Simply changed the weighting scheme used by rset2 testcase.
2287 DEFINE_TESTCASE(tradweight4, backend) {
2288  Xapian::Database mydb(get_database("apitest_rset"));
2289  Xapian::Enquire enquire(mydb);
2290  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
2291 
2292  enquire.set_query(myquery);
 // NOTE(review): listing line 2293 is absent from this extract; presumably
 // it selects the weighting scheme - confirm against the original file.
2294 
 // Baseline run without any relevance information.
2295  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
2296 
 // Mark document 2 as relevant and run the same query again.
2297  Xapian::RSet myrset;
2298  myrset.add_document(2);
2299 
2300  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
2301 
2302  mset_expect_order(mymset1, 1, 2);
2303  // Document 2 should have higher weight than document 1 despite the wdf of
2304  // "people" being 1 because "people" indexes a document in the RSet whereas
2305  // "cuddly" (wdf=2) does not.
2306  mset_expect_order(mymset2, 2, 1);
2307 }
2308 
2309 // Feature test for Database::get_uuid().
// Checks the UUID of a single database, of an empty Database object, and of
// Database objects built up with add_database().
2310 DEFINE_TESTCASE(uuid1, backend && !multi) {
2311  SKIP_TEST_FOR_BACKEND("inmemory");
2312  Xapian::Database db = get_database("apitest_simpledata");
2313  string uuid1 = db.get_uuid();
 // The UUID string is expected to be 36 characters long.
2314  TEST_EQUAL(uuid1.size(), 36);
2315 
2316  // A database with no sub-databases has an empty UUID.
2317  Xapian::Database db2;
2318  TEST(db2.get_uuid().empty());
2319 
 // With exactly one sub-database, the UUID is that sub-database's UUID.
2320  db2.add_database(db);
2321  TEST_EQUAL(uuid1, db2.get_uuid());
2322 
2323  // Multi-database has multiple UUIDs (we don't define the format exactly
2324  // so this assumes something about the implementation).
 // The same database is added a second time, so both parts are uuid1.
2325  db2.add_database(db);
2326  TEST_EQUAL(uuid1 + ":" + uuid1, db2.get_uuid());
2327 
2328 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
2329  // This relies on InMemory databases not supporting uuids.
2330  // A multi-database containing a database with no uuid has no uuid.
2331  db2.add_database(Xapian::Database(string(), Xapian::DB_BACKEND_INMEMORY));
2332  TEST(db2.get_uuid().empty());
2333 #endif
2334 }
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Definition: testutils.h:78
const int DB_BACKEND_INMEMORY
Use the "in memory" backend.
Definition: constants.h:195
ExpandDecider subclass which restricts terms to a particular prefix.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
Definition: omenquire.cc:829
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: omenquire.cc:878
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
Definition: omdatabase.cc:198
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_sort_by_relevance()
Set the sorting to be by relevance only.
Definition: omenquire.cc:863
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:850
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
static void print_mset_percentages(const Xapian::MSet &mset)
Definition: api_anydb.cc:54
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: omenquire.cc:198
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:869
ESetIterator back() const
Return iterator pointing to the last object in this ESet.
Definition: eset.h:362
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
Definition: enquire.h:715
Match documents which an odd number of subqueries match.
Definition: query.h:107
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:856
Class representing a stemming algorithm.
Definition: stem.h:62
PositionIterator positionlist_begin(Xapian::docid did, const std::string &tname) const
An iterator pointing to the start of the position list for a given term in a given document...
Definition: omdatabase.cc:250
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:46
op
Query operators.
Definition: query.h:78
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails...
Definition: testsuite.h:267
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: omenquire.cc:246
static void test_emptyterm1_helper(Xapian::Database &db)
Definition: api_anydb.cc:2003
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
Definition: testsuite.h:300
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
double get_max_attained() const
The maximum weight attained by any document.
Definition: omenquire.cc:297
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
a generic test suite engine
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
Definition: enquire.h:603
Class representing a list of search results.
Definition: mset.h:44
void skip_to(const std::string &term)
Advance the iterator to term term.
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
Virtual base class for expand decider functor.
Definition: expanddecider.h:37
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
static Xapian::Stem stemmer
Definition: stemtest.cc:41
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
Definition: enquire.h:593
double get_weight() const
Get the weight for the current position.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:956
test functionality of the Xapian API
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: omenquire.cc:262
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_REL(A, REL, B)
Test a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
Definition: eset.h:345
Class for iterating over a list of terms.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Definition: omenquire.cc:941
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
Xapian::doccount size() const
Return number of items in this ESet object.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: omenquire.cc:276
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
docids sort in whatever order is most efficient for the backend.
Definition: enquire.h:331
static int verbose
Definition: xapian-delve.cc:47
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
This class provides read/write access to a database.
Definition: database.h:789
DEFINE_TESTCASE(zerodocid1, backend)
Definition: api_anydb.cc:118
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:368
Scale the weight contributed by a subquery.
Definition: query.h:166
Public interfaces for the Xapian library.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Definition: omenquire.cc:887
docids sort in ascending order (default)
Definition: enquire.h:326
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
std::string get_dbtype()
Definition: apitest.cc:42
void fetch(const MSetIterator &begin, const MSetIterator &end) const
Prefetch hint a range of items.
Definition: mset.h:612
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629
Xapian::termcount get_ebound() const
Return a bound on the full size of this ESet object.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Definition: omenquire.cc:222
int percent
The percentage score for a document in an MSet.
Definition: types.h:66
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:763
static void print_mset_weights(const Xapian::MSet &mset)
Definition: api_anydb.cc:45
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
Definition: queryparser.cc:161
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Definition: mset.h:531
Iterator over a Xapian::ESet.
Definition: eset.h:160
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
Definition: database.h:240
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:75
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Definition: omdatabase.cc:148
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
size_t size() const
Return number of shards in this Database object.
Definition: database.h:93
bool term_exists(const std::string &tname) const
Check if a given term exists in the database.
Definition: omdatabase.cc:524
double get_weight() const
Get the weight for the current position.
Definition: omenquire.cc:460
void add_document(Xapian::docid did)
Add a document to the relevance set.
Definition: omenquire.cc:104
Match only documents which all subqueries match.
Definition: query.h:84
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
static Xapian::TermIterator test_termlist3_helper()
Definition: api_anydb.cc:1716
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
bool operator()(const string &tname) const
Do we want this term in the ESet?
Definition: api_anydb.cc:535
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:283
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Match documents which at least one subquery matches.
Definition: query.h:92
void skip_to(Xapian::docid did)
Advance the iterator to document did.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:111
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Definition: omenquire.cc:206
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
Class representing a list of search results.
Definition: eset.h:43
Xapian::Document get_document() const
Get the Document object for the current position.
Definition: omenquire.cc:450
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Definition: mset.h:641
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
Definition: omenquire.cc:842
std::string get_value(Xapian::valueno slot) const
Get value by number.
Definition: omdocument.cc:64
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Definition: eset.h:350
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:535
A relevance set (R-Set).
Definition: enquire.h:60
std::string get_uuid() const
Get a UUID for the database.
Definition: omdatabase.cc:776
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:269
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Definition: omdatabase.cc:339
docids sort in descending order.
Definition: enquire.h:328