xapian-core  1.4.19
api_anydb.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2015,2016,2017,2020 Olly Betts
7  * Copyright 2006,2008 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23  * USA
24  */
25 
26 #include <config.h>
27 
28 #include "api_anydb.h"
29 
30 #include <algorithm>
31 #include <string>
32 
33 #define XAPIAN_DEPRECATED(X) X
34 #include <xapian.h>
35 #include "testsuite.h"
36 #include "testutils.h"
37 
38 #include "apitest.h"
39 
40 #include <list>
41 
42 using namespace std;
43 
44 static void
46 {
47  Xapian::MSetIterator i = mset.begin();
48  for ( ; i != mset.end(); ++i) {
49  tout << " " << i.get_weight();
50  }
51 }
52 
53 static void
55 {
56  Xapian::MSetIterator i = mset.begin();
57  for ( ; i != mset.end(); ++i) {
58  tout << " " << mset.convert_to_percent(i);
59  }
60 }
61 
62 static Xapian::Query
64  const string & t1 = string(), const string & t2 = string(),
65  const string & t3 = string(), const string & t4 = string(),
66  const string & t5 = string(), const string & t6 = string(),
67  const string & t7 = string(), const string & t8 = string(),
68  const string & t9 = string(), const string & t10 = string())
69 {
70  vector<string> v;
71  Xapian::Stem stemmer("english");
72  if (!t1.empty()) v.push_back(stemmer(t1));
73  if (!t2.empty()) v.push_back(stemmer(t2));
74  if (!t3.empty()) v.push_back(stemmer(t3));
75  if (!t4.empty()) v.push_back(stemmer(t4));
76  if (!t5.empty()) v.push_back(stemmer(t5));
77  if (!t6.empty()) v.push_back(stemmer(t6));
78  if (!t7.empty()) v.push_back(stemmer(t7));
79  if (!t8.empty()) v.push_back(stemmer(t8));
80  if (!t9.empty()) v.push_back(stemmer(t9));
81  if (!t10.empty()) v.push_back(stemmer(t10));
82  return Xapian::Query(op, v.begin(), v.end());
83 }
84 
85 static Xapian::Query
87  const string & t1 = string(), const string & t2 = string(),
88  const string & t3 = string(), const string & t4 = string(),
89  const string & t5 = string(), const string & t6 = string(),
90  const string & t7 = string(), const string & t8 = string(),
91  const string & t9 = string(), const string & t10 = string())
92 {
93  vector<string> v;
94  Xapian::Stem stemmer("english");
95  if (!t1.empty()) v.push_back(stemmer(t1));
96  if (!t2.empty()) v.push_back(stemmer(t2));
97  if (!t3.empty()) v.push_back(stemmer(t3));
98  if (!t4.empty()) v.push_back(stemmer(t4));
99  if (!t5.empty()) v.push_back(stemmer(t5));
100  if (!t6.empty()) v.push_back(stemmer(t6));
101  if (!t7.empty()) v.push_back(stemmer(t7));
102  if (!t8.empty()) v.push_back(stemmer(t8));
103  if (!t9.empty()) v.push_back(stemmer(t9));
104  if (!t10.empty()) v.push_back(stemmer(t10));
105  return Xapian::Query(op, v.begin(), v.end(), parameter);
106 }
107 
108 static Xapian::Query
109 query(const string &t)
110 {
111  return Xapian::Query(Xapian::Stem("english")(t));
112 }
113 
114 // #######################################################################
115 // # Tests start here
116 
117 // tests that the backend doesn't return zero docids
118 DEFINE_TESTCASE(zerodocid1, backend) {
119  // open the database (in this case a simple text file
120  // we prepared earlier)
121 
122  Xapian::Database mydb(get_database("apitest_onedoc"));
123 
124  Xapian::Enquire enquire(mydb);
125 
126  // make a simple query, with one word in it - "word".
127  enquire.set_query(Xapian::Query("word"));
128 
129  // retrieve the top ten results (we only expect one)
130  Xapian::MSet mymset = enquire.get_mset(0, 10);
131 
132  // We've done the query, now check that the result is what
133  // we expect (1 document, with non-zero docid)
134  TEST_MSET_SIZE(mymset, 1);
135 
136  TEST_AND_EXPLAIN(*(mymset.begin()) != 0,
137  "A query on a database returned a zero docid");
138 }
139 
// Runs two kinds of empty query against apitest_simpledata - a
// default-constructed Xapian::Query, and an OP_AND built from an empty range
// of subqueries - and checks each yields an empty MSet whose lower bound,
// upper bound and estimate are all zero.
// NOTE(review): the listing's own line numbering skips 150-152 and 161-163
// below, so further statements may have been lost in extraction - confirm
// against the upstream source before relying on this block.
140 // tests that an empty query returns no matches
141 DEFINE_TESTCASE(emptyquery1, backend) {
142  Xapian::Enquire enquire(get_database("apitest_simpledata"));
143 
// Case 1: default-constructed query.
144  enquire.set_query(Xapian::Query());
145  Xapian::MSet mymset = enquire.get_mset(0, 10);
146  TEST_MSET_SIZE(mymset, 0);
147  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
148  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
149  TEST_EQUAL(mymset.get_matches_estimated(), 0);
153 
// Case 2: OP_AND over an empty vector of subqueries.
154  vector<Xapian::Query> v;
155  enquire.set_query(Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end()));
156  mymset = enquire.get_mset(0, 10);
157  TEST_MSET_SIZE(mymset, 0);
158  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
159  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
160  TEST_EQUAL(mymset.get_matches_estimated(), 0);
164 }
165 
166 // tests the document count for a simple query
167 DEFINE_TESTCASE(simplequery1, backend) {
168  Xapian::Enquire enquire(get_database("apitest_simpledata"));
169  enquire.set_query(Xapian::Query("word"));
170  Xapian::MSet mymset = enquire.get_mset(0, 10);
171  TEST_MSET_SIZE(mymset, 2);
172 }
173 
174 // tests for the right documents and weights returned with simple query
175 DEFINE_TESTCASE(simplequery2, backend) {
176  // open the database (in this case a simple text file
177  // we prepared earlier)
178  Xapian::Database db = get_database("apitest_simpledata");
179  Xapian::Enquire enquire(db);
180  enquire.set_query(Xapian::Query("word"));
181 
182  // retrieve the top results
183  Xapian::MSet mymset = enquire.get_mset(0, 10);
184 
185  // We've done the query, now check that the result is what
186  // we expect (documents 2 and 4)
187  mset_expect_order(mymset, 2, 4);
188 
189  // Check the weights
190  Xapian::MSetIterator i = mymset.begin();
191  // These weights are for BM25Weight(1,0,1,0.5,0.5)
192  TEST_EQUAL_DOUBLE(i.get_weight(), 1.04648168717725);
193  i++;
194  TEST_EQUAL_DOUBLE(i.get_weight(), 0.640987686595914);
195 }
196 
197 // tests for the right document count for another simple query
198 DEFINE_TESTCASE(simplequery3, backend) {
199  Xapian::Enquire enquire(get_database("apitest_simpledata"));
200  enquire.set_query(query("this"));
201  Xapian::MSet mymset = enquire.get_mset(0, 10);
202 
203  // Check that 6 documents were returned.
204  TEST_MSET_SIZE(mymset, 6);
205 }
206 
207 // multidb2 no longer exists.
208 
// Combines apitest_simpledata and apitest_simpledata2 into one database,
// runs an OP_OR query over the stemmed terms "inmemory" and "word", and
// expects docids 2, 3 and 7 in that order.  Not run for "multi" backends.
209 // test that a multidb with 2 dbs query returns correct docids
210 DEFINE_TESTCASE(multidb3, backend && !multi) {
211  Xapian::Database mydb2(get_database("apitest_simpledata"));
212  mydb2.add_database(get_database("apitest_simpledata2"));
213  Xapian::Enquire enquire(mydb2);
214 
215  // make a query
216  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
// NOTE(review): the listing's numbering skips 217 here, so a statement may
// have been lost between these two lines - confirm against upstream.
218  enquire.set_query(myquery);
219 
220  // retrieve the top ten results
221  Xapian::MSet mymset = enquire.get_mset(0, 10);
222  mset_expect_order(mymset, 2, 3, 7);
223 }
224 
// Combines apitest_simpledata, apitest_simpledata2 and apitest_termorder
// into one database, runs an OP_OR query over the stemmed terms "inmemory"
// and "word", and expects docids 2, 3, 4 and 10 in that order.  Not run for
// "multi" backends.
225 // test that a multidb with 3 dbs query returns correct docids
226 DEFINE_TESTCASE(multidb4, backend && !multi) {
227  Xapian::Database mydb2(get_database("apitest_simpledata"));
228  mydb2.add_database(get_database("apitest_simpledata2"));
229  mydb2.add_database(get_database("apitest_termorder"));
230  Xapian::Enquire enquire(mydb2);
231 
232  // make a query
233  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
// NOTE(review): the listing's numbering skips 234 here, so a statement may
// have been lost between these two lines - confirm against upstream.
235  enquire.set_query(myquery);
236 
237  // retrieve the top ten results
238  Xapian::MSet mymset = enquire.get_mset(0, 10);
239  mset_expect_order(mymset, 2, 3, 4, 10);
240 }
241 
// Combines apitest_simpledata and apitest_simpledata2 into one database,
// runs an OP_AND query over the stemmed terms "inmemory" and "word", and
// expects only docid 2 to match.  Not run for "multi" backends.
242 // tests MultiPostList::skip_to().
243 DEFINE_TESTCASE(multidb5, backend && !multi) {
244  Xapian::Database mydb2(get_database("apitest_simpledata"));
245  mydb2.add_database(get_database("apitest_simpledata2"));
246  Xapian::Enquire enquire(mydb2);
247 
248  // make a query
249  Xapian::Query myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
// NOTE(review): the listing's numbering skips 250 here, so a statement may
// have been lost between these two lines - confirm against upstream.
251  enquire.set_query(myquery);
252 
253  // retrieve the top ten results
254  Xapian::MSet mymset = enquire.get_mset(0, 10);
255  mset_expect_order(mymset, 2);
256 }
257 
258 // tests that when specifying maxitems to get_mset, no more than
259 // that are returned.
260 DEFINE_TESTCASE(msetmaxitems1, backend) {
261  Xapian::Enquire enquire(get_database("apitest_simpledata"));
262  enquire.set_query(query("this"));
263  Xapian::MSet mymset = enquire.get_mset(0, 1);
264  TEST_MSET_SIZE(mymset, 1);
265 
266  mymset = enquire.get_mset(0, 5);
267  TEST_MSET_SIZE(mymset, 5);
268 }
269 
270 // tests the returned weights are as expected (regression test for remote
271 // backend which was using the average weight rather than the actual document
272 // weight for computing weights - fixed in 1.0.0).
273 DEFINE_TESTCASE(expandweights1, backend) {
274  Xapian::Enquire enquire(get_database("apitest_simpledata"));
275  enquire.set_query(Xapian::Query("this"));
276 
277  Xapian::MSet mymset = enquire.get_mset(0, 10);
278 
279  Xapian::RSet myrset;
280  Xapian::MSetIterator i = mymset.begin();
281  myrset.add_document(*i);
282  myrset.add_document(*(++i));
283 
284  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
285  TEST_EQUAL(eset.size(), 3);
286  TEST_REL(eset.get_ebound(), >=, eset.size());
287  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
288  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
289  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
290 
291  // Test non-default k too.
292  eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ, 2.0);
293  TEST_EQUAL(eset.size(), 3);
294  TEST_REL(eset.get_ebound(), >=, eset.size());
295  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 5.88109547674955);
296  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 5.88109547674955);
297  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.44473599216144);
298 }
299 
300 // Just like test_expandweights1 but without USE_EXACT_TERMFREQ.
301 DEFINE_TESTCASE(expandweights2, backend) {
302  Xapian::Enquire enquire(get_database("apitest_simpledata"));
303  enquire.set_query(Xapian::Query("this"));
304 
305  Xapian::MSet mymset = enquire.get_mset(0, 10);
306 
307  Xapian::RSet myrset;
308  Xapian::MSetIterator i = mymset.begin();
309  myrset.add_document(*i);
310  myrset.add_document(*(++i));
311 
312  Xapian::ESet eset = enquire.get_eset(3, myrset);
313  TEST_EQUAL(eset.size(), 3);
314  TEST_REL(eset.get_ebound(), >=, eset.size());
315  // With a multi backend, the top three terms all happen to occur in both
316  // shard so their termfreq is exactly known even without
317  // USE_EXACT_TERMFREQ and so the weights should be the same for all
318  // test harness backends.
319  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
320  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
321  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
322 }
323 
324 DEFINE_TESTCASE(expandweights3, backend) {
325  Xapian::Enquire enquire(get_database("apitest_simpledata"));
326  enquire.set_query(Xapian::Query("this"));
327 
328  Xapian::MSet mymset = enquire.get_mset(0, 10);
329 
330  Xapian::RSet myrset;
331  Xapian::MSetIterator i = mymset.begin();
332  myrset.add_document(*i);
333  myrset.add_document(*(++i));
334 
335  // Set min_wt to 6.0
336  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 0, 6.0);
337  TEST_EQUAL(eset.size(), 2);
338  TEST_REL(eset.get_ebound(), >=, eset.size());
339  // With a multi backend, the top two terms all happen to occur in both
340  // shard so their termfreq is exactly known even without
341  // USE_EXACT_TERMFREQ and so the weights should be the same for all
342  // test harness backends.
343  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
344  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
345 }
346 
347 // tests that negative weights are returned
348 DEFINE_TESTCASE(expandweights4, backend) {
349  Xapian::Enquire enquire(get_database("apitest_simpledata"));
350  enquire.set_query(Xapian::Query("paragraph"));
351 
352  Xapian::MSet mymset = enquire.get_mset(0, 10);
353 
354  Xapian::RSet myrset;
355  Xapian::MSetIterator i = mymset.begin();
356  myrset.add_document(*i);
357  myrset.add_document(*(++i));
358 
359  Xapian::ESet eset = enquire.get_eset(37, myrset, 0, 0, -100);
360  // Now include negative weights
361  TEST_EQUAL(eset.size(), 37);
362  TEST_REL(eset.get_ebound(), >=, eset.size());
363  TEST_REL(eset[36].get_weight(), <, 0);
364  TEST_REL(eset[36].get_weight(), >=, -100);
365 }
366 
367 // test for Bo1EWeight
368 DEFINE_TESTCASE(expandweights5, backend) {
369  Xapian::Enquire enquire(get_database("apitest_simpledata"));
370  enquire.set_query(Xapian::Query("this"));
371 
372  Xapian::MSet mymset = enquire.get_mset(0, 10);
373 
374  Xapian::RSet myrset;
375  Xapian::MSetIterator i = mymset.begin();
376  myrset.add_document(*i);
377  myrset.add_document(*(++i));
378 
379  enquire.set_expansion_scheme("bo1");
380  Xapian::ESet eset = enquire.get_eset(3, myrset);
381 
382  TEST_EQUAL(eset.size(), 3);
383  TEST_REL(eset.get_ebound(), >=, eset.size());
384  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 7.21765284821702);
385  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.661623193760022);
386  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.58090119783738);
387 }
388 
389 // test that "trad" can be set as an expansion scheme.
390 DEFINE_TESTCASE(expandweights6, backend) {
391  Xapian::Enquire enquire(get_database("apitest_simpledata"));
392  enquire.set_query(Xapian::Query("this"));
393 
394  Xapian::MSet mymset = enquire.get_mset(0, 10);
395 
396  Xapian::RSet myrset;
397  Xapian::MSetIterator i = mymset.begin();
398  myrset.add_document(*i);
399  myrset.add_document(*(++i));
400 
401  enquire.set_expansion_scheme("trad");
402  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
403 
404  TEST_EQUAL(eset.size(), 3);
405  TEST_REL(eset.get_ebound(), >=, eset.size());
406  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
407  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
408  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
409 }
410 
411 // test that invalid scheme names are not accepted
412 DEFINE_TESTCASE(expandweights7, backend) {
413  Xapian::Enquire enquire(get_database("apitest_simpledata"));
414 
416  enquire.set_expansion_scheme("no_such_scheme"));
417 }
418 
419 // test that "expand_k" can be passed as a parameter to get_eset
420 DEFINE_TESTCASE(expandweights8, backend) {
421  Xapian::Enquire enquire(get_database("apitest_simpledata"));
422  enquire.set_query(Xapian::Query("this"));
423 
424  Xapian::MSet mymset = enquire.get_mset(0, 10);
425 
426  Xapian::RSet myrset;
427  Xapian::MSetIterator i = mymset.begin();
428  myrset.add_document(*i);
429  myrset.add_document(*(++i));
430 
431  // Set expand_k to 1.0 and min_wt to 0
432  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 1.0, 0, 0);
433  // With a multi backend, the top three terms all happen to occur in both
434  // shard so their termfreq is exactly known even without
435  // USE_EXACT_TERMFREQ and so the weights should be the same for all
436  // test harness backends.
437  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
438  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
439  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
440  TEST_REL(eset.back().get_weight(),>=,0);
441 }
442 
443 // tests that when specifying maxitems to get_eset, no more than
444 // that are returned.
445 DEFINE_TESTCASE(expandmaxitems1, backend) {
446  Xapian::Enquire enquire(get_database("apitest_simpledata"));
447  enquire.set_query(Xapian::Query("this"));
448 
449  Xapian::MSet mymset = enquire.get_mset(0, 10);
450  tout << "mymset.size() = " << mymset.size() << endl;
451  TEST(mymset.size() >= 2);
452 
453  Xapian::RSet myrset;
454  Xapian::MSetIterator i = mymset.begin();
455  myrset.add_document(*i);
456  myrset.add_document(*(++i));
457 
458  Xapian::ESet myeset = enquire.get_eset(1, myrset);
459  TEST_EQUAL(myeset.size(), 1);
460  TEST_REL(myeset.get_ebound(), >=, myeset.size());
461 }
462 
463 // tests that a pure boolean query has all weights set to 0
464 DEFINE_TESTCASE(boolquery1, backend) {
465  Xapian::Query myboolquery(query("this"));
466 
467  // open the database (in this case a simple text file
468  // we prepared earlier)
469  Xapian::Enquire enquire(get_database("apitest_simpledata"));
470  enquire.set_query(myboolquery);
472 
473  // retrieve the top results
474  Xapian::MSet mymset = enquire.get_mset(0, 10);
475 
476  TEST_NOT_EQUAL(mymset.size(), 0);
477  TEST_EQUAL(mymset.get_max_possible(), 0);
478  for (Xapian::MSetIterator i = mymset.begin(); i != mymset.end(); ++i) {
479  TEST_EQUAL(i.get_weight(), 0);
480  }
481 }
482 
483 // tests that get_mset() specifying "this" works as expected
484 DEFINE_TESTCASE(msetfirst1, backend) {
485  Xapian::Enquire enquire(get_database("apitest_simpledata"));
486  enquire.set_query(query("this"));
487  Xapian::MSet mymset1 = enquire.get_mset(0, 6);
488  Xapian::MSet mymset2 = enquire.get_mset(3, 3);
489  TEST(mset_range_is_same(mymset1, 3, mymset2, 0, 3));
490 
491  // Regression test - we weren't adjusting the index into items[] by
492  // firstitem in api/omenquire.cc.
493  TEST_EQUAL(mymset1[5].get_document().get_data(),
494  mymset2[2].get_document().get_data());
495 }
496 
497 // tests the converting-to-percent functions
498 DEFINE_TESTCASE(topercent1, backend) {
499  Xapian::Enquire enquire(get_database("apitest_simpledata"));
500  enquire.set_query(query("this"));
501  Xapian::MSet mymset = enquire.get_mset(0, 20);
502 
503  int last_pct = 100;
504  Xapian::MSetIterator i = mymset.begin();
505  for ( ; i != mymset.end(); ++i) {
506  int pct = mymset.convert_to_percent(i);
507  TEST_AND_EXPLAIN(pct == i.get_percent(),
508  "convert_to_%(msetitor) != convert_to_%(wt)");
510  "convert_to_%(msetitor) != convert_to_%(wt)");
511  TEST_AND_EXPLAIN(pct >= 0 && pct <= 100,
512  "percentage out of range: " << pct);
513  TEST_AND_EXPLAIN(pct <= last_pct, "percentage increased down mset");
514  last_pct = pct;
515  }
516 }
517 
518 // tests the percentage values returned
519 DEFINE_TESTCASE(topercent2, backend) {
520  Xapian::Enquire enquire(get_database("apitest_simpledata"));
521 
522  int pct;
523 
524  // First, test a search in which the top document scores 100%.
525  enquire.set_query(query("this"));
526  Xapian::MSet mymset = enquire.get_mset(0, 20);
527 
528  Xapian::MSetIterator i = mymset.begin();
529  TEST(i != mymset.end());
530  pct = mymset.convert_to_percent(i);
531  TEST_EQUAL(pct, 100);
532 
533  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
534  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
535  TEST_EQUAL(mymset.get_matches_estimated(), 6);
536  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 0.0553904060041786);
537  TEST_EQUAL(mymset.size(), 6);
538  mset_expect_order(mymset, 2, 1, 3, 5, 6, 4);
539 
540  // A search in which the top document doesn't have 100%
542  "this", "line", "paragraph", "rubbish");
543  enquire.set_query(q);
544  mymset = enquire.get_mset(0, 20);
545 
546  i = mymset.begin();
547  TEST(i != mymset.end());
548  pct = mymset.convert_to_percent(i);
549  TEST_REL(pct,>,60);
550  TEST_REL(pct,<,76);
551 
552  ++i;
553 
554  TEST(i != mymset.end());
555  pct = mymset.convert_to_percent(i);
556  TEST_REL(pct,>,40);
557  TEST_REL(pct,<,50);
558 
559  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
560  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
561  TEST_EQUAL(mymset.get_matches_estimated(), 6);
562  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 1.67412192414056);
563  TEST_EQUAL(mymset.size(), 6);
564  mset_expect_order(mymset, 3, 1, 4, 2, 5, 6);
565 }
566 
568  public:
569  bool operator()(const string & tname) const {
570  unsigned long sum = 0;
571  for (unsigned ch : tname) {
572  sum += ch;
573  }
574 // if (verbose) {
575 // tout << tname << "==> " << sum << "\n";
576 // }
577  return (sum % 2) == 0;
578  }
579 };
580 
581 // tests the expand decision functor
582 DEFINE_TESTCASE(expandfunctor1, backend) {
583  Xapian::Enquire enquire(get_database("apitest_simpledata"));
584  enquire.set_query(Xapian::Query("this"));
585 
586  Xapian::MSet mymset = enquire.get_mset(0, 10);
587  TEST(mymset.size() >= 2);
588 
589  Xapian::RSet myrset;
590  Xapian::MSetIterator i = mymset.begin();
591  myrset.add_document(*i);
592  myrset.add_document(*(++i));
593 
594  EvenParityExpandFunctor myfunctor;
595 
596  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
597  unsigned int neweset_size = 0;
598  Xapian::ESetIterator j = myeset_orig.begin();
599  for ( ; j != myeset_orig.end(); ++j) {
600  if (myfunctor(*j)) neweset_size++;
601  }
602  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
603 
604 #if 0
605  // Compare myeset with the hand-filtered version of myeset_orig.
606  if (verbose) {
607  tout << "orig_eset: ";
608  copy(myeset_orig.begin(), myeset_orig.end(),
609  ostream_iterator<Xapian::ESetItem>(tout, " "));
610  tout << "\n";
611 
612  tout << "new_eset: ";
613  copy(myeset.begin(), myeset.end(),
614  ostream_iterator<Xapian::ESetItem>(tout, " "));
615  tout << "\n";
616  }
617 #endif
618  Xapian::ESetIterator orig = myeset_orig.begin();
619  Xapian::ESetIterator filt = myeset.begin();
620  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
621  // skip over items that shouldn't be in myeset
622  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
623  ++orig;
624  }
625 
626  TEST_AND_EXPLAIN(*orig == *filt &&
627  orig.get_weight() == filt.get_weight(),
628  "Mismatch in items " << *orig << " vs. " << *filt
629  << " after filtering");
630  }
631 
632  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
633  ++orig;
634  }
635 
636  TEST_EQUAL(orig, myeset_orig.end());
637  TEST_AND_EXPLAIN(filt == myeset.end(),
638  "Extra items in the filtered eset.");
639 }
640 
641 DEFINE_TESTCASE(expanddeciderfilterprefix2, backend) {
642  Xapian::Enquire enquire(get_database("apitest_simpledata"));
643  enquire.set_query(Xapian::Query("this"));
644 
645  Xapian::MSet mymset = enquire.get_mset(0, 10);
646  TEST(mymset.size() >= 2);
647 
648  Xapian::RSet myrset;
649  Xapian::MSetIterator i = mymset.begin();
650  myrset.add_document(*i);
651  myrset.add_document(*(++i));
652 
653  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
654  unsigned int neweset_size = 0;
655 
656  // Choose the first char in the first term as prefix.
657  Xapian::ESetIterator j = myeset_orig.begin();
658  TEST(myeset_orig.size() >= 1);
659  string prefix(*j, 0, 1);
660  Xapian::ExpandDeciderFilterPrefix myfunctor(prefix);
661 
662  for ( ; j != myeset_orig.end(); ++j) {
663  if (myfunctor(*j)) neweset_size++;
664  }
665  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
666 
667  Xapian::ESetIterator orig = myeset_orig.begin();
668  Xapian::ESetIterator filt = myeset.begin();
669  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
670  // skip over items that shouldn't be in myeset
671  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
672  ++orig;
673  }
674 
675  TEST_AND_EXPLAIN(*orig == *filt &&
676  orig.get_weight() == filt.get_weight(),
677  "Mismatch in items " << *orig << " vs. " << *filt
678  << " after filtering");
679  }
680 
681  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
682  ++orig;
683  }
684 
685  TEST_EQUAL(orig, myeset_orig.end());
686  TEST_AND_EXPLAIN(filt == myeset.end(),
687  "Extra items in the filtered eset.");
688 }
689 
690 // tests the percent cutoff option
691 DEFINE_TESTCASE(pctcutoff1, backend) {
692  Xapian::Enquire enquire(get_database("apitest_simpledata"));
694  "this", "line", "paragraph", "rubbish"));
695  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
696 
697  if (verbose) {
698  tout << "Original mset pcts:";
699  print_mset_percentages(mymset1);
700  tout << "\n";
701  }
702 
703  unsigned int num_items = 0;
704  int my_pct = 100;
705  int changes = 0;
706  Xapian::MSetIterator i = mymset1.begin();
707  int c = 0;
708  for ( ; i != mymset1.end(); ++i, ++c) {
709  int new_pct = mymset1.convert_to_percent(i);
710  if (new_pct != my_pct) {
711  changes++;
712  if (changes > 3) break;
713  num_items = c;
714  my_pct = new_pct;
715  }
716  }
717 
718  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
719  if (verbose) {
720  tout << "Cutoff percent: " << my_pct << "\n";
721  }
722 
723  enquire.set_cutoff(my_pct);
724  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
725 
726  if (verbose) {
727  tout << "Percentages after cutoff:";
728  print_mset_percentages(mymset2);
729  tout << "\n";
730  }
731 
732  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
733  "Match with % cutoff lost too many items");
734 
735  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
736  (mymset2.convert_to_percent(mymset2[num_items]) == my_pct &&
737  mymset2.convert_to_percent(mymset2.back()) == my_pct),
738  "Match with % cutoff returned too many items");
739 }
740 
// Runs a query for "this", derives a cutoff halfway between the percentages
// of the top two matches, then reruns the search with that cutoff and with
// collapsing on value slot 1234 enabled, asking for a single result.
// NOTE(review): the listing's own line numbering skips 745, 750, 766 and
// 768-771 in this block, so several statements were lost in extraction (the
// body of the verbose branch and the start of the final assertion among
// them).  As shown the block is not syntactically complete - restore the
// missing lines from the upstream source before relying on it.
741 // Tests the percent cutoff option combined with collapsing
742 DEFINE_TESTCASE(pctcutoff2, backend) {
743  Xapian::Enquire enquire(get_database("apitest_simpledata"));
744  enquire.set_query(Xapian::Query("this"));
746  Xapian::MSet mset = enquire.get_mset(0, 100);
747 
748  if (verbose) {
749  tout << "Original mset pcts:";
751  tout << "\n";
752  }
753 
// The top two matches must differ by at least 2 percentage points so the
// midpoint computed below falls strictly between them.
754  TEST(mset.size() >= 2);
755  TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
756 
757  int cutoff = mset[0].get_percent() + mset[1].get_percent();
758  cutoff /= 2;
759 
760  enquire.set_cutoff(cutoff);
761  enquire.set_collapse_key(1234); // Value which is always empty.
762 
763  Xapian::MSet mset2 = enquire.get_mset(0, 1);
764  TEST_EQUAL(mset2.size(), 1);
765  TEST_REL(mset2.get_matches_lower_bound(),>=,1);
767  mset2.get_matches_lower_bound());
772 }
773 
774 // Test that the percent cutoff option returns all the answers it should.
775 DEFINE_TESTCASE(pctcutoff3, backend) {
776  Xapian::Enquire enquire(get_database("apitest_simpledata"));
777  enquire.set_query(Xapian::Query("this"));
778  Xapian::MSet mset1 = enquire.get_mset(0, 10);
779 
780  if (verbose) {
781  tout << "Original mset pcts:";
782  print_mset_percentages(mset1);
783  tout << "\n";
784  }
785 
786  int percent = 100;
787  for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
788  int new_percent = mset1.convert_to_percent(i);
789  if (new_percent != percent) {
790  tout.str(string());
791  tout << "Testing " << percent << "% cutoff" << endl;
792  enquire.set_cutoff(percent);
793  Xapian::MSet mset2 = enquire.get_mset(0, 10);
794  TEST_EQUAL(mset2.back().get_percent(), percent);
795  TEST_EQUAL(mset2.size(), i.get_rank());
796  percent = new_percent;
797  }
798  }
799 }
800 
801 // tests the cutoff option
802 DEFINE_TESTCASE(cutoff1, backend) {
803  Xapian::Enquire enquire(get_database("apitest_simpledata"));
805  "this", "line", "paragraph", "rubbish"));
806  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
807 
808  if (verbose) {
809  tout << "Original mset weights:";
810  print_mset_weights(mymset1);
811  tout << "\n";
812  }
813 
814  unsigned int num_items = 0;
815  double my_wt = -100;
816  int changes = 0;
817  Xapian::MSetIterator i = mymset1.begin();
818  int c = 0;
819  for ( ; i != mymset1.end(); ++i, ++c) {
820  double new_wt = i.get_weight();
821  if (new_wt != my_wt) {
822  changes++;
823  if (changes > 3) break;
824  num_items = c;
825  my_wt = new_wt;
826  }
827  }
828 
829  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
830  if (verbose) {
831  tout << "Cutoff weight: " << my_wt << "\n";
832  }
833 
834  enquire.set_cutoff(0, my_wt);
835  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
836 
837  if (verbose) {
838  tout << "Weights after cutoff:";
839  print_mset_weights(mymset2);
840  tout << "\n";
841  }
842 
843  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
844  "Match with cutoff lost too many items");
845 
846  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
847  (mymset2[num_items].get_weight() == my_wt &&
848  mymset2.back().get_weight() == my_wt),
849  "Match with cutoff returned too many items");
850 }
851 
852 // tests the allow query terms expand option
853 DEFINE_TESTCASE(allowqterms1, backend) {
854  Xapian::Enquire enquire(get_database("apitest_simpledata"));
855  string term = "paragraph";
856  enquire.set_query(Xapian::Query(term));
857 
858  Xapian::MSet mymset = enquire.get_mset(0, 10);
859  TEST(mymset.size() >= 2);
860 
861  Xapian::RSet myrset;
862  Xapian::MSetIterator i = mymset.begin();
863  myrset.add_document(*i);
864  myrset.add_document(*(++i));
865 
866  Xapian::ESet myeset = enquire.get_eset(1000, myrset);
867  Xapian::ESetIterator j = myeset.begin();
868  for ( ; j != myeset.end(); ++j) {
869  TEST_NOT_EQUAL(*j, term);
870  }
871 
872  Xapian::ESet myeset2 = enquire.get_eset(1000, myrset, Xapian::Enquire::INCLUDE_QUERY_TERMS);
873  j = myeset2.begin();
874  for ( ; j != myeset2.end(); ++j) {
875  if (*j == term) break;
876  }
877  TEST(j != myeset2.end());
878 }
879 
880 // tests that the MSet max_attained works
881 DEFINE_TESTCASE(maxattain1, backend) {
882  Xapian::Enquire enquire(get_database("apitest_simpledata"));
883  enquire.set_query(query("this"));
884  Xapian::MSet mymset = enquire.get_mset(0, 100);
885 
886  double mymax = 0;
887  Xapian::MSetIterator i = mymset.begin();
888  for ( ; i != mymset.end(); ++i) {
889  if (i.get_weight() > mymax) mymax = i.get_weight();
890  }
891  TEST_EQUAL(mymax, mymset.get_max_attained());
892 }
893 
894 // tests a reversed boolean query
895 DEFINE_TESTCASE(reversebool1, backend) {
896  Xapian::Enquire enquire(get_database("apitest_simpledata"));
897  enquire.set_query(Xapian::Query("this"));
899 
900  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
901  TEST_AND_EXPLAIN(mymset1.size() > 1,
902  "Mset was too small to test properly");
903 
905  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
907  Xapian::MSet mymset3 = enquire.get_mset(0, 100);
908 
909  // mymset1 and mymset2 should be identical
910  TEST_EQUAL(mymset1.size(), mymset2.size());
911 
912  {
913  Xapian::MSetIterator i = mymset1.begin();
914  Xapian::MSetIterator j = mymset2.begin();
915  for ( ; i != mymset1.end(); ++i, j++) {
916  TEST(j != mymset2.end());
917  // if this fails, then setting match_sort_forward=true was not
918  // the same as the default.
919  TEST_EQUAL(*i, *j);
920  }
921  TEST(j == mymset2.end());
922  }
923 
924  // mymset1 and mymset3 should be same but reversed
925  TEST_EQUAL(mymset1.size(), mymset3.size());
926 
927  {
928  Xapian::MSetIterator i = mymset1.begin();
929  Xapian::MSetIterator j = mymset3.end();
930  for ( ; i != mymset1.end(); ++i) {
931  --j;
932  // if this fails, then setting match_sort_forward=false didn't
933  // reverse the results.
934  TEST_EQUAL(*i, *j);
935  }
936  }
937 }
938 
939 // tests a reversed boolean query, where the full mset isn't returned
940 DEFINE_TESTCASE(reversebool2, backend) {
941  Xapian::Enquire enquire(get_database("apitest_simpledata"));
942  enquire.set_query(Xapian::Query("this"));
944 
945  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
946 
947  TEST_AND_EXPLAIN(mymset1.size() > 1,
948  "Mset was too small to test properly");
949 
951  Xapian::doccount msize = mymset1.size() / 2;
952  Xapian::MSet mymset2 = enquire.get_mset(0, msize);
954  Xapian::MSet mymset3 = enquire.get_mset(0, msize);
955 
956  // mymset2 should be first msize items of mymset1
957  TEST_EQUAL(msize, mymset2.size());
958  {
959  Xapian::MSetIterator i = mymset1.begin();
960  Xapian::MSetIterator j = mymset2.begin();
961  for ( ; j != mymset2.end(); ++i, ++j) {
962  TEST(i != mymset1.end());
963  // if this fails, then setting match_sort_forward=true was not
964  // the same as the default.
965  TEST_EQUAL(*i, *j);
966  }
967  // mymset1 should be larger.
968  TEST(i != mymset1.end());
969  }
970 
971  // mymset3 should be last msize items of mymset1, in reverse order
972  TEST_EQUAL(msize, mymset3.size());
973  {
974  Xapian::MSetIterator i = mymset1.end();
976  for (j = mymset3.begin(); j != mymset3.end(); ++j) {
977  // if this fails, then setting match_sort_forward=false didn't
978  // reverse the results.
979  --i;
980  TEST_EQUAL(*i, *j);
981  }
982  }
983 }
984 
985 // tests that get_matching_terms() returns the terms in the right order
986 DEFINE_TESTCASE(getmterms1, backend) {
987  list<string> answers_list;
988  answers_list.push_back("one");
989  answers_list.push_back("two");
990  answers_list.push_back("three");
991  answers_list.push_back("four");
992 
993  Xapian::Database mydb(get_database("apitest_termorder"));
994  Xapian::Enquire enquire(mydb);
995 
998  Xapian::Query("one", 1, 1),
999  Xapian::Query("three", 1, 3)),
1001  Xapian::Query("four", 1, 4),
1002  Xapian::Query("two", 1, 2)));
1003 
1004  enquire.set_query(myquery);
1005 
1006  Xapian::MSet mymset = enquire.get_mset(0, 10);
1007 
1008  TEST_MSET_SIZE(mymset, 1);
1009  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
1010  enquire.get_matching_terms_end(mymset.begin()));
1011  TEST(list == answers_list);
1012 }
1013 
1014 // tests that get_matching_terms() returns the terms only once
1015 DEFINE_TESTCASE(getmterms2, backend) {
1016  list<string> answers_list;
1017  answers_list.push_back("one");
1018  answers_list.push_back("two");
1019  answers_list.push_back("three");
1020 
1021  Xapian::Database mydb(get_database("apitest_termorder"));
1022  Xapian::Enquire enquire(mydb);
1023 
1026  Xapian::Query("one", 1, 1),
1027  Xapian::Query("three", 1, 3)),
1029  Xapian::Query("one", 1, 4),
1030  Xapian::Query("two", 1, 2)));
1031 
1032  enquire.set_query(myquery);
1033 
1034  Xapian::MSet mymset = enquire.get_mset(0, 10);
1035 
1036  TEST_MSET_SIZE(mymset, 1);
1037  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
1038  enquire.get_matching_terms_end(mymset.begin()));
1039  TEST(list == answers_list);
1040 }
1041 
1042 // test that running a query twice returns the same results
1043 DEFINE_TESTCASE(repeatquery1, backend) {
1044  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1045  enquire.set_query(Xapian::Query("this"));
1046 
1047  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1048 
1049  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1050  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1051  TEST_EQUAL(mymset1, mymset2);
1052 }
1053 
1054 // test that prefetching documents works (at least, gives same results)
1055 DEFINE_TESTCASE(fetchdocs1, backend) {
1056  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1057  enquire.set_query(Xapian::Query("this"));
1058 
1059  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1060 
1061  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1062  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1063  TEST_EQUAL(mymset1, mymset2);
1064  mymset2.fetch(mymset2[0], mymset2[mymset2.size() - 1]);
1065  mymset2.fetch(mymset2.begin(), mymset2.end());
1066  mymset2.fetch(mymset2.begin());
1067  mymset2.fetch();
1068 
1069  Xapian::MSetIterator it1 = mymset1.begin();
1070  Xapian::MSetIterator it2 = mymset2.begin();
1071 
1072  while (it1 != mymset1.end() && it2 != mymset2.end()) {
1074  it2.get_document().get_data());
1075  TEST_NOT_EQUAL(it1.get_document().get_data(), "");
1076  TEST_NOT_EQUAL(it2.get_document().get_data(), "");
1077  it1++;
1078  it2++;
1079  }
1080  TEST_EQUAL(it1, mymset1.end());
1081  TEST_EQUAL(it1, mymset2.end());
1082 }
1083 
1084 // test that searching for a term not in the database fails nicely
1085 DEFINE_TESTCASE(absentterm1, backend) {
1086  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1088  enquire.set_query(Xapian::Query("frink"));
1089 
1090  Xapian::MSet mymset = enquire.get_mset(0, 10);
1091  mset_expect_order(mymset);
1092 }
1093 
1094 // as absentterm1, but setting query from a vector of terms
1095 DEFINE_TESTCASE(absentterm2, backend) {
1096  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1097  vector<string> terms;
1098  terms.push_back("frink");
1099 
1100  Xapian::Query query(Xapian::Query::OP_OR, terms.begin(), terms.end());
1101  enquire.set_query(query);
1102 
1103  Xapian::MSet mymset = enquire.get_mset(0, 10);
1104  mset_expect_order(mymset);
1105 }
1106 
1107 // test that rsets do sensible things
1108 DEFINE_TESTCASE(rset1, backend) {
1109  Xapian::Database mydb(get_database("apitest_rset"));
1110  Xapian::Enquire enquire(mydb);
1111  Xapian::Query myquery = query(Xapian::Query::OP_OR, "giraffe", "tiger");
1112  enquire.set_query(myquery);
1113 
1114  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1115 
1116  Xapian::RSet myrset;
1117  myrset.add_document(1);
1118 
1119  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1120 
1121  // We should have the same documents turn up, but 1 and 3 should
1122  // have higher weights with the RSet.
1123  TEST_MSET_SIZE(mymset1, 3);
1124  TEST_MSET_SIZE(mymset2, 3);
1125 }
1126 
1127 // test that rsets do more sensible things
1128 DEFINE_TESTCASE(rset2, backend) {
1129  Xapian::Database mydb(get_database("apitest_rset"));
1130  Xapian::Enquire enquire(mydb);
1131  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
1132  enquire.set_query(myquery);
1133 
1134  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1135 
1136  Xapian::RSet myrset;
1137  myrset.add_document(2);
1138 
1139  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1140 
1141  mset_expect_order(mymset1, 1, 2);
1142  mset_expect_order(mymset2, 2, 1);
1143 }
1144 
1145 // test that rsets behave correctly with multiDBs
1146 DEFINE_TESTCASE(rsetmultidb1, backend && !multi) {
1147  Xapian::Database mydb1(get_database("apitest_rset", "apitest_simpledata2"));
1148  Xapian::Database mydb2(get_database("apitest_rset"));
1149  mydb2.add_database(get_database("apitest_simpledata2"));
1150 
1151  Xapian::Enquire enquire1(mydb1);
1152  Xapian::Enquire enquire2(mydb2);
1153 
1154  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "multiple");
1155 
1156  enquire1.set_query(myquery);
1157  enquire2.set_query(myquery);
1158 
1159  Xapian::RSet myrset1;
1160  Xapian::RSet myrset2;
1161  myrset1.add_document(4);
1162  myrset2.add_document(2);
1163 
1164  Xapian::MSet mymset1a = enquire1.get_mset(0, 10);
1165  Xapian::MSet mymset1b = enquire1.get_mset(0, 10, &myrset1);
1166  Xapian::MSet mymset2a = enquire2.get_mset(0, 10);
1167  Xapian::MSet mymset2b = enquire2.get_mset(0, 10, &myrset2);
1168 
1169  mset_expect_order(mymset1a, 1, 4);
1170  mset_expect_order(mymset1b, 4, 1);
1171  mset_expect_order(mymset2a, 1, 2);
1172  mset_expect_order(mymset2b, 2, 1);
1173 
1174  TEST(mset_range_is_same_weights(mymset1a, 0, mymset2a, 0, 2));
1175  TEST(mset_range_is_same_weights(mymset1b, 0, mymset2b, 0, 2));
1176  TEST_NOT_EQUAL(mymset1a, mymset1b);
1177  TEST_NOT_EQUAL(mymset2a, mymset2b);
1178 }
1179 
1180 // regression tests - used to cause assertion in stats.h to fail
1181 // Doesn't actually fail for multi but it doesn't make sense to run there.
1182 DEFINE_TESTCASE(rsetmultidb3, backend && !multi) {
1183  Xapian::Enquire enquire(get_database("apitest_simpledata2"));
1184  enquire.set_query(query(Xapian::Query::OP_OR, "cuddly", "people"));
1185  Xapian::MSet mset = enquire.get_mset(0, 10); // used to fail assertion
1186 }
1187 
1189 DEFINE_TESTCASE(eliteset1, backend && !multi) {
1190  Xapian::Database mydb(get_database("apitest_simpledata"));
1191  Xapian::Enquire enquire(mydb);
1192 
1193  Xapian::Query myquery1 = query(Xapian::Query::OP_OR, "word");
1194 
1196  "simple", "word");
1197 
1198  enquire.set_query(myquery1, 2); // So the query lengths are the same.
1199  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1200 
1201  enquire.set_query(myquery2);
1202  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1203 
1204  TEST_EQUAL(mymset1, mymset2);
1205 }
1206 
1208 DEFINE_TESTCASE(elitesetmulti1, multi) {
1209  Xapian::Database mydb(get_database("apitest_simpledata"));
1210  Xapian::Enquire enquire(mydb);
1211 
1213  "simple", "word");
1214 
1215  enquire.set_query(myquery2);
1216  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1217 
1218  // For a sharded database, the elite set is resolved per shard and can
1219  // select different terms because the max term weights vary with the
1220  // per-shard term statistics. I can't see a feasible way to create
1221  // an equivalent MSet to compare with so for now at least we hard-code
1222  // the expected values.
1223  TEST_EQUAL(mymset2.size(), 3);
1224  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1225  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1226  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
1227  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.1736756775723788948);
1228  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.0464816871772451012);
1229  mset_expect_order(mymset2, 2, 4, 5);
1230  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.0464816871772451012);
1231  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.64098768659591376373);
1232  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.46338869498075929698);
1233 }
1234 
1237 DEFINE_TESTCASE(eliteset2, backend && !multi) {
1238  Xapian::Database mydb(get_database("apitest_simpledata"));
1239  Xapian::Enquire enquire(mydb);
1240 
1241  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1242 
1243  vector<Xapian::Query> qs;
1244  qs.push_back(query("this"));
1245  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1247  qs.begin(), qs.end(), 1);
1248 
1249  enquire.set_query(myquery1);
1250  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1251 
1252  enquire.set_query(myquery2);
1253  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1254 
1255  TEST_EQUAL(mymset1, mymset2);
1256 }
1257 
1259 DEFINE_TESTCASE(elitesetmulti2, multi) {
1260  Xapian::Database mydb(get_database("apitest_simpledata"));
1261  Xapian::Enquire enquire(mydb);
1262 
1263  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1264 
1265  vector<Xapian::Query> qs;
1266  qs.push_back(query("this"));
1267  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1269  qs.begin(), qs.end(), 1);
1270 
1271  enquire.set_query(myquery2);
1272  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1273 
1274  // For a sharded database, the elite set is resolved per shard and can
1275  // select different terms because the max term weights vary with the
1276  // per-shard term statistics. I can't see a feasible way to create
1277  // an equivalent MSet to compare with so for now at least we hard-code
1278  // the expected values.
1279  TEST_EQUAL(mymset2.size(), 4);
1280  TEST_EQUAL(mymset2.get_matches_lower_bound(), 4);
1281  TEST_EQUAL(mymset2.get_matches_estimated(), 4);
1282  TEST_EQUAL(mymset2.get_matches_upper_bound(), 4);
1283  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 2.6585705165783908299);
1284  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.9700834242150864206);
1285  mset_expect_order(mymset2, 2, 1, 3, 5);
1286  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.9700834242150864206);
1287  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.051103097360122341775);
1288  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.043131803408968119595);
1289  TEST_EQUAL_DOUBLE(mymset2[3].get_weight(), 0.043131803408968119595);
1290 }
1291 
1292 
1295 DEFINE_TESTCASE(eliteset3, backend) {
1296  Xapian::Database mydb1(get_database("apitest_simpledata"));
1297  Xapian::Enquire enquire1(mydb1);
1298 
1299  Xapian::Database mydb2(get_database("apitest_simpledata"));
1300  Xapian::Enquire enquire2(mydb2);
1301 
1302  // make a query
1303  Xapian::Stem stemmer("english");
1304 
1305  string term1 = stemmer("word");
1306  string term2 = stemmer("rubbish");
1307  string term3 = stemmer("banana");
1308 
1309  vector<string> terms;
1310  terms.push_back(term1);
1311  terms.push_back(term2);
1312  terms.push_back(term3);
1313 
1314  Xapian::Query myquery1(Xapian::Query::OP_OR, terms.begin(), terms.end());
1315  enquire1.set_query(myquery1);
1316 
1317  Xapian::Query myquery2(Xapian::Query::OP_ELITE_SET, terms.begin(), terms.end(), 3);
1318  enquire2.set_query(myquery2);
1319 
1320  // retrieve the results
1321  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1322  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1323 
1324  TEST_EQUAL(mymset1, mymset2);
1325 
1326  TEST_EQUAL(mymset1.get_termfreq(term1),
1327  mymset2.get_termfreq(term1));
1328  TEST_EQUAL(mymset1.get_termweight(term1),
1329  mymset2.get_termweight(term1));
1330  TEST_EQUAL(mymset1.get_termfreq(term2),
1331  mymset2.get_termfreq(term2));
1332  TEST_EQUAL(mymset1.get_termweight(term2),
1333  mymset2.get_termweight(term2));
1334  TEST_EQUAL(mymset1.get_termfreq(term3),
1335  mymset2.get_termfreq(term3));
1336  TEST_EQUAL(mymset1.get_termweight(term3),
1337  mymset2.get_termweight(term3));
1338 }
1339 
1341 DEFINE_TESTCASE(eliteset4, backend && !multi) {
1342  Xapian::Database mydb1(get_database("apitest_simpledata"));
1343  Xapian::Enquire enquire1(mydb1);
1344 
1345  Xapian::Database mydb2(get_database("apitest_simpledata"));
1346  Xapian::Enquire enquire2(mydb2);
1347 
1348  Xapian::Query myquery1 = query("rubbish");
1350  "word", "rubbish", "fibble");
1351  enquire1.set_query(myquery1);
1352  enquire2.set_query(myquery2);
1353 
1354  // retrieve the results
1355  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1356  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1357 
1358  TEST_NOT_EQUAL(mymset2.size(), 0);
1359  TEST_EQUAL(mymset1, mymset2);
1360 }
1361 
1363 DEFINE_TESTCASE(elitesetmulti4, multi) {
1364  Xapian::Database mydb2(get_database("apitest_simpledata"));
1365  Xapian::Enquire enquire2(mydb2);
1366 
1368  "word", "rubbish", "fibble");
1369  enquire2.set_query(myquery2);
1370 
1371  // retrieve the results
1372  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1373 
1374  // For a sharded database, the elite set is resolved per shard and can
1375  // select different terms because the max term weights vary with the
1376  // per-shard term statistics. I can't see a feasible way to create
1377  // an equivalent MSet to compare with so for now at least we hard-code
1378  // the expected values.
1379  TEST_EQUAL(mymset2.size(), 3);
1380  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1381  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1382  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
1383  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.4848948390060121572);
1384  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.4848948390060121572);
1385  mset_expect_order(mymset2, 3, 2, 4);
1386  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.4848948390060121572);
1387  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 1.0464816871772451012);
1388  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.64098768659591376373);
1389 }
1390 
1392 DEFINE_TESTCASE(eliteset5, backend) {
1393  Xapian::Database mydb1(get_database("apitest_simpledata"));
1394  Xapian::Enquire enquire1(mydb1);
1395 
1396  vector<string> v;
1397  for (int i = 0; i != 3; ++i) {
1398  v.push_back("simpl");
1399  v.push_back("queri");
1400 
1401  v.push_back("rubbish");
1402  v.push_back("rubbish");
1403  v.push_back("rubbish");
1404  v.push_back("word");
1405  v.push_back("word");
1406  v.push_back("word");
1407  }
1408 
1409  for (Xapian::termcount n = 1; n != v.size(); ++n) {
1411  v.begin(), v.end(), n);
1413  myquery1,
1414  0.004);
1415 
1416  enquire1.set_query(myquery1);
1417  // On architectures with excess precision (or, at least, on x86), the
1418  // following call used to result in a segfault (at least when n=1).
1419  enquire1.get_mset(0, 10);
1420  }
1421 }
1422 
1424 DEFINE_TESTCASE(termlisttermfreq1, backend) {
1425  Xapian::Database mydb(get_database("apitest_simpledata"));
1426  Xapian::Enquire enquire(mydb);
1427  Xapian::Stem stemmer("english");
1428  Xapian::RSet rset1;
1429  Xapian::RSet rset2;
1430  rset1.add_document(5);
1431  rset2.add_document(6);
1432 
1433  Xapian::ESet eset1 = enquire.get_eset(1000, rset1);
1434  Xapian::ESet eset2 = enquire.get_eset(1000, rset2);
1435 
1436  // search for weight of term 'another'
1437  string theterm = stemmer("another");
1438 
1439  double wt1 = 0;
1440  double wt2 = 0;
1441  {
1442  Xapian::ESetIterator i = eset1.begin();
1443  for ( ; i != eset1.end(); ++i) {
1444  if (*i == theterm) {
1445  wt1 = i.get_weight();
1446  break;
1447  }
1448  }
1449  }
1450  {
1451  Xapian::ESetIterator i = eset2.begin();
1452  for ( ; i != eset2.end(); ++i) {
1453  if (*i == theterm) {
1454  wt2 = i.get_weight();
1455  break;
1456  }
1457  }
1458  }
1459 
1460  TEST_NOT_EQUAL(wt1, 0);
1461  TEST_NOT_EQUAL(wt2, 0);
1462  TEST_EQUAL(wt1, wt2);
1463 }
1464 
1466 DEFINE_TESTCASE(qterminfo1, backend) {
1467  Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
1468  Xapian::Enquire enquire1(mydb1);
1469 
1470  Xapian::Database mydb2(get_database("apitest_simpledata"));
1471  mydb2.add_database(get_database("apitest_simpledata2"));
1472  Xapian::Enquire enquire2(mydb2);
1473 
1474  // make a query
1475  Xapian::Stem stemmer("english");
1476 
1477  string term1 = stemmer("word");
1478  string term2 = stemmer("inmemory");
1479  string term3 = stemmer("flibble");
1480 
1482  Xapian::Query(term1),
1484  Xapian::Query(term2),
1485  Xapian::Query(term3)));
1486  enquire1.set_query(myquery);
1487  enquire2.set_query(myquery);
1488 
1489  // retrieve the results
1490  Xapian::MSet mymset1a = enquire1.get_mset(0, 0);
1491  Xapian::MSet mymset2a = enquire2.get_mset(0, 0);
1492 
1493  TEST_EQUAL(mymset1a.get_termfreq(term1),
1494  mymset2a.get_termfreq(term1));
1495  TEST_EQUAL(mymset1a.get_termfreq(term2),
1496  mymset2a.get_termfreq(term2));
1497  TEST_EQUAL(mymset1a.get_termfreq(term3),
1498  mymset2a.get_termfreq(term3));
1499 
1500  TEST_EQUAL(mymset1a.get_termfreq(term1), 3);
1501  TEST_EQUAL(mymset1a.get_termfreq(term2), 1);
1502  TEST_EQUAL(mymset1a.get_termfreq(term3), 0);
1503 
1504  TEST_NOT_EQUAL(mymset1a.get_termweight(term1), 0);
1505  TEST_NOT_EQUAL(mymset1a.get_termweight(term2), 0);
1506  // non-existent terms should have 0 weight.
1507  TEST_EQUAL(mymset1a.get_termweight(term3), 0);
1508 
1509  TEST_EQUAL(mymset1a.get_termfreq(stemmer("banana")), 1);
1511  mymset1a.get_termweight(stemmer("banana")));
1512 
1513  TEST_EQUAL(mymset1a.get_termfreq("sponge"), 0);
1515  mymset1a.get_termweight("sponge"));
1516 }
1517 
1519 DEFINE_TESTCASE(qterminfo2, backend) {
1520  Xapian::Database db(get_database("apitest_simpledata"));
1521  Xapian::Enquire enquire(db);
1522 
1523  // make a query
1524  Xapian::Stem stemmer("english");
1525 
1526  string term1 = stemmer("paragraph");
1527  string term2 = stemmer("another");
1528 
1529  enquire.set_query(Xapian::Query(term1));
1530  Xapian::MSet mset0 = enquire.get_mset(0, 10);
1531 
1532  TEST_NOT_EQUAL(mset0.get_termweight("paragraph"), 0);
1533 
1535  Xapian::Query(Xapian::Query::OP_AND, term1, term2));
1536  enquire.set_query(query);
1537 
1538  // retrieve the results
1539  // Note: get_mset() used to throw "AssertionError" in debug builds
1540  Xapian::MSet mset = enquire.get_mset(0, 10);
1541 
1542  TEST_NOT_EQUAL(mset.get_termweight("paragraph"), 0);
1543 }
1544 
1545 // tests that when specifying that no items are to be returned, those
1546 // statistics which should be the same are.
1547 DEFINE_TESTCASE(msetzeroitems1, backend) {
1548  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1549  enquire.set_query(query("this"));
1550  Xapian::MSet mymset1 = enquire.get_mset(0, 0);
1551 
1552  Xapian::MSet mymset2 = enquire.get_mset(0, 1);
1553 
1554  TEST_EQUAL(mymset1.get_max_possible(), mymset2.get_max_possible());
1555 }
1556 
1557 // test that the matches_* of a simple query are as expected
// Runs a series of simple queries against "apitest_simpledata" and checks
// the lower bound / estimate / upper bound which the resulting MSet reports
// for the number of matches (and, in the later stanzas, for the number of
// uncollapsed matches).
//
// NOTE(review): the line numbers embedded in this listing have gaps
// (1571->1575, 1581->1585, 1591->1595, 1601->1605, 1613->1616, 1617->1621),
// so some statements appear to be missing from this copy - presumably
// further checks; confirm against the upstream file.
1558 DEFINE_TESTCASE(matches1, backend) {
// Some expectations below depend on the backend type being tested.
1559  bool multi = startswith(get_dbtype(), "multi");
1560  bool remote = get_dbtype().find("remote") != string::npos;
1561 
1562  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1563  Xapian::Query myquery;
1564  Xapian::MSet mymset;
1565 
// Single term query.
1566  myquery = query("word");
1567  enquire.set_query(myquery);
1568  mymset = enquire.get_mset(0, 10);
1569  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1570  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1571  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1575 
// OR of two terms.
1576  myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
1577  enquire.set_query(myquery);
1578  mymset = enquire.get_mset(0, 10);
1579  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1580  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1581  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1585 
// AND of two terms which is expected to match nothing.
1586  myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
1587  enquire.set_query(myquery);
1588  mymset = enquire.get_mset(0, 10);
1589  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1590  TEST_EQUAL(mymset.get_matches_estimated(), 0);
1591  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
1595 
// AND of two terms, asking for more items than there are matches.
1596  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1597  enquire.set_query(myquery);
1598  mymset = enquire.get_mset(0, 10);
1599  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1600  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1601  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1605 
// The same AND query, but asking for no items at all, so only bounds and an
// estimate (rather than exact counts) are expected.
1606  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1607  enquire.set_query(myquery);
1608  mymset = enquire.get_mset(0, 0);
1609  if (!multi) {
1610  // This isn't true for sharded DBs since there one sub-database has 3
1611  // documents and simple and word both have termfreq of 2, so the
1612  // matcher can tell at least one document must match!)
1613  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1614  }
1616  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1617  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1621 
// Same query again, now asking for one item.
1622  mymset = enquire.get_mset(0, 1);
1623  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1624  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1625  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1626  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1627  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1628  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1629 
// Same query again, now asking for two items.
1630  mymset = enquire.get_mset(0, 2);
1631  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1632  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1633  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1634  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1635  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1636  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1637 
// A different AND query, first asking for no items.
1638  myquery = query(Xapian::Query::OP_AND, "paragraph", "another");
1639  enquire.set_query(myquery);
1640  mymset = enquire.get_mset(0, 0);
1641  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1642  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1643  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1644  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1645  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1646  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1647 
// Asking for one item: the estimate and upper bound expected here differ
// between a remote multi database and every other backend.
1648  mymset = enquire.get_mset(0, 1);
1649  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1650  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1651  if (multi && remote) {
1652  // The matcher can tell there's only one match in this case.
1653  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1654  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1655  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1656  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1657  } else {
1658  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1659  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1660  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1661  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1662  }
1663 
// Asking for two items: all three figures are expected to be 1.
1664  mymset = enquire.get_mset(0, 2);
1665  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1666  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1667  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1668  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1669  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1670  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1671 
// Asking for up to 20 items starting from the second match.
1672  mymset = enquire.get_mset(1, 20);
1673  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1674  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1675  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1676  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1677  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1678  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1679 }
1680 
1681 // tests that wqf affects the document weights
1682 DEFINE_TESTCASE(wqf1, backend) {
1683  // Both queries have length 2; in q1 word has wqf=2, in q2 word has wqf=1
1684  Xapian::Query q1("word", 2);
1685  Xapian::Query q2("word");
1686  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1687  enquire.set_query(q1);
1688  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1689  enquire.set_query(q2);
1690  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1691  // Check the weights
1692  TEST(mset1.begin().get_weight() > mset2.begin().get_weight());
1693 }
1694 
1695 // tests that query length affects the document weights
1696 DEFINE_TESTCASE(qlen1, backend) {
1697  Xapian::Query q1("word");
1698  Xapian::Query q2("word");
1699  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1700  enquire.set_query(q1);
1701  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1702  enquire.set_query(q2);
1703  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1704  // Check the weights
1705  // TEST(mset1.begin().get_weight() < mset2.begin().get_weight());
1706  TEST(mset1.begin().get_weight() == mset2.begin().get_weight());
1707 }
1708 
1709 // tests that opening a non-existent termlist throws the correct exception
1710 DEFINE_TESTCASE(termlist1, backend) {
1711  Xapian::Database db(get_database("apitest_onedoc"));
1716  /* Cause the database to be used properly, showing up problems
1717  * with the link being in a bad state. CME */
1718  Xapian::TermIterator temp = db.termlist_begin(1);
1720  Xapian::TermIterator t = db.termlist_begin(999999999));
1721 }
1722 
1723 // tests that a Xapian::TermIterator works as an STL iterator
1724 DEFINE_TESTCASE(termlist2, backend) {
1725  Xapian::Database db(get_database("apitest_onedoc"));
1727  Xapian::TermIterator tend = db.termlist_end(1);
1728 
1729  // test operator= creates a copy which compares equal
1730  Xapian::TermIterator t_copy = t;
1731  TEST_EQUAL(t, t_copy);
1732 
1733  // test copy constructor creates a copy which compares equal
1734  Xapian::TermIterator t_clone(t);
1735  TEST_EQUAL(t, t_clone);
1736 
1737  vector<string> v(t, tend);
1738 
1739  t = db.termlist_begin(1);
1740  tend = db.termlist_end(1);
1741  vector<string>::const_iterator i;
1742  for (i = v.begin(); i != v.end(); ++i) {
1743  TEST_NOT_EQUAL(t, tend);
1744  TEST_EQUAL(*i, *t);
1745  t++;
1746  }
1747  TEST_EQUAL(t, tend);
1748 }
1749 
1750 static Xapian::TermIterator
1752 {
1753  Xapian::Database db(get_database("apitest_onedoc"));
1754  return db.termlist_begin(1);
1755 }
1756 
1757 // tests that a Xapian::TermIterator still works when the DB is deleted
1758 DEFINE_TESTCASE(termlist3, backend) {
1760  Xapian::Database db(get_database("apitest_onedoc"));
1762  Xapian::TermIterator tend = db.termlist_end(1);
1763 
1764  while (t != tend) {
1765  TEST_EQUAL(*t, *u);
1766  t++;
1767  u++;
1768  }
1769 }
1770 
1771 // tests skip_to
1772 DEFINE_TESTCASE(termlist4, backend) {
1773  Xapian::Database db(get_database("apitest_onedoc"));
1775  i.skip_to("");
1776  i.skip_to("\xff");
1777 }
1778 
1779 // tests punctuation is OK in terms (particularly in remote queries)
1780 DEFINE_TESTCASE(puncterms1, backend) {
1781  Xapian::Database db(get_database("apitest_punc"));
1782  Xapian::Enquire enquire(db);
1783 
1784  Xapian::Query q1("semi;colon");
1785  enquire.set_query(q1);
1786  Xapian::MSet m1 = enquire.get_mset(0, 10);
1787 
1788  Xapian::Query q2("col:on");
1789  enquire.set_query(q2);
1790  Xapian::MSet m2 = enquire.get_mset(0, 10);
1791 
1792  Xapian::Query q3("com,ma");
1793  enquire.set_query(q3);
1794  Xapian::MSet m3 = enquire.get_mset(0, 10);
1795 }
1796 
1797 // test that searching for a term with a space or backslash in it works
1798 DEFINE_TESTCASE(spaceterms1, backend) {
1799  Xapian::Enquire enquire(get_database("apitest_space"));
1800  Xapian::MSet mymset;
1801  Xapian::doccount count;
1803  Xapian::Stem stemmer("english");
1804 
1805  enquire.set_query(stemmer("space man"));
1806  mymset = enquire.get_mset(0, 10);
1807  TEST_MSET_SIZE(mymset, 1);
1808  count = 0;
1809  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1810  TEST_EQUAL(count, 1);
1811 
1812  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
1813  TEST_NOT_EQUAL(mymset.begin().get_document().get_data(), "");
1814  TEST_NOT_EQUAL(mymset.begin().get_document().get_value(value_no), "");
1815  }
1816 
1817  enquire.set_query(stemmer("tab\tby"));
1818  mymset = enquire.get_mset(0, 10);
1819  TEST_MSET_SIZE(mymset, 1);
1820  count = 0;
1821  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1822  TEST_EQUAL(count, 1);
1823 
1824  for (Xapian::valueno value_no = 0; value_no < 7; ++value_no) {
1825  string value = mymset.begin().get_document().get_value(value_no);
1826  TEST_NOT_EQUAL(value, "");
1827  if (value_no == 0) {
1828  TEST(value.size() > 262);
1829  TEST_EQUAL(static_cast<unsigned char>(value[262]), 255);
1830  }
1831  }
1832 
1833  enquire.set_query(stemmer("back\\slash"));
1834  mymset = enquire.get_mset(0, 10);
1835  TEST_MSET_SIZE(mymset, 1);
1836  count = 0;
1837  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1838  TEST_EQUAL(count, 1);
1839 }
1840 
1841 // test that XOR queries work
1842 DEFINE_TESTCASE(xor1, backend) {
1843  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1844  Xapian::Stem stemmer("english");
1845 
1846  vector<string> terms;
1847  terms.push_back(stemmer("this"));
1848  terms.push_back(stemmer("word"));
1849  terms.push_back(stemmer("of"));
1850 
1851  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1853  enquire.set_query(query);
1854 
1855  Xapian::MSet mymset = enquire.get_mset(0, 10);
1856  // Docid this word of Match?
1857  // 1 * *
1858  // 2 * * * *
1859  // 3 * *
1860  // 4 * *
1861  // 5 * *
1862  // 6 * *
1863  mset_expect_order(mymset, 1, 2, 5, 6);
1864 }
1865 
1867 DEFINE_TESTCASE(xor2, backend) {
1868  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1869  Xapian::Stem stemmer("english");
1870 
1871  vector<string> terms;
1872  terms.push_back(stemmer("this"));
1873  terms.push_back(stemmer("word"));
1874  terms.push_back(stemmer("of"));
1875 
1876  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1877  enquire.set_query(query);
1878 
1879  Xapian::MSet mymset = enquire.get_mset(0, 10);
1880  // Docid LEN this word of Match?
1881  // 1 28 2 *
1882  // 2 81 5 8 1 *
1883  // 3 15 1 2
1884  // 4 31 1 1
1885  // 5 15 1 *
1886  // 6 15 1 *
1887  mset_expect_order(mymset, 2, 1, 5, 6);
1888 }
1889 
1890 // test Xapian::Database::get_document()
1891 DEFINE_TESTCASE(getdoc1, backend) {
1892  Xapian::Database db(get_database("apitest_onedoc"));
1893  Xapian::Document doc(db.get_document(1));
1899  // Check that Document works as a handle on modification
1900  // (this was broken for the first try at Xapian::Document prior to 0.7).
1901  Xapian::Document doc2 = doc;
1902  doc.set_data("modified!");
1903  TEST_EQUAL(doc.get_data(), "modified!");
1904  TEST_EQUAL(doc.get_data(), doc2.get_data());
1905 }
1906 
1907 // test whether operators with no elements work as a null query
1908 DEFINE_TESTCASE(emptyop1, backend) {
1909  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1910  vector<Xapian::Query> nullvec;
1911 
1912  Xapian::Query query1(Xapian::Query::OP_XOR, nullvec.begin(), nullvec.end());
1913 
1914  enquire.set_query(query1);
1915  Xapian::MSet mymset = enquire.get_mset(0, 10);
1916  TEST_MSET_SIZE(mymset, 0);
1917  // In Xapian < 1.3.0, this gave InvalidArgumentError (because
1918  // query1.empty()) but elsewhere we treat an empty query as just not
1919  // matching any documents, so we now do the same here too.
1921  enquire.get_matching_terms_end(1));
1922 }
1923 
1924 // Regression test for check_at_least SEGV when there are no matches.
1925 DEFINE_TESTCASE(checkatleast1, backend) {
1926  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1927  enquire.set_query(Xapian::Query("thom"));
1928  Xapian::MSet mymset = enquire.get_mset(0, 10, 11);
1929  TEST_EQUAL(0, mymset.size());
1930 }
1931 
1932 // Regression test - if check_at_least was set we returned (check_at_least - 1)
1933 // results, rather than the requested msize. Fixed in 1.0.2.
1934 DEFINE_TESTCASE(checkatleast2, backend) {
1935  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1936  enquire.set_query(Xapian::Query("paragraph"));
1937 
1938  Xapian::MSet mymset = enquire.get_mset(0, 3, 10);
1939  TEST_MSET_SIZE(mymset, 3);
1940  TEST_EQUAL(mymset.get_matches_lower_bound(), 5);
1942 
1943  mymset = enquire.get_mset(0, 2, 4);
1944  TEST_MSET_SIZE(mymset, 2);
1945  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1946  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1947  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,4);
1948  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,4);
1949 }
1950 
1951 // Feature tests - check_at_least with various sorting options.
1952 DEFINE_TESTCASE(checkatleast3, backend) {
1953  Xapian::Enquire enquire(get_database("etext"));
1954  enquire.set_query(Xapian::Query("prussian")); // 60 matches.
1955 
1956  for (int order = 0; order < 3; ++order) {
1957  switch (order) {
1958  case 0:
1960  break;
1961  case 1:
1963  break;
1964  case 2:
1966  break;
1967  }
1968 
1969  for (int sort = 0; sort < 7; ++sort) {
1970  bool reverse = (sort & 1);
1971  switch (sort) {
1972  case 0:
1973  enquire.set_sort_by_relevance();
1974  break;
1975  case 1: case 2:
1976  enquire.set_sort_by_value(0, reverse);
1977  break;
1978  case 3: case 4:
1979  enquire.set_sort_by_value_then_relevance(0, reverse);
1980  break;
1981  case 5: case 6:
1982  enquire.set_sort_by_relevance_then_value(0, reverse);
1983  break;
1984  }
1985 
1986  Xapian::MSet mset = enquire.get_mset(0, 100, 500);
1987  TEST_MSET_SIZE(mset, 60);
1988  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1989  TEST_EQUAL(mset.get_matches_estimated(), 60);
1990  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
1994 
1995  mset = enquire.get_mset(0, 50, 100);
1996  TEST_MSET_SIZE(mset, 50);
1997  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1998  TEST_EQUAL(mset.get_matches_estimated(), 60);
1999  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
2000  TEST_EQUAL(mset.get_uncollapsed_matches_lower_bound(), 60);
2001  TEST_EQUAL(mset.get_uncollapsed_matches_estimated(), 60);
2002  TEST_EQUAL(mset.get_uncollapsed_matches_upper_bound(), 60);
2003 
2004  mset = enquire.get_mset(0, 10, 50);
2005  TEST_MSET_SIZE(mset, 10);
2006  TEST_REL(mset.get_matches_lower_bound(),>=,50);
2007  TEST_REL(mset.get_uncollapsed_matches_lower_bound(),>=,50);
2008  }
2009  }
2010 }
2011 
2012 // tests all document postlists
2013 DEFINE_TESTCASE(allpostlist1, backend) {
2014  Xapian::Database db(get_database("apitest_manydocs"));
2016  unsigned int j = 1;
2017  while (i != db.postlist_end("")) {
2018  TEST_EQUAL(*i, j);
2019  i++;
2020  j++;
2021  }
2022  TEST_EQUAL(j, 513);
2023 
2024  i = db.postlist_begin("");
2025  j = 1;
2026  while (i != db.postlist_end("")) {
2027  TEST_EQUAL(*i, j);
2028  i++;
2029  j++;
2030  if (j == 50) {
2031  j += 10;
2032  i.skip_to(j);
2033  }
2034  }
2035  TEST_EQUAL(j, 513);
2036 }
2037 
2039 {
2040  // Don't bother with postlist_begin() because allpostlist tests cover that.
2042  TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
2043  TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
2045 }
2046 
2047 // tests results of passing an empty term to various methods
2048 DEFINE_TESTCASE(emptyterm1, backend) {
2049  Xapian::Database db(get_database("apitest_manydocs"));
2050  TEST_EQUAL(db.get_doccount(), 512);
2052 
2053  db = get_database("apitest_onedoc");
2054  TEST_EQUAL(db.get_doccount(), 1);
2056 
2057  db = get_database("");
2058  TEST_EQUAL(db.get_doccount(), 0);
2060 }
2061 
2062 // Test for alldocs postlist with a sparse database.
2063 DEFINE_TESTCASE(alldocspl1, writable) {
2065  Xapian::Document doc;
2066  doc.set_data("5");
2067  doc.add_value(0, "5");
2068  db.replace_document(5, doc);
2069 
2071  TEST(i != db.postlist_end(""));
2072  TEST_EQUAL(*i, 5);
2073  TEST_EQUAL(i.get_doclength(), 0);
2074  TEST_EQUAL(i.get_unique_terms(), 0);
2075  TEST_EQUAL(i.get_wdf(), 1);
2076  ++i;
2077  TEST(i == db.postlist_end(""));
2078 }
2079 
2080 // Test reading and writing a modified alldocspostlist.
2081 DEFINE_TESTCASE(alldocspl2, writable) {
2082  Xapian::PostingIterator i, end;
2083  {
2085  Xapian::Document doc;
2086  doc.set_data("5");
2087  doc.add_value(0, "5");
2088  db.replace_document(5, doc);
2089 
2090  // Test iterating before committing the changes.
2091  i = db.postlist_begin("");
2092  end = db.postlist_end("");
2093  TEST(i != end);
2094  TEST_EQUAL(*i, 5);
2095  TEST_EQUAL(i.get_doclength(), 0);
2096  TEST_EQUAL(i.get_unique_terms(), 0);
2097  TEST_EQUAL(i.get_wdf(), 1);
2098  ++i;
2099  TEST(i == end);
2100 
2101  db.commit();
2102 
2103  // Test iterating after committing the changes.
2104  i = db.postlist_begin("");
2105  end = db.postlist_end("");
2106  TEST(i != end);
2107  TEST_EQUAL(*i, 5);
2108  TEST_EQUAL(i.get_doclength(), 0);
2109  TEST_EQUAL(i.get_unique_terms(), 0);
2110  TEST_EQUAL(i.get_wdf(), 1);
2111  ++i;
2112  TEST(i == end);
2113 
2114  // Add another document.
2115  doc = Xapian::Document();
2116  doc.set_data("5");
2117  doc.add_value(0, "7");
2118  db.replace_document(7, doc);
2119 
2120  // Test iterating through before committing the changes.
2121  i = db.postlist_begin("");
2122  end = db.postlist_end("");
2123  TEST(i != end);
2124  TEST_EQUAL(*i, 5);
2125  TEST_EQUAL(i.get_doclength(), 0);
2126  TEST_EQUAL(i.get_unique_terms(), 0);
2127  TEST_EQUAL(i.get_wdf(), 1);
2128  ++i;
2129  TEST(i != end);
2130  TEST_EQUAL(*i, 7);
2131  TEST_EQUAL(i.get_doclength(), 0);
2132  TEST_EQUAL(i.get_unique_terms(), 0);
2133  TEST_EQUAL(i.get_wdf(), 1);
2134  ++i;
2135  TEST(i == end);
2136 
2137  // Delete the first document.
2138  db.delete_document(5);
2139 
2140  // Test iterating through before committing the changes.
2141  i = db.postlist_begin("");
2142  end = db.postlist_end("");
2143  TEST(i != end);
2144  TEST_EQUAL(*i, 7);
2145  TEST_EQUAL(i.get_doclength(), 0);
2146  TEST_EQUAL(i.get_unique_terms(), 0);
2147  TEST_EQUAL(i.get_wdf(), 1);
2148  ++i;
2149  TEST(i == end);
2150 
2151  // Test iterating through after committing the changes, and dropping the
2152  // reference to the main DB.
2153  db.commit();
2154  i = db.postlist_begin("");
2155  end = db.postlist_end("");
2156  }
2157 
2158  TEST(i != end);
2159  TEST_EQUAL(*i, 7);
2160  TEST_EQUAL(i.get_doclength(), 0);
2161  TEST_EQUAL(i.get_unique_terms(), 0);
2162  TEST_EQUAL(i.get_wdf(), 1);
2163  ++i;
2164  TEST(i == end);
2165 }
2166 
2167 // Feature test for Query::OP_SCALE_WEIGHT.
2168 DEFINE_TESTCASE(scaleweight1, backend) {
2169  Xapian::Database db(get_database("apitest_phrase"));
2170  Xapian::Enquire enq(db);
2172 
2173  static const char * const queries[] = {
2174  "pad",
2175  "milk fridge",
2176  "leave milk on fridge",
2177  "ordered milk operator",
2178  "ordered phrase operator",
2179  "leave \"milk on fridge\"",
2180  "notpresent",
2181  "leave \"milk notpresent\"",
2182  };
2183  static const double multipliers[] = {
2184  -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
2185  0, 0
2186  };
2187 
2188  for (auto qstr : queries) {
2189  tout.str(string());
2190  Xapian::Query query1 = qp.parse_query(qstr);
2191  tout << "query1: " << query1.get_description() << endl;
2192  for (const double *multp = multipliers; multp[0] != multp[1]; ++multp) {
2193  double mult = *multp;
2194  if (mult < 0) {
2197  query1, mult));
2198  continue;
2199  }
2200  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, mult);
2201  tout << "query2: " << query2.get_description() << endl;
2202 
2203  enq.set_query(query1);
2204  Xapian::MSet mset1 = enq.get_mset(0, 20);
2205  enq.set_query(query2);
2206  Xapian::MSet mset2 = enq.get_mset(0, 20);
2207 
2208  TEST_EQUAL(mset1.size(), mset2.size());
2209 
2210  Xapian::MSetIterator i1, i2;
2211  if (mult > 0) {
2212  for (i1 = mset1.begin(), i2 = mset2.begin();
2213  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2214  TEST_EQUAL_DOUBLE(i1.get_weight() * mult, i2.get_weight());
2215  TEST_EQUAL(*i1, *i2);
2216  }
2217  } else {
2218  // Weights in mset2 are 0; so it should be sorted by docid.
2219  vector<Xapian::docid> ids1;
2220  vector<Xapian::docid> ids2;
2221  for (i1 = mset1.begin(), i2 = mset2.begin();
2222  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2223  TEST_NOT_EQUAL_DOUBLE(i1.get_weight(), 0);
2224  TEST_EQUAL_DOUBLE(i2.get_weight(), 0);
2225  ids1.push_back(*i1);
2226  ids2.push_back(*i2);
2227  }
2228  sort(ids1.begin(), ids1.end());
2229  TEST_EQUAL(ids1, ids2);
2230  }
2231  }
2232  }
2233 }
2234 
2235 // Test Query::OP_SCALE_WEIGHT being used to multiply some of the weights of a
2236 // search by zero.
2237 DEFINE_TESTCASE(scaleweight2, backend) {
2238  Xapian::Database db(get_database("apitest_phrase"));
2239  Xapian::Enquire enq(db);
2241 
2242  Xapian::Query query1("fridg");
2243  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, 2.5);
2244  Xapian::Query query3("milk");
2245  Xapian::Query query4(Xapian::Query::OP_SCALE_WEIGHT, query3, 0);
2246  Xapian::Query query5(Xapian::Query::OP_OR, query2, query4);
2247 
2248  // query5 should first return the same results as query1, in the same
2249  // order, and then return the results of query3 which aren't also results
2250  // of query1, in ascending docid order. We test that this happens.
2251 
2252  // First, build a vector of docids matching the first part of the query,
2253  // and append the non-duplicate docids matching the second part of the
2254  // query.
2255  vector<Xapian::docid> ids1;
2256  set<Xapian::docid> idsin1;
2257  vector<Xapian::docid> ids3;
2258 
2259  enq.set_query(query1);
2260  Xapian::MSet mset1 = enq.get_mset(0, 20);
2261  enq.set_query(query3);
2262  Xapian::MSet mset3 = enq.get_mset(0, 20);
2263  TEST_NOT_EQUAL(mset1.size(), 0);
2264  for (i = mset1.begin(); i != mset1.end(); ++i) {
2265  ids1.push_back(*i);
2266  idsin1.insert(*i);
2267  }
2268  TEST_NOT_EQUAL(mset3.size(), 0);
2269  for (i = mset3.begin(); i != mset3.end(); ++i) {
2270  if (idsin1.find(*i) != idsin1.end())
2271  continue;
2272  ids3.push_back(*i);
2273  }
2274  sort(ids3.begin(), ids3.end());
2275  ids1.insert(ids1.end(), ids3.begin(), ids3.end());
2276 
2277  // Now, run the combined query and build a vector of the matching docids.
2278  vector<Xapian::docid> ids5;
2279  enq.set_query(query5);
2280  Xapian::MSet mset5 = enq.get_mset(0, 20);
2281  for (i = mset5.begin(); i != mset5.end(); ++i) {
2282  ids5.push_back(*i);
2283  }
2284 
2285  TEST_EQUAL(ids1, ids5);
2286 }
2287 
2288 // Regression test for bug fixed in 1.0.5 - this test would failed under
2289 // valgrind because it used an uninitialised value.
2290 DEFINE_TESTCASE(bm25weight1, backend) {
2291  Xapian::Enquire enquire(get_database("apitest_simpledata"));
2292  enquire.set_weighting_scheme(Xapian::BM25Weight(1, 25, 1, 0.01, 0.5));
2293  enquire.set_query(Xapian::Query("word"));
2294 
2295  Xapian::MSet mset = enquire.get_mset(0, 25);
2296 }
2297 
2298 // Feature test for TradWeight.
2299 DEFINE_TESTCASE(tradweight1, backend) {
2300  Xapian::Enquire enquire(get_database("apitest_simpledata"));
2302  enquire.set_query(Xapian::Query("word"));
2303 
2304  Xapian::MSet mset = enquire.get_mset(0, 25);
2305  TEST_EQUAL(mset.size(), 2);
2306 
2308  enquire.set_query(Xapian::Query("this"));
2309 
2310  mset = enquire.get_mset(0, 25);
2311  TEST_EQUAL(mset.size(), 6);
2312 
2313  // Check that TradWeight(0) means wdf and doc length really don't affect
2314  // the weights as stated in the documentation.
2315  TEST_EQUAL(mset[0].get_weight(), mset[5].get_weight());
2316 }
2317 
2318 // Test TradWeight when weighting documents using an RSet.
2319 // Simply changed the weighting scheme used by rset2 testcase.
2320 DEFINE_TESTCASE(tradweight4, backend) {
2321  Xapian::Database mydb(get_database("apitest_rset"));
2322  Xapian::Enquire enquire(mydb);
2323  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
2324 
2325  enquire.set_query(myquery);
2327 
2328  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
2329 
2330  Xapian::RSet myrset;
2331  myrset.add_document(2);
2332 
2333  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
2334 
2335  mset_expect_order(mymset1, 1, 2);
2336  // Document 2 should have higher weight than document 1 despite the wdf of
2337  // "people" being 1 because "people" indexes a document in the RSet whereas
2338  // "cuddly" (wdf=2) does not.
2339  mset_expect_order(mymset2, 2, 1);
2340 }
2341 
2342 // Feature test for Database::get_uuid().
2343 DEFINE_TESTCASE(uuid1, backend && !multi) {
2344  SKIP_TEST_FOR_BACKEND("inmemory");
2345  Xapian::Database db = get_database("apitest_simpledata");
2346  string uuid1 = db.get_uuid();
2347  TEST_EQUAL(uuid1.size(), 36);
2348 
2349  // A database with no sub-databases has an empty UUID.
2350  Xapian::Database db2;
2351  TEST(db2.get_uuid().empty());
2352 
2353  db2.add_database(db);
2354  TEST_EQUAL(uuid1, db2.get_uuid());
2355 
2356  // Multi-database has multiple UUIDs (we don't define the format exactly
2357  // so this assumes something about the implementation).
2358  db2.add_database(db);
2359  TEST_EQUAL(uuid1 + ":" + uuid1, db2.get_uuid());
2360 
2361 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
2362  // This relies on InMemory databases not supporting uuids.
2363  // A multi-database containing a database with no uuid has no uuid.
2364  db2.add_database(Xapian::Database(string(), Xapian::DB_BACKEND_INMEMORY));
2365  TEST(db2.get_uuid().empty());
2366 #endif
2367 }
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Definition: testutils.h:78
const int DB_BACKEND_INMEMORY
Use the "in memory" backend.
Definition: constants.h:195
ExpandDecider subclass which restricts terms to a particular prefix.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
Definition: omenquire.cc:829
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: omenquire.cc:878
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
Definition: omdatabase.cc:198
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_sort_by_relevance()
Set the sorting to be by relevance only.
Definition: omenquire.cc:863
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:850
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
static void print_mset_percentages(const Xapian::MSet &mset)
Definition: api_anydb.cc:54
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: omenquire.cc:198
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:869
ESetIterator back() const
Return iterator pointing to the last object in this ESet.
Definition: eset.h:362
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
Definition: enquire.h:713
Match documents which an odd number of subqueries match.
Definition: query.h:107
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:856
Class representing a stemming algorithm.
Definition: stem.h:62
PositionIterator positionlist_begin(Xapian::docid did, const std::string &tname) const
An iterator pointing to the start of the position list for a given term in a given document...
Definition: omdatabase.cc:250
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:46
op
Query operators.
Definition: query.h:78
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails...
Definition: testsuite.h:267
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: omenquire.cc:246
static void test_emptyterm1_helper(Xapian::Database &db)
Definition: api_anydb.cc:2038
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
Definition: testsuite.h:300
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
double get_max_attained() const
The maximum weight attained by any document.
Definition: omenquire.cc:297
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
a generic test suite engine
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
Definition: enquire.h:601
Class representing a list of search results.
Definition: mset.h:44
void skip_to(const std::string &term)
Advance the iterator to term term.
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
Virtual base class for expand decider functor.
Definition: expanddecider.h:37
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
static Xapian::Stem stemmer
Definition: stemtest.cc:41
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
Definition: enquire.h:591
double get_weight() const
Get the weight for the current position.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:956
test functionality of the Xapian API
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: omenquire.cc:262
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_REL(A, REL, B)
Test a relation holds, e.g. TEST_REL(a,>,b);.
Definition: testmacros.h:32
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
Definition: eset.h:345
Class for iterating over a list of terms.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Definition: omenquire.cc:941
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
Xapian::doccount size() const
Return number of items in this ESet object.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: omenquire.cc:276
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
docids sort in whatever order is most efficient for the backend.
Definition: enquire.h:329
static int verbose
Definition: xapian-delve.cc:47
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
This class provides read/write access to a database.
Definition: database.h:785
DEFINE_TESTCASE(zerodocid1, backend)
Definition: api_anydb.cc:118
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:351
Scale the weight contributed by a subquery.
Definition: query.h:166
Public interfaces for the Xapian library.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Definition: omenquire.cc:887
docids sort in ascending order (default)
Definition: enquire.h:324
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
std::string get_dbtype()
Definition: apitest.cc:42
void fetch(const MSetIterator &begin, const MSetIterator &end) const
Prefetch hint a range of items.
Definition: mset.h:595
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:607
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:612
Xapian::termcount get_ebound() const
Return a bound on the full size of this ESet object.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Definition: omenquire.cc:222
int percent
The percentage score for a document in an MSet.
Definition: types.h:66
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:768
static void print_mset_weights(const Xapian::MSet &mset)
Definition: api_anydb.cc:45
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:46
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
Definition: queryparser.cc:161
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Definition: mset.h:514
Iterator over a Xapian::ESet.
Definition: eset.h:160
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
Definition: database.h:238
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:75
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Definition: omdatabase.cc:148
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
bool term_exists(const std::string &tname) const
Check if a given term exists in the database.
Definition: omdatabase.cc:524
double get_weight() const
Get the weight for the current position.
Definition: omenquire.cc:460
void add_document(Xapian::docid did)
Add a document to the relevance set.
Definition: omenquire.cc:104
Match only documents which all subqueries match.
Definition: query.h:84
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
static Xapian::TermIterator test_termlist3_helper()
Definition: api_anydb.cc:1751
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
bool operator()(const string &tname) const
Do we want this term in the ESet?
Definition: api_anydb.cc:569
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:283
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Match documents which at least one subquery matches.
Definition: query.h:92
void skip_to(Xapian::docid did)
Advance the iterator to document did.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:111
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Definition: omenquire.cc:206
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
Class representing a list of search results.
Definition: eset.h:43
Xapian::Document get_document() const
Get the Document object for the current position.
Definition: omenquire.cc:450
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Definition: mset.h:624
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
Definition: omenquire.cc:842
std::string get_value(Xapian::valueno slot) const
Get value by number.
Definition: omdocument.cc:64
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Definition: eset.h:350
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:535
A relevance set (R-Set).
Definition: enquire.h:60
std::string get_uuid() const
Get a UUID for the database.
Definition: omdatabase.cc:776
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:269
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Definition: omdatabase.cc:339
docids sort in descending order.
Definition: enquire.h:326