xapian-core  1.4.21
api_anydb.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2015,2016,2017,2020 Olly Betts
7  * Copyright 2006,2008 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23  * USA
24  */
25 
26 #include <config.h>
27 
28 #include "api_anydb.h"
29 
30 #include <algorithm>
31 #include <string>
32 
33 #define XAPIAN_DEPRECATED(X) X
34 #include <xapian.h>
35 #include "testsuite.h"
36 #include "testutils.h"
37 
38 #include "apitest.h"
39 
40 #include <list>
41 
42 using namespace std;
43 
44 static void
46 {
47  Xapian::MSetIterator i = mset.begin();
48  for ( ; i != mset.end(); ++i) {
49  tout << " " << i.get_weight();
50  }
51 }
52 
53 static void
55 {
56  Xapian::MSetIterator i = mset.begin();
57  for ( ; i != mset.end(); ++i) {
58  tout << " " << mset.convert_to_percent(i);
59  }
60 }
61 
62 static Xapian::Query
64  const string & t1 = string(), const string & t2 = string(),
65  const string & t3 = string(), const string & t4 = string(),
66  const string & t5 = string(), const string & t6 = string(),
67  const string & t7 = string(), const string & t8 = string(),
68  const string & t9 = string(), const string & t10 = string())
69 {
70  vector<string> v;
71  Xapian::Stem stemmer("english");
72  if (!t1.empty()) v.push_back(stemmer(t1));
73  if (!t2.empty()) v.push_back(stemmer(t2));
74  if (!t3.empty()) v.push_back(stemmer(t3));
75  if (!t4.empty()) v.push_back(stemmer(t4));
76  if (!t5.empty()) v.push_back(stemmer(t5));
77  if (!t6.empty()) v.push_back(stemmer(t6));
78  if (!t7.empty()) v.push_back(stemmer(t7));
79  if (!t8.empty()) v.push_back(stemmer(t8));
80  if (!t9.empty()) v.push_back(stemmer(t9));
81  if (!t10.empty()) v.push_back(stemmer(t10));
82  return Xapian::Query(op, v.begin(), v.end());
83 }
84 
85 static Xapian::Query
87  const string & t1 = string(), const string & t2 = string(),
88  const string & t3 = string(), const string & t4 = string(),
89  const string & t5 = string(), const string & t6 = string(),
90  const string & t7 = string(), const string & t8 = string(),
91  const string & t9 = string(), const string & t10 = string())
92 {
93  vector<string> v;
94  Xapian::Stem stemmer("english");
95  if (!t1.empty()) v.push_back(stemmer(t1));
96  if (!t2.empty()) v.push_back(stemmer(t2));
97  if (!t3.empty()) v.push_back(stemmer(t3));
98  if (!t4.empty()) v.push_back(stemmer(t4));
99  if (!t5.empty()) v.push_back(stemmer(t5));
100  if (!t6.empty()) v.push_back(stemmer(t6));
101  if (!t7.empty()) v.push_back(stemmer(t7));
102  if (!t8.empty()) v.push_back(stemmer(t8));
103  if (!t9.empty()) v.push_back(stemmer(t9));
104  if (!t10.empty()) v.push_back(stemmer(t10));
105  return Xapian::Query(op, v.begin(), v.end(), parameter);
106 }
107 
108 static Xapian::Query
109 query(const string &t)
110 {
111  return Xapian::Query(Xapian::Stem("english")(t));
112 }
113 
114 // #######################################################################
115 // # Tests start here
116 
117 // tests that the backend doesn't return zero docids
118 DEFINE_TESTCASE(zerodocid1, backend) {
119  // open the database (in this case a simple text file
120  // we prepared earlier)
121 
122  Xapian::Database mydb(get_database("apitest_onedoc"));
123 
124  Xapian::Enquire enquire(mydb);
125 
126  // make a simple query, with one word in it - "word".
127  enquire.set_query(Xapian::Query("word"));
128 
129  // retrieve the top ten results (we only expect one)
130  Xapian::MSet mymset = enquire.get_mset(0, 10);
131 
132  // We've done the query, now check that the result is what
133  // we expect (1 document, with non-zero docid)
134  TEST_MSET_SIZE(mymset, 1);
135 
136  TEST_AND_EXPLAIN(*(mymset.begin()) != 0,
137  "A query on a database returned a zero docid");
138 }
139 
140 // tests that an empty query returns no matches
141 DEFINE_TESTCASE(emptyquery1, backend) {
142  Xapian::Enquire enquire(get_database("apitest_simpledata"));
143 
144  enquire.set_query(Xapian::Query());
145  Xapian::MSet mymset = enquire.get_mset(0, 10);
146  TEST_MSET_SIZE(mymset, 0);
147  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
148  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
149  TEST_EQUAL(mymset.get_matches_estimated(), 0);
153 
154  vector<Xapian::Query> v;
155  enquire.set_query(Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end()));
156  mymset = enquire.get_mset(0, 10);
157  TEST_MSET_SIZE(mymset, 0);
158  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
159  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
160  TEST_EQUAL(mymset.get_matches_estimated(), 0);
164 }
165 
166 // tests the document count for a simple query
167 DEFINE_TESTCASE(simplequery1, backend) {
168  Xapian::Enquire enquire(get_database("apitest_simpledata"));
169  enquire.set_query(Xapian::Query("word"));
170  Xapian::MSet mymset = enquire.get_mset(0, 10);
171  TEST_MSET_SIZE(mymset, 2);
172 }
173 
174 // tests for the right documents and weights returned with simple query
175 DEFINE_TESTCASE(simplequery2, backend) {
176  // open the database (in this case a simple text file
177  // we prepared earlier)
178  Xapian::Database db = get_database("apitest_simpledata");
179  Xapian::Enquire enquire(db);
180  enquire.set_query(Xapian::Query("word"));
181 
182  // retrieve the top results
183  Xapian::MSet mymset = enquire.get_mset(0, 10);
184 
185  // We've done the query, now check that the result is what
186  // we expect (documents 2 and 4)
187  mset_expect_order(mymset, 2, 4);
188 
189  // Check the weights
190  Xapian::MSetIterator i = mymset.begin();
191  // These weights are for BM25Weight(1,0,1,0.5,0.5)
192  TEST_EQUAL_DOUBLE(i.get_weight(), 1.04648168717725);
193  i++;
194  TEST_EQUAL_DOUBLE(i.get_weight(), 0.640987686595914);
195 }
196 
197 // tests for the right document count for another simple query
198 DEFINE_TESTCASE(simplequery3, backend) {
199  Xapian::Enquire enquire(get_database("apitest_simpledata"));
200  enquire.set_query(query("this"));
201  Xapian::MSet mymset = enquire.get_mset(0, 10);
202 
203  // Check that 6 documents were returned.
204  TEST_MSET_SIZE(mymset, 6);
205 }
206 
207 // multidb2 no longer exists.
208 
209 // test that a multidb with 2 dbs query returns correct docids
210 DEFINE_TESTCASE(multidb3, backend && !multi) {
211  Xapian::Database mydb2(get_database("apitest_simpledata"));
212  mydb2.add_database(get_database("apitest_simpledata2"));
213  Xapian::Enquire enquire(mydb2);
214 
215  // make a query
216  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
218  enquire.set_query(myquery);
219 
220  // retrieve the top ten results
221  Xapian::MSet mymset = enquire.get_mset(0, 10);
222  mset_expect_order(mymset, 2, 3, 7);
223 }
224 
225 // test that a multidb with 3 dbs query returns correct docids
226 DEFINE_TESTCASE(multidb4, backend && !multi) {
227  Xapian::Database mydb2(get_database("apitest_simpledata"));
228  mydb2.add_database(get_database("apitest_simpledata2"));
229  mydb2.add_database(get_database("apitest_termorder"));
230  Xapian::Enquire enquire(mydb2);
231 
232  // make a query
233  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
235  enquire.set_query(myquery);
236 
237  // retrieve the top ten results
238  Xapian::MSet mymset = enquire.get_mset(0, 10);
239  mset_expect_order(mymset, 2, 3, 4, 10);
240 }
241 
242 // tests MultiPostList::skip_to().
243 DEFINE_TESTCASE(multidb5, backend && !multi) {
244  Xapian::Database mydb2(get_database("apitest_simpledata"));
245  mydb2.add_database(get_database("apitest_simpledata2"));
246  Xapian::Enquire enquire(mydb2);
247 
248  // make a query
249  Xapian::Query myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
251  enquire.set_query(myquery);
252 
253  // retrieve the top ten results
254  Xapian::MSet mymset = enquire.get_mset(0, 10);
255  mset_expect_order(mymset, 2);
256 }
257 
258 // tests that when specifying maxitems to get_mset, no more than
259 // that are returned.
260 DEFINE_TESTCASE(msetmaxitems1, backend) {
261  Xapian::Enquire enquire(get_database("apitest_simpledata"));
262  enquire.set_query(query("this"));
263  Xapian::MSet mymset = enquire.get_mset(0, 1);
264  TEST_MSET_SIZE(mymset, 1);
265 
266  mymset = enquire.get_mset(0, 5);
267  TEST_MSET_SIZE(mymset, 5);
268 }
269 
270 // tests the returned weights are as expected (regression test for remote
271 // backend which was using the average weight rather than the actual document
272 // weight for computing weights - fixed in 1.0.0).
273 DEFINE_TESTCASE(expandweights1, backend) {
274  Xapian::Enquire enquire(get_database("apitest_simpledata"));
275  enquire.set_query(Xapian::Query("this"));
276 
277  Xapian::MSet mymset = enquire.get_mset(0, 10);
278 
279  Xapian::RSet myrset;
280  Xapian::MSetIterator i = mymset.begin();
281  myrset.add_document(*i);
282  myrset.add_document(*(++i));
283 
284  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
285  TEST_EQUAL(eset.size(), 3);
286  TEST_REL(eset.get_ebound(), >=, eset.size());
287  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
288  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
289  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
290 
291  // Test non-default k too.
292  eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ, 2.0);
293  TEST_EQUAL(eset.size(), 3);
294  TEST_REL(eset.get_ebound(), >=, eset.size());
295  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 5.88109547674955);
296  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 5.88109547674955);
297  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.44473599216144);
298 }
299 
300 // Just like test_expandweights1 but without USE_EXACT_TERMFREQ.
301 DEFINE_TESTCASE(expandweights2, backend) {
302  Xapian::Enquire enquire(get_database("apitest_simpledata"));
303  enquire.set_query(Xapian::Query("this"));
304 
305  Xapian::MSet mymset = enquire.get_mset(0, 10);
306 
307  Xapian::RSet myrset;
308  Xapian::MSetIterator i = mymset.begin();
309  myrset.add_document(*i);
310  myrset.add_document(*(++i));
311 
312  Xapian::ESet eset = enquire.get_eset(3, myrset);
313  TEST_EQUAL(eset.size(), 3);
314  TEST_REL(eset.get_ebound(), >=, eset.size());
315  // With a multi backend, the top three terms all happen to occur in both
316  // shard so their termfreq is exactly known even without
317  // USE_EXACT_TERMFREQ and so the weights should be the same for all
318  // test harness backends.
319  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
320  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
321  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
322 }
323 
324 DEFINE_TESTCASE(expandweights3, backend) {
325  Xapian::Enquire enquire(get_database("apitest_simpledata"));
326  enquire.set_query(Xapian::Query("this"));
327 
328  Xapian::MSet mymset = enquire.get_mset(0, 10);
329 
330  Xapian::RSet myrset;
331  Xapian::MSetIterator i = mymset.begin();
332  myrset.add_document(*i);
333  myrset.add_document(*(++i));
334 
335  // Set min_wt to 6.0
336  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 0, 6.0);
337  TEST_EQUAL(eset.size(), 2);
338  TEST_REL(eset.get_ebound(), >=, eset.size());
339  // With a multi backend, the top two terms all happen to occur in both
340  // shard so their termfreq is exactly known even without
341  // USE_EXACT_TERMFREQ and so the weights should be the same for all
342  // test harness backends.
343  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
344  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
345 }
346 
347 // tests that negative weights are returned
348 DEFINE_TESTCASE(expandweights4, backend) {
349  Xapian::Enquire enquire(get_database("apitest_simpledata"));
350  enquire.set_query(Xapian::Query("paragraph"));
351 
352  Xapian::MSet mymset = enquire.get_mset(0, 10);
353 
354  Xapian::RSet myrset;
355  Xapian::MSetIterator i = mymset.begin();
356  myrset.add_document(*i);
357  myrset.add_document(*(++i));
358 
359  Xapian::ESet eset = enquire.get_eset(37, myrset, 0, 0, -100);
360  // Now include negative weights
361  TEST_EQUAL(eset.size(), 37);
362  TEST_REL(eset.get_ebound(), >=, eset.size());
363  TEST_REL(eset[36].get_weight(), <, 0);
364  TEST_REL(eset[36].get_weight(), >=, -100);
365 }
366 
367 // test for Bo1EWeight
368 DEFINE_TESTCASE(expandweights5, backend) {
369  Xapian::Enquire enquire(get_database("apitest_simpledata"));
370  enquire.set_query(Xapian::Query("this"));
371 
372  Xapian::MSet mymset = enquire.get_mset(0, 10);
373 
374  Xapian::RSet myrset;
375  Xapian::MSetIterator i = mymset.begin();
376  myrset.add_document(*i);
377  myrset.add_document(*(++i));
378 
379  enquire.set_expansion_scheme("bo1");
380  Xapian::ESet eset = enquire.get_eset(3, myrset);
381 
382  TEST_EQUAL(eset.size(), 3);
383  TEST_REL(eset.get_ebound(), >=, eset.size());
384  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 7.21765284821702);
385  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.661623193760022);
386  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 5.58090119783738);
387 }
388 
389 // test that "trad" can be set as an expansion scheme.
390 DEFINE_TESTCASE(expandweights6, backend) {
391  Xapian::Enquire enquire(get_database("apitest_simpledata"));
392  enquire.set_query(Xapian::Query("this"));
393 
394  Xapian::MSet mymset = enquire.get_mset(0, 10);
395 
396  Xapian::RSet myrset;
397  Xapian::MSetIterator i = mymset.begin();
398  myrset.add_document(*i);
399  myrset.add_document(*(++i));
400 
401  enquire.set_expansion_scheme("trad");
402  Xapian::ESet eset = enquire.get_eset(3, myrset, enquire.USE_EXACT_TERMFREQ);
403 
404  TEST_EQUAL(eset.size(), 3);
405  TEST_REL(eset.get_ebound(), >=, eset.size());
406  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
407  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
408  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
409 }
410 
411 // test that invalid scheme names are not accepted
412 DEFINE_TESTCASE(expandweights7, backend) {
413  Xapian::Enquire enquire(get_database("apitest_simpledata"));
414 
416  enquire.set_expansion_scheme("no_such_scheme"));
417 }
418 
419 // test that "expand_k" can be passed as a parameter to get_eset
420 DEFINE_TESTCASE(expandweights8, backend) {
421  Xapian::Enquire enquire(get_database("apitest_simpledata"));
422  enquire.set_query(Xapian::Query("this"));
423 
424  Xapian::MSet mymset = enquire.get_mset(0, 10);
425 
426  Xapian::RSet myrset;
427  Xapian::MSetIterator i = mymset.begin();
428  myrset.add_document(*i);
429  myrset.add_document(*(++i));
430 
431  // Set expand_k to 1.0 and min_wt to 0
432  Xapian::ESet eset = enquire.get_eset(50, myrset, 0, 1.0, 0, 0);
433  // With a multi backend, the top three terms all happen to occur in both
434  // shard so their termfreq is exactly known even without
435  // USE_EXACT_TERMFREQ and so the weights should be the same for all
436  // test harness backends.
437  TEST_EQUAL_DOUBLE(eset[0].get_weight(), 6.08904001099445);
438  TEST_EQUAL_DOUBLE(eset[1].get_weight(), 6.08904001099445);
439  TEST_EQUAL_DOUBLE(eset[2].get_weight(), 4.73383620844021);
440  TEST_REL(eset.back().get_weight(),>=,0);
441 }
442 
443 // tests that when specifying maxitems to get_eset, no more than
444 // that are returned.
445 DEFINE_TESTCASE(expandmaxitems1, backend) {
446  Xapian::Enquire enquire(get_database("apitest_simpledata"));
447  enquire.set_query(Xapian::Query("this"));
448 
449  Xapian::MSet mymset = enquire.get_mset(0, 10);
450  tout << "mymset.size() = " << mymset.size() << endl;
451  TEST(mymset.size() >= 2);
452 
453  Xapian::RSet myrset;
454  Xapian::MSetIterator i = mymset.begin();
455  myrset.add_document(*i);
456  myrset.add_document(*(++i));
457 
458  Xapian::ESet myeset = enquire.get_eset(1, myrset);
459  TEST_EQUAL(myeset.size(), 1);
460  TEST_REL(myeset.get_ebound(), >=, myeset.size());
461 }
462 
463 // tests that a pure boolean query has all weights set to 0
464 DEFINE_TESTCASE(boolquery1, backend) {
465  Xapian::Query myboolquery(query("this"));
466 
467  // open the database (in this case a simple text file
468  // we prepared earlier)
469  Xapian::Enquire enquire(get_database("apitest_simpledata"));
470  enquire.set_query(myboolquery);
472 
473  // retrieve the top results
474  Xapian::MSet mymset = enquire.get_mset(0, 10);
475 
476  TEST_NOT_EQUAL(mymset.size(), 0);
477  TEST_EQUAL(mymset.get_max_possible(), 0);
478  for (Xapian::MSetIterator i = mymset.begin(); i != mymset.end(); ++i) {
479  TEST_EQUAL(i.get_weight(), 0);
480  }
481 }
482 
483 // tests that get_mset() specifying "this" works as expected
484 DEFINE_TESTCASE(msetfirst1, backend) {
485  Xapian::Enquire enquire(get_database("apitest_simpledata"));
486  enquire.set_query(query("this"));
487  Xapian::MSet mymset1 = enquire.get_mset(0, 6);
488  Xapian::MSet mymset2 = enquire.get_mset(3, 3);
489  TEST(mset_range_is_same(mymset1, 3, mymset2, 0, 3));
490 
491  // Regression test - we weren't adjusting the index into items[] by
492  // firstitem in api/omenquire.cc.
493  TEST_EQUAL(mymset1[5].get_document().get_data(),
494  mymset2[2].get_document().get_data());
495 }
496 
497 // tests the converting-to-percent functions
498 DEFINE_TESTCASE(topercent1, backend) {
499  Xapian::Enquire enquire(get_database("apitest_simpledata"));
500  enquire.set_query(query("this"));
501  Xapian::MSet mymset = enquire.get_mset(0, 20);
502 
503  int last_pct = 100;
504  Xapian::MSetIterator i = mymset.begin();
505  for ( ; i != mymset.end(); ++i) {
506  int pct = mymset.convert_to_percent(i);
507  TEST_AND_EXPLAIN(pct == i.get_percent(),
508  "convert_to_%(msetitor) != convert_to_%(wt)");
510  "convert_to_%(msetitor) != convert_to_%(wt)");
511  TEST_AND_EXPLAIN(pct >= 0 && pct <= 100,
512  "percentage out of range: " << pct);
513  TEST_AND_EXPLAIN(pct <= last_pct, "percentage increased down mset");
514  last_pct = pct;
515  }
516 }
517 
518 // tests the percentage values returned
519 DEFINE_TESTCASE(topercent2, backend) {
520  Xapian::Enquire enquire(get_database("apitest_simpledata"));
521 
522  int pct;
523 
524  // First, test a search in which the top document scores 100%.
525  enquire.set_query(query("this"));
526  Xapian::MSet mymset = enquire.get_mset(0, 20);
527 
528  Xapian::MSetIterator i = mymset.begin();
529  TEST(i != mymset.end());
530  pct = mymset.convert_to_percent(i);
531  TEST_EQUAL(pct, 100);
532 
533  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
534  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
535  TEST_EQUAL(mymset.get_matches_estimated(), 6);
536  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 0.0553904060041786);
537  TEST_EQUAL(mymset.size(), 6);
538  mset_expect_order(mymset, 2, 1, 3, 5, 6, 4);
539 
540  // A search in which the top document doesn't have 100%
542  "this", "line", "paragraph", "rubbish");
543  enquire.set_query(q);
544  mymset = enquire.get_mset(0, 20);
545 
546  i = mymset.begin();
547  TEST(i != mymset.end());
548  pct = mymset.convert_to_percent(i);
549  TEST_REL(pct,>,60);
550  TEST_REL(pct,<,76);
551 
552  ++i;
553 
554  TEST(i != mymset.end());
555  pct = mymset.convert_to_percent(i);
556  TEST_REL(pct,>,40);
557  TEST_REL(pct,<,50);
558 
559  TEST_EQUAL(mymset.get_matches_lower_bound(), 6);
560  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
561  TEST_EQUAL(mymset.get_matches_estimated(), 6);
562  TEST_EQUAL_DOUBLE(mymset.get_max_attained(), 1.67412192414056);
563  TEST_EQUAL(mymset.size(), 6);
564  mset_expect_order(mymset, 3, 1, 4, 2, 5, 6);
565 }
566 
568  public:
569  bool operator()(const string & tname) const {
570  unsigned long sum = 0;
571  for (unsigned ch : tname) {
572  sum += ch;
573  }
574 // if (verbose) {
575 // tout << tname << "==> " << sum << "\n";
576 // }
577  return (sum % 2) == 0;
578  }
579 };
580 
581 // tests the expand decision functor
582 DEFINE_TESTCASE(expandfunctor1, backend) {
583  Xapian::Enquire enquire(get_database("apitest_simpledata"));
584  enquire.set_query(Xapian::Query("this"));
585 
586  Xapian::MSet mymset = enquire.get_mset(0, 10);
587  TEST(mymset.size() >= 2);
588 
589  Xapian::RSet myrset;
590  Xapian::MSetIterator i = mymset.begin();
591  myrset.add_document(*i);
592  myrset.add_document(*(++i));
593 
594  EvenParityExpandFunctor myfunctor;
595 
596  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
597  unsigned int neweset_size = 0;
598  Xapian::ESetIterator j = myeset_orig.begin();
599  for ( ; j != myeset_orig.end(); ++j) {
600  if (myfunctor(*j)) neweset_size++;
601  }
602  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
603 
604 #if 0
605  // Compare myeset with the hand-filtered version of myeset_orig.
606  if (verbose) {
607  tout << "orig_eset: ";
608  copy(myeset_orig.begin(), myeset_orig.end(),
609  ostream_iterator<Xapian::ESetItem>(tout, " "));
610  tout << "\n";
611 
612  tout << "new_eset: ";
613  copy(myeset.begin(), myeset.end(),
614  ostream_iterator<Xapian::ESetItem>(tout, " "));
615  tout << "\n";
616  }
617 #endif
618  Xapian::ESetIterator orig = myeset_orig.begin();
619  Xapian::ESetIterator filt = myeset.begin();
620  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
621  // skip over items that shouldn't be in myeset
622  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
623  ++orig;
624  }
625 
626  TEST_AND_EXPLAIN(*orig == *filt &&
627  orig.get_weight() == filt.get_weight(),
628  "Mismatch in items " << *orig << " vs. " << *filt
629  << " after filtering");
630  }
631 
632  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
633  ++orig;
634  }
635 
636  TEST_EQUAL(orig, myeset_orig.end());
637  TEST_AND_EXPLAIN(filt == myeset.end(),
638  "Extra items in the filtered eset.");
639 }
640 
641 DEFINE_TESTCASE(expanddeciderfilterprefix2, backend) {
642  Xapian::Enquire enquire(get_database("apitest_simpledata"));
643  enquire.set_query(Xapian::Query("this"));
644 
645  Xapian::MSet mymset = enquire.get_mset(0, 10);
646  TEST(mymset.size() >= 2);
647 
648  Xapian::RSet myrset;
649  Xapian::MSetIterator i = mymset.begin();
650  myrset.add_document(*i);
651  myrset.add_document(*(++i));
652 
653  Xapian::ESet myeset_orig = enquire.get_eset(1000, myrset);
654  unsigned int neweset_size = 0;
655 
656  // Choose the first char in the first term as prefix.
657  Xapian::ESetIterator j = myeset_orig.begin();
658  TEST(myeset_orig.size() >= 1);
659  string prefix(*j, 0, 1);
660  Xapian::ExpandDeciderFilterPrefix myfunctor(prefix);
661 
662  for ( ; j != myeset_orig.end(); ++j) {
663  if (myfunctor(*j)) neweset_size++;
664  }
665  Xapian::ESet myeset = enquire.get_eset(neweset_size, myrset, &myfunctor);
666 
667  Xapian::ESetIterator orig = myeset_orig.begin();
668  Xapian::ESetIterator filt = myeset.begin();
669  for (; orig != myeset_orig.end() && filt != myeset.end(); ++orig, ++filt) {
670  // skip over items that shouldn't be in myeset
671  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
672  ++orig;
673  }
674 
675  TEST_AND_EXPLAIN(*orig == *filt &&
676  orig.get_weight() == filt.get_weight(),
677  "Mismatch in items " << *orig << " vs. " << *filt
678  << " after filtering");
679  }
680 
681  while (orig != myeset_orig.end() && !myfunctor(*orig)) {
682  ++orig;
683  }
684 
685  TEST_EQUAL(orig, myeset_orig.end());
686  TEST_AND_EXPLAIN(filt == myeset.end(),
687  "Extra items in the filtered eset.");
688 }
689 
690 // tests the percent cutoff option
691 DEFINE_TESTCASE(pctcutoff1, backend) {
692  Xapian::Enquire enquire(get_database("apitest_simpledata"));
694  "this", "line", "paragraph", "rubbish"));
695  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
696 
697  if (verbose) {
698  tout << "Original mset pcts:";
699  print_mset_percentages(mymset1);
700  tout << "\n";
701  }
702 
703  unsigned int num_items = 0;
704  int my_pct = 100;
705  int changes = 0;
706  Xapian::MSetIterator i = mymset1.begin();
707  int c = 0;
708  for ( ; i != mymset1.end(); ++i, ++c) {
709  int new_pct = mymset1.convert_to_percent(i);
710  if (new_pct != my_pct) {
711  changes++;
712  if (changes > 3) break;
713  num_items = c;
714  my_pct = new_pct;
715  }
716  }
717 
718  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
719  if (verbose) {
720  tout << "Cutoff percent: " << my_pct << "\n";
721  }
722 
723  enquire.set_cutoff(my_pct);
724  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
725 
726  if (verbose) {
727  tout << "Percentages after cutoff:";
728  print_mset_percentages(mymset2);
729  tout << "\n";
730  }
731 
732  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
733  "Match with % cutoff lost too many items");
734 
735  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
736  (mymset2.convert_to_percent(mymset2[num_items]) == my_pct &&
737  mymset2.convert_to_percent(mymset2.back()) == my_pct),
738  "Match with % cutoff returned too many items");
739 }
740 
741 // Tests the percent cutoff option combined with collapsing
// NOTE(review): this block is incomplete as extracted.  The embedded listing
// numbers skip 744, 749, 765 and 767-770, and the visible text has no
// set_query() call and an orphaned macro continuation line near the end.
// Restore the dropped lines from the upstream file before relying on it.
742 DEFINE_TESTCASE(pctcutoff2, backend) {
743  Xapian::Enquire enquire(get_database("apitest_simpledata"));
// NOTE(review): listing line 744 is missing here - presumably the
// enquire.set_query(...) call; confirm against upstream.
745  Xapian::MSet mset = enquire.get_mset(0, 100);
746 
747  if (verbose) {
748  tout << "Original mset pcts:";
// NOTE(review): listing line 749 is missing here - presumably a call printing
// the percentages of mset, as the sibling pctcutoff tests do; confirm.
750  tout << "\n";
751  }
752 
// The top two matches must differ by at least 2 percentage points so that a
// cutoff strictly between them exists.
753  TEST(mset.size() >= 2);
754  TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
755 
// Use the (integer) midpoint of the top two percentages as the cutoff.
756  int cutoff = mset[0].get_percent() + mset[1].get_percent();
757  cutoff /= 2;
758 
759  enquire.set_cutoff(cutoff);
760  enquire.set_collapse_key(1234); // Value which is always empty.
761 
// With the cutoff and collapsing both active, ask for a single result.
762  Xapian::MSet mset2 = enquire.get_mset(0, 1);
763  TEST_EQUAL(mset2.size(), 1);
764  TEST_REL(mset2.get_matches_lower_bound(),>=,1);
// NOTE(review): listing line 765 is missing; the next line is the tail of a
// macro call whose opening was dropped.
766  mset2.get_matches_lower_bound());
// NOTE(review): listing lines 767-770 are missing; further checks were
// presumably dropped here.
771 }
772 
773 // Test that the percent cutoff option returns all the answers it should.
774 DEFINE_TESTCASE(pctcutoff3, backend) {
775  Xapian::Enquire enquire(get_database("apitest_simpledata"));
776  enquire.set_query(Xapian::Query("this"));
777  Xapian::MSet mset1 = enquire.get_mset(0, 10);
778 
779  if (verbose) {
780  tout << "Original mset pcts:";
781  print_mset_percentages(mset1);
782  tout << "\n";
783  }
784 
785  int percent = 100;
786  for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
787  int new_percent = mset1.convert_to_percent(i);
788  if (new_percent != percent) {
789  tout.str(string());
790  tout << "Testing " << percent << "% cutoff" << endl;
791  enquire.set_cutoff(percent);
792  Xapian::MSet mset2 = enquire.get_mset(0, 10);
793  TEST_EQUAL(mset2.back().get_percent(), percent);
794  TEST_EQUAL(mset2.size(), i.get_rank());
795  percent = new_percent;
796  }
797  }
798 }
799 
800 // tests the cutoff option
801 DEFINE_TESTCASE(cutoff1, backend) {
802  Xapian::Enquire enquire(get_database("apitest_simpledata"));
804  "this", "line", "paragraph", "rubbish"));
805  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
806 
807  if (verbose) {
808  tout << "Original mset weights:";
809  print_mset_weights(mymset1);
810  tout << "\n";
811  }
812 
813  unsigned int num_items = 0;
814  double my_wt = -100;
815  int changes = 0;
816  Xapian::MSetIterator i = mymset1.begin();
817  int c = 0;
818  for ( ; i != mymset1.end(); ++i, ++c) {
819  double new_wt = i.get_weight();
820  if (new_wt != my_wt) {
821  changes++;
822  if (changes > 3) break;
823  num_items = c;
824  my_wt = new_wt;
825  }
826  }
827 
828  TEST_AND_EXPLAIN(changes > 3, "MSet not varied enough to test");
829  if (verbose) {
830  tout << "Cutoff weight: " << my_wt << "\n";
831  }
832 
833  enquire.set_cutoff(0, my_wt);
834  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
835 
836  if (verbose) {
837  tout << "Weights after cutoff:";
838  print_mset_weights(mymset2);
839  tout << "\n";
840  }
841 
842  TEST_AND_EXPLAIN(mymset2.size() >= num_items,
843  "Match with cutoff lost too many items");
844 
845  TEST_AND_EXPLAIN(mymset2.size() == num_items ||
846  (mymset2[num_items].get_weight() == my_wt &&
847  mymset2.back().get_weight() == my_wt),
848  "Match with cutoff returned too many items");
849 }
850 
851 // tests the allow query terms expand option
852 DEFINE_TESTCASE(allowqterms1, backend) {
853  Xapian::Enquire enquire(get_database("apitest_simpledata"));
854  string term = "paragraph";
855  enquire.set_query(Xapian::Query(term));
856 
857  Xapian::MSet mymset = enquire.get_mset(0, 10);
858  TEST(mymset.size() >= 2);
859 
860  Xapian::RSet myrset;
861  Xapian::MSetIterator i = mymset.begin();
862  myrset.add_document(*i);
863  myrset.add_document(*(++i));
864 
865  Xapian::ESet myeset = enquire.get_eset(1000, myrset);
866  Xapian::ESetIterator j = myeset.begin();
867  for ( ; j != myeset.end(); ++j) {
868  TEST_NOT_EQUAL(*j, term);
869  }
870 
871  Xapian::ESet myeset2 = enquire.get_eset(1000, myrset, Xapian::Enquire::INCLUDE_QUERY_TERMS);
872  j = myeset2.begin();
873  for ( ; j != myeset2.end(); ++j) {
874  if (*j == term) break;
875  }
876  TEST(j != myeset2.end());
877 }
878 
879 // tests that the MSet max_attained works
880 DEFINE_TESTCASE(maxattain1, backend) {
881  Xapian::Enquire enquire(get_database("apitest_simpledata"));
882  enquire.set_query(query("this"));
883  Xapian::MSet mymset = enquire.get_mset(0, 100);
884 
885  double mymax = 0;
886  Xapian::MSetIterator i = mymset.begin();
887  for ( ; i != mymset.end(); ++i) {
888  if (i.get_weight() > mymax) mymax = i.get_weight();
889  }
890  TEST_EQUAL(mymax, mymset.get_max_attained());
891 }
892 
893 // tests a reversed boolean query
894 DEFINE_TESTCASE(reversebool1, backend) {
895  Xapian::Enquire enquire(get_database("apitest_simpledata"));
896  enquire.set_query(Xapian::Query("this"));
898 
899  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
900  TEST_AND_EXPLAIN(mymset1.size() > 1,
901  "Mset was too small to test properly");
902 
904  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
906  Xapian::MSet mymset3 = enquire.get_mset(0, 100);
907 
908  // mymset1 and mymset2 should be identical
909  TEST_EQUAL(mymset1.size(), mymset2.size());
910 
911  {
912  Xapian::MSetIterator i = mymset1.begin();
913  Xapian::MSetIterator j = mymset2.begin();
914  for ( ; i != mymset1.end(); ++i, j++) {
915  TEST(j != mymset2.end());
916  // if this fails, then setting match_sort_forward=true was not
917  // the same as the default.
918  TEST_EQUAL(*i, *j);
919  }
920  TEST(j == mymset2.end());
921  }
922 
923  // mymset1 and mymset3 should be same but reversed
924  TEST_EQUAL(mymset1.size(), mymset3.size());
925 
926  {
927  Xapian::MSetIterator i = mymset1.begin();
928  Xapian::MSetIterator j = mymset3.end();
929  for ( ; i != mymset1.end(); ++i) {
930  --j;
931  // if this fails, then setting match_sort_forward=false didn't
932  // reverse the results.
933  TEST_EQUAL(*i, *j);
934  }
935  }
936 }
937 
938 // tests a reversed boolean query, where the full mset isn't returned
// Fetches the full result set for "this", then two truncated MSets of half
// that size, and checks the truncated sets against the head (forward order)
// and the tail (reversed order) of the full set.
// NOTE(review): listing lines 942, 949, 952 and 974 are absent from this
// extract. 942/949/952 presumably configure weighting/ordering as in
// reversebool1, and 974 presumably declares the iterator `j` used in the
// final loop - confirm against the upstream file.
939 DEFINE_TESTCASE(reversebool2, backend) {
940  Xapian::Enquire enquire(get_database("apitest_simpledata"));
941  enquire.set_query(Xapian::Query("this"));
943 
944  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
945 
946  TEST_AND_EXPLAIN(mymset1.size() > 1,
947  "Mset was too small to test properly");
948 
    // Ask for only half of the matches so that truncation is exercised.
950  Xapian::doccount msize = mymset1.size() / 2;
951  Xapian::MSet mymset2 = enquire.get_mset(0, msize);
953  Xapian::MSet mymset3 = enquire.get_mset(0, msize);
954 
955  // mymset2 should be first msize items of mymset1
956  TEST_EQUAL(msize, mymset2.size());
957  {
958  Xapian::MSetIterator i = mymset1.begin();
959  Xapian::MSetIterator j = mymset2.begin();
960  for ( ; j != mymset2.end(); ++i, ++j) {
961  TEST(i != mymset1.end());
962  // if this fails, then setting match_sort_forward=true was not
963  // the same as the default.
964  TEST_EQUAL(*i, *j);
965  }
966  // mymset1 should be larger.
967  TEST(i != mymset1.end());
968  }
969 
970  // mymset3 should be last msize items of mymset1, in reverse order
971  TEST_EQUAL(msize, mymset3.size());
972  {
973  Xapian::MSetIterator i = mymset1.end();
    // i steps backwards from the end of the full set while j steps
    // forwards through the truncated reversed set.
975  for (j = mymset3.begin(); j != mymset3.end(); ++j) {
976  // if this fails, then setting match_sort_forward=false didn't
977  // reverse the results.
978  --i;
979  TEST_EQUAL(*i, *j);
980  }
981  }
982 }
983 
984 // tests that get_matching_terms() returns the terms in the right order
// The expected order is "one", "two", "three", "four", which matches the
// third constructor argument (1..4) given to each term's Query below rather
// than the order in which the sub-queries are written.
// NOTE(review): listing lines 995-996 and 999 are absent from this extract;
// they presumably declare `myquery` and supply the operators combining the
// four term queries - confirm against the upstream file.
985 DEFINE_TESTCASE(getmterms1, backend) {
986  list<string> answers_list;
987  answers_list.push_back("one");
988  answers_list.push_back("two");
989  answers_list.push_back("three");
990  answers_list.push_back("four");
991 
992  Xapian::Database mydb(get_database("apitest_termorder"));
993  Xapian::Enquire enquire(mydb);
994 
997  Xapian::Query("one", 1, 1),
998  Xapian::Query("three", 1, 3)),
1000  Xapian::Query("four", 1, 4),
1001  Xapian::Query("two", 1, 2)));
1002 
1003  enquire.set_query(myquery);
1004 
1005  Xapian::MSet mymset = enquire.get_mset(0, 10);
1006 
1007  TEST_MSET_SIZE(mymset, 1);
    // Collect the matching terms for the single hit and compare the whole
    // sequence (order included) with the expected list.
1008  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
1009  enquire.get_matching_terms_end(mymset.begin()));
1010  TEST(list == answers_list);
1011 }
1012 
1013 // tests that get_matching_terms() returns the terms only once
// The term "one" appears twice in the query below (with third constructor
// arguments 1 and 4) but is expected only once in the matching terms list.
// NOTE(review): listing lines 1023-1024 and 1027 are absent from this
// extract; they presumably declare `myquery` and supply the operators
// combining the term queries - confirm against the upstream file.
1014 DEFINE_TESTCASE(getmterms2, backend) {
1015  list<string> answers_list;
1016  answers_list.push_back("one");
1017  answers_list.push_back("two");
1018  answers_list.push_back("three");
1019 
1020  Xapian::Database mydb(get_database("apitest_termorder"));
1021  Xapian::Enquire enquire(mydb);
1022 
1025  Xapian::Query("one", 1, 1),
1026  Xapian::Query("three", 1, 3)),
1028  Xapian::Query("one", 1, 4),
1029  Xapian::Query("two", 1, 2)));
1030 
1031  enquire.set_query(myquery);
1032 
1033  Xapian::MSet mymset = enquire.get_mset(0, 10);
1034 
1035  TEST_MSET_SIZE(mymset, 1);
    // Collect the matching terms for the single hit; the duplicate "one"
    // must not show up a second time.
1036  list<string> list(enquire.get_matching_terms_begin(mymset.begin()),
1037  enquire.get_matching_terms_end(mymset.begin()));
1038  TEST(list == answers_list);
1039 }
1040 
1041 // test that running a query twice returns the same results
// Calls get_mset(0, 10) twice on the same Enquire object without changing
// anything in between and requires the two MSets to compare equal.
1042 DEFINE_TESTCASE(repeatquery1, backend) {
1043  Xapian::Enquire enquire(get_database("apitest_simpledata"));
    // This first query is replaced by the set_query() call just below
    // before any match is run.
1044  enquire.set_query(Xapian::Query("this"));
1045 
1046  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1047 
1048  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1049  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1050  TEST_EQUAL(mymset1, mymset2);
1051 }
1052 
1053 // test that prefetching documents works (at least, gives same results)
// Runs the same OR query twice, calls every visible fetch() overload on the
// second MSet, then walks both MSets in lock-step checking that the document
// data is identical and non-empty, and that both iterators finish together.
1054 DEFINE_TESTCASE(fetchdocs1, backend) {
1055  Xapian::Enquire enquire(get_database("apitest_simpledata"));
    // This first query is replaced by the set_query() call just below
    // before any match is run.
1056  enquire.set_query(Xapian::Query("this"));
1057 
1058  enquire.set_query(query(Xapian::Query::OP_OR, "this", "word"));
1059 
1060  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1061  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1062  TEST_EQUAL(mymset1, mymset2);
    // Exercise each fetch() form: a range given by two items, a range given
    // by begin/end, a single item, and the no-argument form.
1063  mymset2.fetch(mymset2[0], mymset2[mymset2.size() - 1]);
1064  mymset2.fetch(mymset2.begin(), mymset2.end());
1065  mymset2.fetch(mymset2.begin());
1066  mymset2.fetch();
1067 
1068  Xapian::MSetIterator it1 = mymset1.begin();
1069  Xapian::MSetIterator it2 = mymset2.begin();
1070 
1071  while (it1 != mymset1.end() && it2 != mymset2.end()) {
    // NOTE(review): listing line 1072 was dropped from the extract and has
    // been restored here as the opening of the split TEST_EQUAL - confirm
    // against the upstream file.
1072  TEST_EQUAL(it1.get_document().get_data(),
1073  it2.get_document().get_data());
1074  TEST_NOT_EQUAL(it1.get_document().get_data(), "");
1075  TEST_NOT_EQUAL(it2.get_document().get_data(), "");
1076  it1++;
1077  it2++;
1078  }
    // The loop stops as soon as either iterator reaches its end, so both
    // must be checked: it1 against mymset1 and it2 against mymset2.
    // (Previously it1 was checked twice and it2 was never checked.)
1079  TEST_EQUAL(it1, mymset1.end());
1080  TEST_EQUAL(it2, mymset2.end());
1081 }
1082 
1083 // test that searching for a term not in the database fails nicely
// Queries for the single term "frink" and passes the result to
// mset_expect_order() with no expected document ids - presumably asserting
// that the MSet is empty; confirm against the helper's definition.
// NOTE(review): listing line 1086 is absent from this extract - confirm
// against the upstream file what is configured there.
1084 DEFINE_TESTCASE(absentterm1, backend) {
1085  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1087  enquire.set_query(Xapian::Query("frink"));
1088 
1089  Xapian::MSet mymset = enquire.get_mset(0, 10);
1090  mset_expect_order(mymset);
1091 }
1092 
1093 // as absentterm1, but setting query from a vector of terms
// Builds an OP_OR query from a one-element vector holding "frink" and passes
// the result to mset_expect_order() with no expected document ids -
// presumably asserting that the MSet is empty; confirm against the helper.
1094 DEFINE_TESTCASE(absentterm2, backend) {
1095  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1096  vector<string> terms;
1097  terms.push_back("frink");
1098 
    // Iterator-range constructor: combines every term in [begin, end)
    // under the given operator.
1099  Xapian::Query query(Xapian::Query::OP_OR, terms.begin(), terms.end());
1100  enquire.set_query(query);
1101 
1102  Xapian::MSet mymset = enquire.get_mset(0, 10);
1103  mset_expect_order(mymset);
1104 }
1105 
1106 // test that rsets do sensible things
// Runs "giraffe" OR "tiger" on apitest_rset once without and once with an
// RSet containing document 1, and checks that both runs return 3 matches.
1107 DEFINE_TESTCASE(rset1, backend) {
1108  Xapian::Database mydb(get_database("apitest_rset"));
1109  Xapian::Enquire enquire(mydb);
1110  Xapian::Query myquery = query(Xapian::Query::OP_OR, "giraffe", "tiger");
1111  enquire.set_query(myquery);
1112 
1113  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1114 
1115  Xapian::RSet myrset;
1116  myrset.add_document(1);
1117 
    // Same query again, this time passing the relevance set.
1118  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1119 
1120  // We should have the same documents turn up, but 1 and 3 should
1121  // have higher weights with the RSet.
    // Note that only the result sizes are asserted here; the weights
    // mentioned above are not compared.
1122  TEST_MSET_SIZE(mymset1, 3);
1123  TEST_MSET_SIZE(mymset2, 3);
1124 }
1125 
1126 // test that rsets do more sensible things
// Runs "cuddly" OR "people" on apitest_rset without and with an RSet
// containing document 2, and checks that supplying the RSet swaps the
// ranking of documents 1 and 2.
1127 DEFINE_TESTCASE(rset2, backend) {
1128  Xapian::Database mydb(get_database("apitest_rset"));
1129  Xapian::Enquire enquire(mydb);
1130  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
1131  enquire.set_query(myquery);
1132 
1133  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1134 
1135  Xapian::RSet myrset;
1136  myrset.add_document(2);
1137 
1138  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
1139 
    // Without the RSet the order is 1, 2; with it the order is 2, 1.
1140  mset_expect_order(mymset1, 1, 2);
1141  mset_expect_order(mymset2, 2, 1);
1142 }
1143 
1144 // test that rsets behave correctly with multiDBs
// Builds the same pair of databases in two ways - via the two-argument
// get_database() and via add_database() - then runs the same query on each,
// with and without an RSet, and compares the resulting orders and weights.
1145 DEFINE_TESTCASE(rsetmultidb1, backend && !multi) {
1146  Xapian::Database mydb1(get_database("apitest_rset", "apitest_simpledata2"));
1147  Xapian::Database mydb2(get_database("apitest_rset"));
1148  mydb2.add_database(get_database("apitest_simpledata2"));
1149 
1150  Xapian::Enquire enquire1(mydb1);
1151  Xapian::Enquire enquire2(mydb2);
1152 
1153  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "multiple");
1154 
1155  enquire1.set_query(myquery);
1156  enquire2.set_query(myquery);
1157 
    // The two databases use different document ids for the RSet entry
    // (4 for mydb1, 2 for mydb2), matching the differing expected orders
    // asserted below.
1158  Xapian::RSet myrset1;
1159  Xapian::RSet myrset2;
1160  myrset1.add_document(4);
1161  myrset2.add_document(2);
1162 
1163  Xapian::MSet mymset1a = enquire1.get_mset(0, 10);
1164  Xapian::MSet mymset1b = enquire1.get_mset(0, 10, &myrset1);
1165  Xapian::MSet mymset2a = enquire2.get_mset(0, 10);
1166  Xapian::MSet mymset2b = enquire2.get_mset(0, 10, &myrset2);
1167 
1168  mset_expect_order(mymset1a, 1, 4);
1169  mset_expect_order(mymset1b, 4, 1);
1170  mset_expect_order(mymset2a, 1, 2);
1171  mset_expect_order(mymset2b, 2, 1);
1172 
    // Weights must agree between the two databases for the first two
    // results, and using the RSet must change the MSet in each case.
1173  TEST(mset_range_is_same_weights(mymset1a, 0, mymset2a, 0, 2));
1174  TEST(mset_range_is_same_weights(mymset1b, 0, mymset2b, 0, 2));
1175  TEST_NOT_EQUAL(mymset1a, mymset1b);
1176  TEST_NOT_EQUAL(mymset2a, mymset2b);
1177 }
1178 
1179 // regression tests - used to cause assertion in stats.h to fail
1180 // Doesn't actually fail for multi but it doesn't make sense to run there.
// The test has no explicit assertions: it passes if get_mset() completes.
1181 DEFINE_TESTCASE(rsetmultidb3, backend && !multi) {
1182  Xapian::Enquire enquire(get_database("apitest_simpledata2"));
1183  enquire.set_query(query(Xapian::Query::OP_OR, "cuddly", "people"));
1184  Xapian::MSet mset = enquire.get_mset(0, 10); // used to fail assertion
1185 }
1186 
// Compares the results of a plain query for "word" with those of a second
// query built from "simple" and "word", and requires the MSets to be equal.
// NOTE(review): listing line 1194 is absent from this extract; it presumably
// declares `myquery2` and names its operator (the test name suggests an
// elite-set operator) - confirm against the upstream file.
1188 DEFINE_TESTCASE(eliteset1, backend && !multi) {
1189  Xapian::Database mydb(get_database("apitest_simpledata"));
1190  Xapian::Enquire enquire(mydb);
1191 
1192  Xapian::Query myquery1 = query(Xapian::Query::OP_OR, "word");
1193 
1195  "simple", "word");
1196 
1197  enquire.set_query(myquery1, 2); // So the query lengths are the same.
1198  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1199 
1200  enquire.set_query(myquery2);
1201  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1202 
1203  TEST_EQUAL(mymset1, mymset2);
1204 }
1205 
// Sharded-database variant: runs a query built from "simple" and "word" and
// checks the result against hard-coded sizes, bounds, order and weights.
// NOTE(review): listing line 1211 is absent from this extract; it presumably
// declares `myquery2` and names its operator - confirm against the upstream
// file.
1207 DEFINE_TESTCASE(elitesetmulti1, multi) {
1208  Xapian::Database mydb(get_database("apitest_simpledata"));
1209  Xapian::Enquire enquire(mydb);
1210 
1212  "simple", "word");
1213 
1214  enquire.set_query(myquery2);
1215  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1216 
1217  // For a sharded database, the elite set is resolved per shard and can
1218  // select different terms because the max term weights vary with the
1219  // per-shard term statistics. I can't see a feasible way to create
1220  // an equivalent MSet to compare with so for now at least we hard-code
1221  // the expected values.
1222  TEST_EQUAL(mymset2.size(), 3);
1223  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1224  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1225  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
1226  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.1736756775723788948);
1227  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.0464816871772451012);
1228  mset_expect_order(mymset2, 2, 4, 5);
    // The top hit's weight is the same value as get_max_attained() above.
1229  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.0464816871772451012);
1230  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.64098768659591376373);
1231  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.46338869498075929698);
1232 }
1233 
// Compares the results of "word" AND "search" with those of a second query
// built from the sub-queries {"this", "word" AND "search"} with a trailing
// parameter of 1, and requires the MSets to be equal.
// NOTE(review): listing line 1245 is absent from this extract; it presumably
// declares `myquery2` and names its operator - confirm against the upstream
// file.
1236 DEFINE_TESTCASE(eliteset2, backend && !multi) {
1237  Xapian::Database mydb(get_database("apitest_simpledata"));
1238  Xapian::Enquire enquire(mydb);
1239 
1240  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1241 
1242  vector<Xapian::Query> qs;
1243  qs.push_back(query("this"));
1244  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1246  qs.begin(), qs.end(), 1);
1247 
1248  enquire.set_query(myquery1);
1249  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
1250 
1251  enquire.set_query(myquery2);
1252  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1253 
1254  TEST_EQUAL(mymset1, mymset2);
1255 }
1256 
// Sharded-database variant of eliteset2: runs the query built from the
// sub-queries {"this", "word" AND "search"} and checks the result against
// hard-coded sizes, bounds, order and weights.
// NOTE(review): listing line 1267 is absent from this extract; it presumably
// declares `myquery2` and names its operator - confirm against the upstream
// file.
1258 DEFINE_TESTCASE(elitesetmulti2, multi) {
1259  Xapian::Database mydb(get_database("apitest_simpledata"));
1260  Xapian::Enquire enquire(mydb);
1261 
    // myquery1 is built but is not used by any visible line of this test.
1262  Xapian::Query myquery1 = query(Xapian::Query::OP_AND, "word", "search");
1263 
1264  vector<Xapian::Query> qs;
1265  qs.push_back(query("this"));
1266  qs.push_back(query(Xapian::Query::OP_AND, "word", "search"));
1268  qs.begin(), qs.end(), 1);
1269 
1270  enquire.set_query(myquery2);
1271  Xapian::MSet mymset2 = enquire.get_mset(0, 10);
1272 
1273  // For a sharded database, the elite set is resolved per shard and can
1274  // select different terms because the max term weights vary with the
1275  // per-shard term statistics. I can't see a feasible way to create
1276  // an equivalent MSet to compare with so for now at least we hard-code
1277  // the expected values.
1278  TEST_EQUAL(mymset2.size(), 4);
1279  TEST_EQUAL(mymset2.get_matches_lower_bound(), 4);
1280  TEST_EQUAL(mymset2.get_matches_estimated(), 4);
1281  TEST_EQUAL(mymset2.get_matches_upper_bound(), 4);
1282  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 2.6585705165783908299);
1283  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.9700834242150864206);
1284  mset_expect_order(mymset2, 2, 1, 3, 5);
    // The top hit's weight is the same value as get_max_attained() above;
    // the last two hits are expected to have identical weights.
1285  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.9700834242150864206);
1286  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 0.051103097360122341775);
1287  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.043131803408968119595);
1288  TEST_EQUAL_DOUBLE(mymset2[3].get_weight(), 0.043131803408968119595);
1289 }
1290 
1291 
// Checks that an OP_ELITE_SET query over three terms with an elite-set size
// of 3 gives the same MSet, term frequencies and term weights as an OP_OR
// query over the same three terms.
1294 DEFINE_TESTCASE(eliteset3, backend) {
    // Two independent Database/Enquire pairs over the same data, one per
    // query being compared.
1295  Xapian::Database mydb1(get_database("apitest_simpledata"));
1296  Xapian::Enquire enquire1(mydb1);
1297 
1298  Xapian::Database mydb2(get_database("apitest_simpledata"));
1299  Xapian::Enquire enquire2(mydb2);
1300 
1301  // make a query
1302  Xapian::Stem stemmer("english");
1303 
1304  string term1 = stemmer("word");
1305  string term2 = stemmer("rubbish");
1306  string term3 = stemmer("banana");
1307 
1308  vector<string> terms;
1309  terms.push_back(term1);
1310  terms.push_back(term2);
1311  terms.push_back(term3);
1312 
1313  Xapian::Query myquery1(Xapian::Query::OP_OR, terms.begin(), terms.end());
1314  enquire1.set_query(myquery1);
1315 
    // The final argument (3) equals the number of terms supplied.
1316  Xapian::Query myquery2(Xapian::Query::OP_ELITE_SET, terms.begin(), terms.end(), 3);
1317  enquire2.set_query(myquery2);
1318 
1319  // retrieve the results
1320  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1321  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1322 
1323  TEST_EQUAL(mymset1, mymset2);
1324 
    // Per-term statistics reported by the two MSets must agree as well.
1325  TEST_EQUAL(mymset1.get_termfreq(term1),
1326  mymset2.get_termfreq(term1));
1327  TEST_EQUAL(mymset1.get_termweight(term1),
1328  mymset2.get_termweight(term1));
1329  TEST_EQUAL(mymset1.get_termfreq(term2),
1330  mymset2.get_termfreq(term2));
1331  TEST_EQUAL(mymset1.get_termweight(term2),
1332  mymset2.get_termweight(term2));
1333  TEST_EQUAL(mymset1.get_termfreq(term3),
1334  mymset2.get_termfreq(term3));
1335  TEST_EQUAL(mymset1.get_termweight(term3),
1336  mymset2.get_termweight(term3));
1337 }
1338 
// Compares the results of a plain query for "rubbish" with those of a second
// query built from "word", "rubbish" and "fibble"; the MSets must be equal
// and non-empty.
// NOTE(review): listing line 1348 is absent from this extract; it presumably
// declares `myquery2` and names its operator - confirm against the upstream
// file.
1340 DEFINE_TESTCASE(eliteset4, backend && !multi) {
1341  Xapian::Database mydb1(get_database("apitest_simpledata"));
1342  Xapian::Enquire enquire1(mydb1);
1343 
1344  Xapian::Database mydb2(get_database("apitest_simpledata"));
1345  Xapian::Enquire enquire2(mydb2);
1346 
1347  Xapian::Query myquery1 = query("rubbish");
1349  "word", "rubbish", "fibble");
1350  enquire1.set_query(myquery1);
1351  enquire2.set_query(myquery2);
1352 
1353  // retrieve the results
1354  Xapian::MSet mymset1 = enquire1.get_mset(0, 10);
1355  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1356 
    // Guard against the comparison passing trivially on two empty MSets.
1357  TEST_NOT_EQUAL(mymset2.size(), 0);
1358  TEST_EQUAL(mymset1, mymset2);
1359 }
1360 
// Sharded-database variant of eliteset4: runs the query built from "word",
// "rubbish" and "fibble" and checks the result against hard-coded sizes,
// bounds, order and weights.
// NOTE(review): listing line 1366 is absent from this extract; it presumably
// declares `myquery2` and names its operator - confirm against the upstream
// file.
1362 DEFINE_TESTCASE(elitesetmulti4, multi) {
1363  Xapian::Database mydb2(get_database("apitest_simpledata"));
1364  Xapian::Enquire enquire2(mydb2);
1365 
1367  "word", "rubbish", "fibble");
1368  enquire2.set_query(myquery2);
1369 
1370  // retrieve the results
1371  Xapian::MSet mymset2 = enquire2.get_mset(0, 10);
1372 
1373  // For a sharded database, the elite set is resolved per shard and can
1374  // select different terms because the max term weights vary with the
1375  // per-shard term statistics. I can't see a feasible way to create
1376  // an equivalent MSet to compare with so for now at least we hard-code
1377  // the expected values.
1378  TEST_EQUAL(mymset2.size(), 3);
1379  TEST_EQUAL(mymset2.get_matches_lower_bound(), 3);
1380  TEST_EQUAL(mymset2.get_matches_estimated(), 3);
1381  TEST_EQUAL(mymset2.get_matches_upper_bound(), 3);
    // Here the maximum possible and maximum attained weights coincide.
1382  TEST_EQUAL_DOUBLE(mymset2.get_max_possible(), 1.4848948390060121572);
1383  TEST_EQUAL_DOUBLE(mymset2.get_max_attained(), 1.4848948390060121572);
1384  mset_expect_order(mymset2, 3, 2, 4);
1385  TEST_EQUAL_DOUBLE(mymset2[0].get_weight(), 1.4848948390060121572);
1386  TEST_EQUAL_DOUBLE(mymset2[1].get_weight(), 1.0464816871772451012);
1387  TEST_EQUAL_DOUBLE(mymset2[2].get_weight(), 0.64098768659591376373);
1388 }
1389 
// Regression test with no explicit assertions: builds a 24-entry term vector
// containing many duplicates and, for every n from 1 to 23, runs a query
// built from that vector with n as the trailing parameter. The test passes if
// none of the get_mset() calls crash.
// NOTE(review): listing lines 1409 and 1411 are absent from this extract;
// they presumably declare `myquery1` and wrap it in a second operator that
// takes the 0.004 parameter - confirm against the upstream file.
1391 DEFINE_TESTCASE(eliteset5, backend) {
1392  Xapian::Database mydb1(get_database("apitest_simpledata"));
1393  Xapian::Enquire enquire1(mydb1);
1394 
1395  vector<string> v;
    // Three rounds of eight terms each, giving 24 entries in total.
1396  for (int i = 0; i != 3; ++i) {
1397  v.push_back("simpl");
1398  v.push_back("queri");
1399 
1400  v.push_back("rubbish");
1401  v.push_back("rubbish");
1402  v.push_back("rubbish");
1403  v.push_back("word");
1404  v.push_back("word");
1405  v.push_back("word");
1406  }
1407 
    // n runs from 1 up to (but not including) v.size().
1408  for (Xapian::termcount n = 1; n != v.size(); ++n) {
1410  v.begin(), v.end(), n);
1412  myquery1,
1413  0.004);
1414 
1415  enquire1.set_query(myquery1);
1416  // On architectures with excess precision (or, at least, on x86), the
1417  // following call used to result in a segfault (at least when n=1).
1418  enquire1.get_mset(0, 10);
1419  }
1420 }
1421 
// Requests expand sets for two single-document RSets (document 5 and
// document 6), looks up the weight of the stemmed term "another" in each,
// and requires both weights to be non-zero and equal to each other.
1423 DEFINE_TESTCASE(termlisttermfreq1, backend) {
1424  Xapian::Database mydb(get_database("apitest_simpledata"));
1425  Xapian::Enquire enquire(mydb);
1426  Xapian::Stem stemmer("english");
1427  Xapian::RSet rset1;
1428  Xapian::RSet rset2;
1429  rset1.add_document(5);
1430  rset2.add_document(6);
1431 
    // Ask for up to 1000 expand terms per RSet.
1432  Xapian::ESet eset1 = enquire.get_eset(1000, rset1);
1433  Xapian::ESet eset2 = enquire.get_eset(1000, rset2);
1434 
1435  // search for weight of term 'another'
1436  string theterm = stemmer("another");
1437 
    // Both weights start at 0 and stay 0 if the term is not found, which
    // the TEST_NOT_EQUAL checks at the end will then report.
1438  double wt1 = 0;
1439  double wt2 = 0;
1440  {
1441  Xapian::ESetIterator i = eset1.begin();
1442  for ( ; i != eset1.end(); ++i) {
1443  if (*i == theterm) {
1444  wt1 = i.get_weight();
1445  break;
1446  }
1447  }
1448  }
1449  {
1450  Xapian::ESetIterator i = eset2.begin();
1451  for ( ; i != eset2.end(); ++i) {
1452  if (*i == theterm) {
1453  wt2 = i.get_weight();
1454  break;
1455  }
1456  }
1457  }
1458 
1459  TEST_NOT_EQUAL(wt1, 0);
1460  TEST_NOT_EQUAL(wt2, 0);
1461  TEST_EQUAL(wt1, wt2);
1462 }
1463 
// Checks the per-term frequency and weight information reported by an MSet,
// using two equivalent combined databases (one built via the two-argument
// get_database(), one via add_database()) and a query over three stemmed
// terms, one of which ("flibble") is expected to have a frequency of 0.
// NOTE(review): listing lines 1480, 1482, 1509 and 1513 are absent from this
// extract. 1480/1482 presumably declare `myquery` and its operators; 1509 and
// 1513 presumably open the checks whose final argument lines remain at 1510
// and 1514 - confirm against the upstream file.
1465 DEFINE_TESTCASE(qterminfo1, backend) {
1466  Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
1467  Xapian::Enquire enquire1(mydb1);
1468 
1469  Xapian::Database mydb2(get_database("apitest_simpledata"));
1470  mydb2.add_database(get_database("apitest_simpledata2"));
1471  Xapian::Enquire enquire2(mydb2);
1472 
1473  // make a query
1474  Xapian::Stem stemmer("english");
1475 
1476  string term1 = stemmer("word");
1477  string term2 = stemmer("inmemory");
1478  string term3 = stemmer("flibble");
1479 
1481  Xapian::Query(term1),
1483  Xapian::Query(term2),
1484  Xapian::Query(term3)));
1485  enquire1.set_query(myquery);
1486  enquire2.set_query(myquery);
1487 
1488  // retrieve the results
    // Zero items are requested: only the statistics attached to the MSet
    // are examined below.
1489  Xapian::MSet mymset1a = enquire1.get_mset(0, 0);
1490  Xapian::MSet mymset2a = enquire2.get_mset(0, 0);
1491 
    // Both ways of building the combined database must report the same
    // term frequencies.
1492  TEST_EQUAL(mymset1a.get_termfreq(term1),
1493  mymset2a.get_termfreq(term1));
1494  TEST_EQUAL(mymset1a.get_termfreq(term2),
1495  mymset2a.get_termfreq(term2));
1496  TEST_EQUAL(mymset1a.get_termfreq(term3),
1497  mymset2a.get_termfreq(term3));
1498 
1499  TEST_EQUAL(mymset1a.get_termfreq(term1), 3);
1500  TEST_EQUAL(mymset1a.get_termfreq(term2), 1);
1501  TEST_EQUAL(mymset1a.get_termfreq(term3), 0);
1502 
1503  TEST_NOT_EQUAL(mymset1a.get_termweight(term1), 0);
1504  TEST_NOT_EQUAL(mymset1a.get_termweight(term2), 0);
1505  // non-existent terms should have 0 weight.
1506  TEST_EQUAL(mymset1a.get_termweight(term3), 0);
1507 
    // "banana" and "sponge" are not among the three query terms above.
1508  TEST_EQUAL(mymset1a.get_termfreq(stemmer("banana")), 1);
1510  mymset1a.get_termweight(stemmer("banana")));
1511 
1512  TEST_EQUAL(mymset1a.get_termfreq("sponge"), 0);
1514  mymset1a.get_termweight("sponge"));
1515 }
1516 
// Checks that get_termweight("paragraph") is non-zero both for a
// single-term query and for a compound query that contains the same term
// inside an OP_AND sub-query.
// NOTE(review): listing line 1533 is absent from this extract; it presumably
// declares `query` and the outer operator wrapping the OP_AND sub-query -
// confirm against the upstream file.
1518 DEFINE_TESTCASE(qterminfo2, backend) {
1519  Xapian::Database db(get_database("apitest_simpledata"));
1520  Xapian::Enquire enquire(db);
1521 
1522  // make a query
1523  Xapian::Stem stemmer("english");
1524 
1525  string term1 = stemmer("paragraph");
1526  string term2 = stemmer("another");
1527 
1528  enquire.set_query(Xapian::Query(term1));
1529  Xapian::MSet mset0 = enquire.get_mset(0, 10);
1530 
    // The lookup uses the literal "paragraph" rather than term1 -
    // presumably the stemmer leaves that word unchanged; TODO confirm.
1531  TEST_NOT_EQUAL(mset0.get_termweight("paragraph"), 0);
1532 
1534  Xapian::Query(Xapian::Query::OP_AND, term1, term2));
1535  enquire.set_query(query);
1536 
1537  // retrieve the results
1538  // Note: get_mset() used to throw "AssertionError" in debug builds
1539  Xapian::MSet mset = enquire.get_mset(0, 10);
1540 
1541  TEST_NOT_EQUAL(mset.get_termweight("paragraph"), 0);
1542 }
1543 
1544 // tests that when specifying that no items are to be returned, those
1545 // statistics which should be the same are.
// Concretely: get_max_possible() must be identical whether zero items or one
// item is requested for the same query.
1546 DEFINE_TESTCASE(msetzeroitems1, backend) {
1547  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1548  enquire.set_query(query("this"));
1549  Xapian::MSet mymset1 = enquire.get_mset(0, 0);
1550 
1551  Xapian::MSet mymset2 = enquire.get_mset(0, 1);
1552 
1553  TEST_EQUAL(mymset1.get_max_possible(), mymset2.get_max_possible());
1554 }
1555 
1556 // test that the matches_* of a simple query are as expected
// Runs a series of small queries on apitest_simpledata with varying numbers
// of requested items and checks the lower bound, estimate and upper bound of
// the match count (and, where visible, the uncollapsed equivalents).
// Some expectations depend on whether the backend type starts with "multi"
// and/or contains "remote".
// NOTE(review): listing lines 1571-1573, 1581-1583, 1591-1593, 1601-1603,
// 1614 and 1617-1619 are absent from this extract; given the later sections
// they are presumably the corresponding get_uncollapsed_matches_* checks
// (and, at 1614, an extra lower-bound check) - confirm against the upstream
// file.
1557 DEFINE_TESTCASE(matches1, backend) {
1558  bool multi = startswith(get_dbtype(), "multi");
1559  bool remote = get_dbtype().find("remote") != string::npos;
1560 
1561  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1562  Xapian::Query myquery;
1563  Xapian::MSet mymset;
1564 
    // Single term: exactly 2 matches.
1565  myquery = query("word");
1566  enquire.set_query(myquery);
1567  mymset = enquire.get_mset(0, 10);
1568  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1569  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1570  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1574 
    // OR of two terms: still exactly 2 matches.
1575  myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
1576  enquire.set_query(myquery);
1577  mymset = enquire.get_mset(0, 10);
1578  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1579  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1580  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1584 
    // AND of the same two terms: no matches.
1585  myquery = query(Xapian::Query::OP_AND, "inmemory", "word");
1586  enquire.set_query(myquery);
1587  mymset = enquire.get_mset(0, 10);
1588  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1589  TEST_EQUAL(mymset.get_matches_estimated(), 0);
1590  TEST_EQUAL(mymset.get_matches_upper_bound(), 0);
1594 
    // "simple" AND "word" with all matches retrieved: exactly 2.
1595  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1596  enquire.set_query(myquery);
1597  mymset = enquire.get_mset(0, 10);
1598  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1599  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1600  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1604 
    // Same query with zero items requested: the bounds are looser.
1605  myquery = query(Xapian::Query::OP_AND, "simple", "word");
1606  enquire.set_query(myquery);
1607  mymset = enquire.get_mset(0, 0);
1608  if (!multi) {
1609  // This isn't true for sharded DBs since there one sub-database has 3
1610  // documents and simple and word both have termfreq of 2, so the
1611  // matcher can tell at least one document must match!)
1612  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
1613  }
1615  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1616  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1620 
    // Same query with 1 item requested: the counts are exact again.
1621  mymset = enquire.get_mset(0, 1);
1622  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1623  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1624  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1625  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1626  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1627  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1628 
    // Same query with 2 items requested.
1629  mymset = enquire.get_mset(0, 2);
1630  TEST_EQUAL(mymset.get_matches_lower_bound(), 2);
1631  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1632  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1633  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 2);
1634  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1635  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1636 
    // "paragraph" AND "another" with zero items requested: bounds 1..2.
1637  myquery = query(Xapian::Query::OP_AND, "paragraph", "another");
1638  enquire.set_query(myquery);
1639  mymset = enquire.get_mset(0, 0);
1640  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1641  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1642  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1643  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1644  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1645  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1646 
    // With 1 item requested the estimate and upper bound depend on the
    // backend type.
1647  mymset = enquire.get_mset(0, 1);
1648  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1649  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1650  if (multi && remote) {
1651  // The matcher can tell there's only one match in this case.
1652  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1653  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1654  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1655  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1656  } else {
1657  TEST_EQUAL(mymset.get_matches_estimated(), 2);
1658  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 2);
1659  TEST_EQUAL(mymset.get_matches_upper_bound(), 2);
1660  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 2);
1661  }
1662 
    // With 2 items requested the counts are all exactly 1.
1663  mymset = enquire.get_mset(0, 2);
1664  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1665  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1666  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1667  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1668  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1669  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1670 
    // Starting at offset 1 (past the only match) still reports 1 match.
1671  mymset = enquire.get_mset(1, 20);
1672  TEST_EQUAL(mymset.get_matches_lower_bound(), 1);
1673  TEST_EQUAL(mymset.get_matches_estimated(), 1);
1674  TEST_EQUAL(mymset.get_matches_upper_bound(), 1);
1675  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 1);
1676  TEST_EQUAL(mymset.get_uncollapsed_matches_estimated(), 1);
1677  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 1);
1678 }
1679 
1680 // tests that wqf affects the document weights
// Runs "word" with a second constructor argument of 2 and then with the
// default, and requires the top hit of the first run to weigh strictly more
// than the top hit of the second.
1681 DEFINE_TESTCASE(wqf1, backend) {
1682  // Both queries have length 2; in q1 word has wqf=2, in q2 word has wqf=1
1683  Xapian::Query q1("word", 2);
1684  Xapian::Query q2("word");
1685  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1686  enquire.set_query(q1);
1687  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1688  enquire.set_query(q2);
1689  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1690  // Check the weights
1691  TEST(mset1.begin().get_weight() > mset2.begin().get_weight());
1692 }
1693 
1694 // tests that query length affects the document weights
// NOTE(review): as written, q1 and q2 are constructed identically and the
// active assertion requires the top weights to be EQUAL; the strict "<"
// check is commented out. The heading above therefore no longer describes
// what is asserted - presumably a historical leftover; confirm upstream.
1695 DEFINE_TESTCASE(qlen1, backend) {
1696  Xapian::Query q1("word");
1697  Xapian::Query q2("word");
1698  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1699  enquire.set_query(q1);
1700  Xapian::MSet mset1 = enquire.get_mset(0, 10);
1701  enquire.set_query(q2);
1702  Xapian::MSet mset2 = enquire.get_mset(0, 2);
1703  // Check the weights
1704  // TEST(mset1.begin().get_weight() < mset2.begin().get_weight());
1705  TEST(mset1.begin().get_weight() == mset2.begin().get_weight());
1706 }
1707 
1708 // tests that opening a non-existent termlist throws the correct exception
// Opens the termlist of document 1 (which is expected to work) and then of
// document 999999999.
// NOTE(review): listing lines 1711-1714 and 1718 are absent from this
// extract. The closing "));" on line 1719 shows that statement is the tail
// of a wrapping macro call opened on line 1718 - presumably an
// exception-expecting test macro; confirm against the upstream file.
1709 DEFINE_TESTCASE(termlist1, backend) {
1710  Xapian::Database db(get_database("apitest_onedoc"));
1715  /* Cause the database to be used properly, showing up problems
1716  * with the link being in a bad state. CME */
1717  Xapian::TermIterator temp = db.termlist_begin(1);
1719  Xapian::TermIterator t = db.termlist_begin(999999999));
1720 }
1721 
1722 // tests that a Xapian::TermIterator works as an STL iterator
// Copies a TermIterator in two ways, builds a vector<string> from an
// iterator range, then re-reads the termlist and checks it yields the same
// terms in the same order.
// NOTE(review): listing line 1725 is absent from this extract; it presumably
// declares `t` as the begin iterator for document 1's termlist - confirm
// against the upstream file.
1723 DEFINE_TESTCASE(termlist2, backend) {
1724  Xapian::Database db(get_database("apitest_onedoc"));
1726  Xapian::TermIterator tend = db.termlist_end(1);
1727 
1728  // test operator= creates a copy which compares equal
1729  Xapian::TermIterator t_copy = t;
1730  TEST_EQUAL(t, t_copy);
1731 
1732  // test copy constructor creates a copy which compares equal
1733  Xapian::TermIterator t_clone(t);
1734  TEST_EQUAL(t, t_clone);
1735 
    // Range construction: relies on TermIterator being usable as an input
    // iterator by the standard container.
1736  vector<string> v(t, tend);
1737 
    // Obtain fresh iterators and replay the termlist against the vector.
1738  t = db.termlist_begin(1);
1739  tend = db.termlist_end(1);
1740  vector<string>::const_iterator i;
1741  for (i = v.begin(); i != v.end(); ++i) {
1742  TEST_NOT_EQUAL(t, tend);
1743  TEST_EQUAL(*i, *t);
1744  t++;
1745  }
    // The termlist must be exhausted exactly when the vector is.
1746  TEST_EQUAL(t, tend);
1747 }
1748 
// Helper returning a TermIterator for document 1 of apitest_onedoc. The
// Database object is local to this function, so the returned iterator is
// handed back after that object has gone out of scope.
// NOTE(review): listing line 1750 (the function name and parameter list) is
// absent from this extract - confirm against the upstream file.
1749 static Xapian::TermIterator
1751 {
1752  Xapian::Database db(get_database("apitest_onedoc"));
1753  return db.termlist_begin(1);
1754 }
1755 
1756 // tests that a Xapian::TermIterator still works when the DB is deleted
// Walks two iterators `t` and `u` in lock-step until `t` reaches `tend`,
// requiring them to yield the same terms.
// NOTE(review): listing lines 1758 and 1760 are absent from this extract;
// they presumably declare `u` (obtained from the static helper defined just
// before this test) and `t` (from `db`) - confirm against the upstream file.
1757 DEFINE_TESTCASE(termlist3, backend) {
1759  Xapian::Database db(get_database("apitest_onedoc"));
1761  Xapian::TermIterator tend = db.termlist_end(1);
1762 
    // Only `t` is checked against the end; `u` is assumed to be at least
    // as long.
1763  while (t != tend) {
1764  TEST_EQUAL(*t, *u);
1765  t++;
1766  u++;
1767  }
1768 }
1769 
1770 // tests skip_to
// Calls skip_to() with an empty string and then with "\xff". There are no
// explicit assertions, so the test passes if neither call fails.
// NOTE(review): listing line 1773 is absent from this extract; it presumably
// declares the iterator `i` - confirm against the upstream file.
1771 DEFINE_TESTCASE(termlist4, backend) {
1772  Xapian::Database db(get_database("apitest_onedoc"));
1774  i.skip_to("");
1775  i.skip_to("\xff");
1776 }
1777 
1778 // tests punctuation is OK in terms (particularly in remote queries)
// Runs three single-term queries whose terms contain ';', ':' and ','
// respectively. The results are not inspected: the test passes if each
// get_mset() call completes.
1779 DEFINE_TESTCASE(puncterms1, backend) {
1780  Xapian::Database db(get_database("apitest_punc"));
1781  Xapian::Enquire enquire(db);
1782 
1783  Xapian::Query q1("semi;colon");
1784  enquire.set_query(q1);
1785  Xapian::MSet m1 = enquire.get_mset(0, 10);
1786 
1787  Xapian::Query q2("col:on");
1788  enquire.set_query(q2);
1789  Xapian::MSet m2 = enquire.get_mset(0, 10);
1790 
1791  Xapian::Query q3("com,ma");
1792  enquire.set_query(q3);
1793  Xapian::MSet m3 = enquire.get_mset(0, 10);
1794 }
1795 
1796 // test that searching for a term with a space or backslash in it works
// Three queries are run - for terms containing a space, a tab and a
// backslash - and each must return exactly one match, checked both via
// TEST_MSET_SIZE and by counting iterator steps. The first two also check
// that the matched document's data and value slots are non-empty.
// NOTE(review): listing line 1801 is absent from this extract; it presumably
// declares the MSetIterator `m` used in the counting loops - confirm against
// the upstream file.
1797 DEFINE_TESTCASE(spaceterms1, backend) {
1798  Xapian::Enquire enquire(get_database("apitest_space"));
1799  Xapian::MSet mymset;
1800  Xapian::doccount count;
1802  Xapian::Stem stemmer("english");
1803 
1804  enquire.set_query(stemmer("space man"));
1805  mymset = enquire.get_mset(0, 10);
1806  TEST_MSET_SIZE(mymset, 1);
1807  count = 0;
1808  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1809  TEST_EQUAL(count, 1);
1810 
    // Value slots 1..6 must be non-empty (slot 0 is not checked here).
1811  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
1812  TEST_NOT_EQUAL(mymset.begin().get_document().get_data(), "");
1813  TEST_NOT_EQUAL(mymset.begin().get_document().get_value(value_no), "");
1814  }
1815 
1816  enquire.set_query(stemmer("tab\tby"));
1817  mymset = enquire.get_mset(0, 10);
1818  TEST_MSET_SIZE(mymset, 1);
1819  count = 0;
1820  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1821  TEST_EQUAL(count, 1);
1822 
    // Value slots 0..6 must be non-empty; slot 0 additionally must be
    // longer than 262 bytes with byte 262 equal to 255.
1823  for (Xapian::valueno value_no = 0; value_no < 7; ++value_no) {
1824  string value = mymset.begin().get_document().get_value(value_no);
1825  TEST_NOT_EQUAL(value, "");
1826  if (value_no == 0) {
1827  TEST(value.size() > 262);
1828  TEST_EQUAL(static_cast<unsigned char>(value[262]), 255);
1829  }
1830  }
1831 
1832  enquire.set_query(stemmer("back\\slash"));
1833  mymset = enquire.get_mset(0, 10);
1834  TEST_MSET_SIZE(mymset, 1);
1835  count = 0;
1836  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1837  TEST_EQUAL(count, 1);
1838 }
1839 
1840 // test that XOR queries work
// Builds an OP_XOR query over the stemmed terms "this", "word" and "of" and
// expects documents 1, 2, 5, 6 in that order.
// NOTE(review): listing line 1851 is absent from this extract; since the
// expected order here is by ascending docid (unlike xor2, which is otherwise
// the same query), it presumably selects a weighting scheme - confirm
// against the upstream file.
1841 DEFINE_TESTCASE(xor1, backend) {
1842  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1843  Xapian::Stem stemmer("english");
1844 
1845  vector<string> terms;
1846  terms.push_back(stemmer("this"));
1847  terms.push_back(stemmer("word"));
1848  terms.push_back(stemmer("of"));
1849 
1850  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1852  enquire.set_query(query);
1853 
1854  Xapian::MSet mymset = enquire.get_mset(0, 10);
    // NOTE(review): the column alignment of the table below was lost in
    // this extract, so which term each '*' belongs to cannot be read off.
1855  // Docid this word of Match?
1856  // 1 * *
1857  // 2 * * * *
1858  // 3 * *
1859  // 4 * *
1860  // 5 * *
1861  // 6 * *
1862  mset_expect_order(mymset, 1, 2, 5, 6);
1863 }
1864 
// Same OP_XOR query as xor1 over the stemmed terms "this", "word" and "of",
// but here the expected order is 2, 1, 5, 6.
1866 DEFINE_TESTCASE(xor2, backend) {
1867  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1868  Xapian::Stem stemmer("english");
1869 
1870  vector<string> terms;
1871  terms.push_back(stemmer("this"));
1872  terms.push_back(stemmer("word"));
1873  terms.push_back(stemmer("of"));
1874 
1875  Xapian::Query query(Xapian::Query::OP_XOR, terms.begin(), terms.end());
1876  enquire.set_query(query);
1877 
1878  Xapian::MSet mymset = enquire.get_mset(0, 10);
    // NOTE(review): the column alignment of the table below was lost in
    // this extract, so which term each count belongs to cannot be read off.
1879  // Docid LEN this word of Match?
1880  // 1 28 2 *
1881  // 2 81 5 8 1 *
1882  // 3 15 1 2
1883  // 4 31 1 1
1884  // 5 15 1 *
1885  // 6 15 1 *
1886  mset_expect_order(mymset, 2, 1, 5, 6);
1887 }
1888 
1889 // test Xapian::Database::get_document()
// Fetches document 1 from apitest_onedoc, copies the Document object, then
// modifies the original and checks that the copy sees the new data too.
// NOTE(review): listing lines 1893-1897 are absent from this extract -
// confirm against the upstream file what is checked there.
1890 DEFINE_TESTCASE(getdoc1, backend) {
1891  Xapian::Database db(get_database("apitest_onedoc"));
1892  Xapian::Document doc(db.get_document(1));
1898  // Check that Document works as a handle on modification
1899  // (this was broken for the first try at Xapian::Document prior to 0.7).
1900  Xapian::Document doc2 = doc;
1901  doc.set_data("modified!");
1902  TEST_EQUAL(doc.get_data(), "modified!");
    // doc2 was copied before the modification yet must report the same
    // data as doc.
1903  TEST_EQUAL(doc.get_data(), doc2.get_data());
1904 }
1905 
1906 // test whether operators with no elements work as a null query
1907 DEFINE_TESTCASE(emptyop1, backend) {
1908  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1909  vector<Xapian::Query> nullvec;
1910 
1911  Xapian::Query query1(Xapian::Query::OP_XOR, nullvec.begin(), nullvec.end());
1912 
1913  enquire.set_query(query1);
1914  Xapian::MSet mymset = enquire.get_mset(0, 10);
1915  TEST_MSET_SIZE(mymset, 0);
1916  // In Xapian < 1.3.0, this gave InvalidArgumentError (because
1917  // query1.empty()) but elsewhere we treat an empty query as just not
1918  // matching any documents, so we now do the same here too.
1920  enquire.get_matching_terms_end(1));
1921 }
1922 
1923 // Regression test for check_at_least SEGV when there are no matches.
1924 DEFINE_TESTCASE(checkatleast1, backend) {
1925  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1926  enquire.set_query(Xapian::Query("thom"));
1927  Xapian::MSet mymset = enquire.get_mset(0, 10, 11);
1928  TEST_EQUAL(0, mymset.size());
1929 }
1930 
1931 // Regression test - if check_at_least was set we returned (check_at_least - 1)
1932 // results, rather than the requested msize. Fixed in 1.0.2.
1933 DEFINE_TESTCASE(checkatleast2, backend) {
1934  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1935  enquire.set_query(Xapian::Query("paragraph"));
1936 
1937  Xapian::MSet mymset = enquire.get_mset(0, 3, 10);
1938  TEST_MSET_SIZE(mymset, 3);
1939  TEST_EQUAL(mymset.get_matches_lower_bound(), 5);
1941 
1942  mymset = enquire.get_mset(0, 2, 4);
1943  TEST_MSET_SIZE(mymset, 2);
1944  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1945  TEST_REL(mymset.get_matches_lower_bound(),>=,4);
1946  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,4);
1947  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,4);
1948 }
1949 
1950 // Feature tests - check_at_least with various sorting options.
1951 DEFINE_TESTCASE(checkatleast3, backend) {
1952  Xapian::Enquire enquire(get_database("etext"));
1953  enquire.set_query(Xapian::Query("prussian")); // 60 matches.
1954 
1955  for (int order = 0; order < 3; ++order) {
1956  switch (order) {
1957  case 0:
1959  break;
1960  case 1:
1962  break;
1963  case 2:
1965  break;
1966  }
1967 
1968  for (int sort = 0; sort < 7; ++sort) {
1969  bool reverse = (sort & 1);
1970  switch (sort) {
1971  case 0:
1972  enquire.set_sort_by_relevance();
1973  break;
1974  case 1: case 2:
1975  enquire.set_sort_by_value(0, reverse);
1976  break;
1977  case 3: case 4:
1978  enquire.set_sort_by_value_then_relevance(0, reverse);
1979  break;
1980  case 5: case 6:
1981  enquire.set_sort_by_relevance_then_value(0, reverse);
1982  break;
1983  }
1984 
1985  Xapian::MSet mset = enquire.get_mset(0, 100, 500);
1986  TEST_MSET_SIZE(mset, 60);
1987  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1988  TEST_EQUAL(mset.get_matches_estimated(), 60);
1989  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
1993 
1994  mset = enquire.get_mset(0, 50, 100);
1995  TEST_MSET_SIZE(mset, 50);
1996  TEST_EQUAL(mset.get_matches_lower_bound(), 60);
1997  TEST_EQUAL(mset.get_matches_estimated(), 60);
1998  TEST_EQUAL(mset.get_matches_upper_bound(), 60);
1999  TEST_EQUAL(mset.get_uncollapsed_matches_lower_bound(), 60);
2000  TEST_EQUAL(mset.get_uncollapsed_matches_estimated(), 60);
2001  TEST_EQUAL(mset.get_uncollapsed_matches_upper_bound(), 60);
2002 
2003  mset = enquire.get_mset(0, 10, 50);
2004  TEST_MSET_SIZE(mset, 10);
2005  TEST_REL(mset.get_matches_lower_bound(),>=,50);
2006  TEST_REL(mset.get_uncollapsed_matches_lower_bound(),>=,50);
2007  }
2008  }
2009 }
2010 
2011 // tests all document postlists
2012 DEFINE_TESTCASE(allpostlist1, backend) {
2013  Xapian::Database db(get_database("apitest_manydocs"));
2015  unsigned int j = 1;
2016  while (i != db.postlist_end("")) {
2017  TEST_EQUAL(*i, j);
2018  i++;
2019  j++;
2020  }
2021  TEST_EQUAL(j, 513);
2022 
2023  i = db.postlist_begin("");
2024  j = 1;
2025  while (i != db.postlist_end("")) {
2026  TEST_EQUAL(*i, j);
2027  i++;
2028  j++;
2029  if (j == 50) {
2030  j += 10;
2031  i.skip_to(j);
2032  }
2033  }
2034  TEST_EQUAL(j, 513);
2035 }
2036 
2038 {
2039  // Don't bother with postlist_begin() because allpostlist tests cover that.
2041  TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
2042  TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
2044 }
2045 
2046 // tests results of passing an empty term to various methods
2047 DEFINE_TESTCASE(emptyterm1, backend) {
2048  Xapian::Database db(get_database("apitest_manydocs"));
2049  TEST_EQUAL(db.get_doccount(), 512);
2051 
2052  db = get_database("apitest_onedoc");
2053  TEST_EQUAL(db.get_doccount(), 1);
2055 
2056  db = get_database("");
2057  TEST_EQUAL(db.get_doccount(), 0);
2059 }
2060 
2061 // Test for alldocs postlist with a sparse database.
2062 DEFINE_TESTCASE(alldocspl1, writable) {
2064  Xapian::Document doc;
2065  doc.set_data("5");
2066  doc.add_value(0, "5");
2067  db.replace_document(5, doc);
2068 
2070  TEST(i != db.postlist_end(""));
2071  TEST_EQUAL(*i, 5);
2072  TEST_EQUAL(i.get_doclength(), 0);
2073  TEST_EQUAL(i.get_unique_terms(), 0);
2074  TEST_EQUAL(i.get_wdf(), 1);
2075  ++i;
2076  TEST(i == db.postlist_end(""));
2077 }
2078 
2079 // Test reading and writing a modified alldocspostlist.
2080 DEFINE_TESTCASE(alldocspl2, writable) {
2081  Xapian::PostingIterator i, end;
2082  {
2084  Xapian::Document doc;
2085  doc.set_data("5");
2086  doc.add_value(0, "5");
2087  db.replace_document(5, doc);
2088 
2089  // Test iterating before committing the changes.
2090  i = db.postlist_begin("");
2091  end = db.postlist_end("");
2092  TEST(i != end);
2093  TEST_EQUAL(*i, 5);
2094  TEST_EQUAL(i.get_doclength(), 0);
2095  TEST_EQUAL(i.get_unique_terms(), 0);
2096  TEST_EQUAL(i.get_wdf(), 1);
2097  ++i;
2098  TEST(i == end);
2099 
2100  db.commit();
2101 
2102  // Test iterating after committing the changes.
2103  i = db.postlist_begin("");
2104  end = db.postlist_end("");
2105  TEST(i != end);
2106  TEST_EQUAL(*i, 5);
2107  TEST_EQUAL(i.get_doclength(), 0);
2108  TEST_EQUAL(i.get_unique_terms(), 0);
2109  TEST_EQUAL(i.get_wdf(), 1);
2110  ++i;
2111  TEST(i == end);
2112 
2113  // Add another document.
2114  doc = Xapian::Document();
2115  doc.set_data("5");
2116  doc.add_value(0, "7");
2117  db.replace_document(7, doc);
2118 
2119  // Test iterating through before committing the changes.
2120  i = db.postlist_begin("");
2121  end = db.postlist_end("");
2122  TEST(i != end);
2123  TEST_EQUAL(*i, 5);
2124  TEST_EQUAL(i.get_doclength(), 0);
2125  TEST_EQUAL(i.get_unique_terms(), 0);
2126  TEST_EQUAL(i.get_wdf(), 1);
2127  ++i;
2128  TEST(i != end);
2129  TEST_EQUAL(*i, 7);
2130  TEST_EQUAL(i.get_doclength(), 0);
2131  TEST_EQUAL(i.get_unique_terms(), 0);
2132  TEST_EQUAL(i.get_wdf(), 1);
2133  ++i;
2134  TEST(i == end);
2135 
2136  // Delete the first document.
2137  db.delete_document(5);
2138 
2139  // Test iterating through before committing the changes.
2140  i = db.postlist_begin("");
2141  end = db.postlist_end("");
2142  TEST(i != end);
2143  TEST_EQUAL(*i, 7);
2144  TEST_EQUAL(i.get_doclength(), 0);
2145  TEST_EQUAL(i.get_unique_terms(), 0);
2146  TEST_EQUAL(i.get_wdf(), 1);
2147  ++i;
2148  TEST(i == end);
2149 
2150  // Test iterating through after committing the changes, and dropping the
2151  // reference to the main DB.
2152  db.commit();
2153  i = db.postlist_begin("");
2154  end = db.postlist_end("");
2155  }
2156 
2157  TEST(i != end);
2158  TEST_EQUAL(*i, 7);
2159  TEST_EQUAL(i.get_doclength(), 0);
2160  TEST_EQUAL(i.get_unique_terms(), 0);
2161  TEST_EQUAL(i.get_wdf(), 1);
2162  ++i;
2163  TEST(i == end);
2164 }
2165 
2166 // Feature test for Query::OP_SCALE_WEIGHT.
2167 DEFINE_TESTCASE(scaleweight1, backend) {
2168  Xapian::Database db(get_database("apitest_phrase"));
2169  Xapian::Enquire enq(db);
2171 
2172  static const char * const queries[] = {
2173  "pad",
2174  "milk fridge",
2175  "leave milk on fridge",
2176  "ordered milk operator",
2177  "ordered phrase operator",
2178  "leave \"milk on fridge\"",
2179  "notpresent",
2180  "leave \"milk notpresent\"",
2181  };
2182  static const double multipliers[] = {
2183  -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
2184  0, 0
2185  };
2186 
2187  for (auto qstr : queries) {
2188  tout.str(string());
2189  Xapian::Query query1 = qp.parse_query(qstr);
2190  tout << "query1: " << query1.get_description() << endl;
2191  for (const double *multp = multipliers; multp[0] != multp[1]; ++multp) {
2192  double mult = *multp;
2193  if (mult < 0) {
2196  query1, mult));
2197  continue;
2198  }
2199  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, mult);
2200  tout << "query2: " << query2.get_description() << endl;
2201 
2202  enq.set_query(query1);
2203  Xapian::MSet mset1 = enq.get_mset(0, 20);
2204  enq.set_query(query2);
2205  Xapian::MSet mset2 = enq.get_mset(0, 20);
2206 
2207  TEST_EQUAL(mset1.size(), mset2.size());
2208 
2209  Xapian::MSetIterator i1, i2;
2210  if (mult > 0) {
2211  for (i1 = mset1.begin(), i2 = mset2.begin();
2212  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2213  TEST_EQUAL_DOUBLE(i1.get_weight() * mult, i2.get_weight());
2214  TEST_EQUAL(*i1, *i2);
2215  }
2216  } else {
2217  // Weights in mset2 are 0; so it should be sorted by docid.
2218  vector<Xapian::docid> ids1;
2219  vector<Xapian::docid> ids2;
2220  for (i1 = mset1.begin(), i2 = mset2.begin();
2221  i1 != mset1.end() && i2 != mset2.end(); ++i1, ++i2) {
2222  TEST_NOT_EQUAL_DOUBLE(i1.get_weight(), 0);
2223  TEST_EQUAL_DOUBLE(i2.get_weight(), 0);
2224  ids1.push_back(*i1);
2225  ids2.push_back(*i2);
2226  }
2227  sort(ids1.begin(), ids1.end());
2228  TEST_EQUAL(ids1, ids2);
2229  }
2230  }
2231  }
2232 }
2233 
2234 // Test Query::OP_SCALE_WEIGHT being used to multiply some of the weights of a
2235 // search by zero.
2236 DEFINE_TESTCASE(scaleweight2, backend) {
2237  Xapian::Database db(get_database("apitest_phrase"));
2238  Xapian::Enquire enq(db);
2240 
2241  Xapian::Query query1("fridg");
2242  Xapian::Query query2(Xapian::Query::OP_SCALE_WEIGHT, query1, 2.5);
2243  Xapian::Query query3("milk");
2244  Xapian::Query query4(Xapian::Query::OP_SCALE_WEIGHT, query3, 0);
2245  Xapian::Query query5(Xapian::Query::OP_OR, query2, query4);
2246 
2247  // query5 should first return the same results as query1, in the same
2248  // order, and then return the results of query3 which aren't also results
2249  // of query1, in ascending docid order. We test that this happens.
2250 
2251  // First, build a vector of docids matching the first part of the query,
2252  // and append the non-duplicate docids matching the second part of the
2253  // query.
2254  vector<Xapian::docid> ids1;
2255  set<Xapian::docid> idsin1;
2256  vector<Xapian::docid> ids3;
2257 
2258  enq.set_query(query1);
2259  Xapian::MSet mset1 = enq.get_mset(0, 20);
2260  enq.set_query(query3);
2261  Xapian::MSet mset3 = enq.get_mset(0, 20);
2262  TEST_NOT_EQUAL(mset1.size(), 0);
2263  for (i = mset1.begin(); i != mset1.end(); ++i) {
2264  ids1.push_back(*i);
2265  idsin1.insert(*i);
2266  }
2267  TEST_NOT_EQUAL(mset3.size(), 0);
2268  for (i = mset3.begin(); i != mset3.end(); ++i) {
2269  if (idsin1.find(*i) != idsin1.end())
2270  continue;
2271  ids3.push_back(*i);
2272  }
2273  sort(ids3.begin(), ids3.end());
2274  ids1.insert(ids1.end(), ids3.begin(), ids3.end());
2275 
2276  // Now, run the combined query and build a vector of the matching docids.
2277  vector<Xapian::docid> ids5;
2278  enq.set_query(query5);
2279  Xapian::MSet mset5 = enq.get_mset(0, 20);
2280  for (i = mset5.begin(); i != mset5.end(); ++i) {
2281  ids5.push_back(*i);
2282  }
2283 
2284  TEST_EQUAL(ids1, ids5);
2285 }
2286 
2287 // Regression test for bug fixed in 1.0.5 - this test would failed under
2288 // valgrind because it used an uninitialised value.
2289 DEFINE_TESTCASE(bm25weight1, backend) {
2290  Xapian::Enquire enquire(get_database("apitest_simpledata"));
2291  enquire.set_weighting_scheme(Xapian::BM25Weight(1, 25, 1, 0.01, 0.5));
2292  enquire.set_query(Xapian::Query("word"));
2293 
2294  Xapian::MSet mset = enquire.get_mset(0, 25);
2295 }
2296 
2297 // Feature test for TradWeight.
2298 DEFINE_TESTCASE(tradweight1, backend) {
2299  Xapian::Enquire enquire(get_database("apitest_simpledata"));
2301  enquire.set_query(Xapian::Query("word"));
2302 
2303  Xapian::MSet mset = enquire.get_mset(0, 25);
2304  TEST_EQUAL(mset.size(), 2);
2305 
2307  enquire.set_query(Xapian::Query("this"));
2308 
2309  mset = enquire.get_mset(0, 25);
2310  TEST_EQUAL(mset.size(), 6);
2311 
2312  // Check that TradWeight(0) means wdf and doc length really don't affect
2313  // the weights as stated in the documentation.
2314  TEST_EQUAL(mset[0].get_weight(), mset[5].get_weight());
2315 }
2316 
2317 // Test TradWeight when weighting documents using an RSet.
2318 // Simply changed the weighting scheme used by rset2 testcase.
2319 DEFINE_TESTCASE(tradweight4, backend) {
2320  Xapian::Database mydb(get_database("apitest_rset"));
2321  Xapian::Enquire enquire(mydb);
2322  Xapian::Query myquery = query(Xapian::Query::OP_OR, "cuddly", "people");
2323 
2324  enquire.set_query(myquery);
2326 
2327  Xapian::MSet mymset1 = enquire.get_mset(0, 10);
2328 
2329  Xapian::RSet myrset;
2330  myrset.add_document(2);
2331 
2332  Xapian::MSet mymset2 = enquire.get_mset(0, 10, &myrset);
2333 
2334  mset_expect_order(mymset1, 1, 2);
2335  // Document 2 should have higher weight than document 1 despite the wdf of
2336  // "people" being 1 because "people" indexes a document in the RSet whereas
2337  // "cuddly" (wdf=2) does not.
2338  mset_expect_order(mymset2, 2, 1);
2339 }
2340 
2341 // Feature test for Database::get_uuid().
2342 DEFINE_TESTCASE(uuid1, backend && !multi) {
2343  SKIP_TEST_FOR_BACKEND("inmemory");
2344  Xapian::Database db = get_database("apitest_simpledata");
2345  string uuid1 = db.get_uuid();
2346  TEST_EQUAL(uuid1.size(), 36);
2347 
2348  // A database with no sub-databases has an empty UUID.
2349  Xapian::Database db2;
2350  TEST(db2.get_uuid().empty());
2351 
2352  db2.add_database(db);
2353  TEST_EQUAL(uuid1, db2.get_uuid());
2354 
2355  // Multi-database has multiple UUIDs (we don't define the format exactly
2356  // so this assumes something about the implementation).
2357  db2.add_database(db);
2358  TEST_EQUAL(uuid1 + ":" + uuid1, db2.get_uuid());
2359 
2360 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
2361  // This relies on InMemory databases not supporting uuids.
2362  // A multi-database containing a database with no uuid has no uuid.
2363  db2.add_database(Xapian::Database(string(), Xapian::DB_BACKEND_INMEMORY));
2364  TEST(db2.get_uuid().empty());
2365 #endif
2366 }
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Definition: testutils.h:78
const int DB_BACKEND_INMEMORY
Use the "in memory" backend.
Definition: constants.h:195
ExpandDecider subclass which restricts terms to a particular prefix.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
Definition: omenquire.cc:829
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: omenquire.cc:878
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
Definition: omdatabase.cc:198
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_sort_by_relevance()
Set the sorting to be by relevance only.
Definition: omenquire.cc:863
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:850
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
static void print_mset_percentages(const Xapian::MSet &mset)
Definition: api_anydb.cc:54
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: omenquire.cc:198
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:869
ESetIterator back() const
Return iterator pointing to the last object in this ESet.
Definition: eset.h:362
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
Definition: enquire.h:713
Match documents which an odd number of subqueries match.
Definition: query.h:107
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:856
Class representing a stemming algorithm.
Definition: stem.h:62
PositionIterator positionlist_begin(Xapian::docid did, const std::string &tname) const
An iterator pointing to the start of the position list for a given term in a given document...
Definition: omdatabase.cc:250
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:46
op
Query operators.
Definition: query.h:78
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails...
Definition: testsuite.h:267
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: omenquire.cc:246
static void test_emptyterm1_helper(Xapian::Database &db)
Definition: api_anydb.cc:2037
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
Definition: testsuite.h:300
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
double get_max_attained() const
The maximum weight attained by any document.
Definition: omenquire.cc:297
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
a generic test suite engine
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
Definition: enquire.h:601
Class representing a list of search results.
Definition: mset.h:44
void skip_to(const std::string &term)
Advance the iterator to term term.
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
Virtual base class for expand decider functor.
Definition: expanddecider.h:37
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
static Xapian::Stem stemmer
Definition: stemtest.cc:41
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
Definition: enquire.h:591
double get_weight() const
Get the weight for the current position.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:956
test functionality of the Xapian API
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: omenquire.cc:262
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
Definition: eset.h:345
Class for iterating over a list of terms.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Definition: omenquire.cc:941
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
Xapian::doccount size() const
Return number of items in this ESet object.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: omenquire.cc:276
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
docids sort in whatever order is most efficient for the backend.
Definition: enquire.h:329
static int verbose
Definition: xapian-delve.cc:47
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
This class provides read/write access to a database.
Definition: database.h:785
DEFINE_TESTCASE(zerodocid1, backend)
Definition: api_anydb.cc:118
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:351
Scale the weight contributed by a subquery.
Definition: query.h:166
Public interfaces for the Xapian library.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Definition: omenquire.cc:887
docids sort in ascending order (default)
Definition: enquire.h:324
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
std::string get_dbtype()
Definition: apitest.cc:42
void fetch(const MSetIterator &begin, const MSetIterator &end) const
Prefetch hint a range of items.
Definition: mset.h:595
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:607
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:612
Xapian::termcount get_ebound() const
Return a bound on the full size of this ESet object.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Definition: omenquire.cc:222
int percent
The percentage score for a document in an MSet.
Definition: types.h:66
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:768
static void print_mset_weights(const Xapian::MSet &mset)
Definition: api_anydb.cc:45
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:46
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
Definition: queryparser.cc:161
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Definition: mset.h:514
Iterator over a Xapian::ESet.
Definition: eset.h:160
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
Definition: database.h:238
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:75
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Definition: omdatabase.cc:148
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
bool term_exists(const std::string &tname) const
Check if a given term exists in the database.
Definition: omdatabase.cc:524
double get_weight() const
Get the weight for the current position.
Definition: omenquire.cc:460
void add_document(Xapian::docid did)
Add a document to the relevance set.
Definition: omenquire.cc:104
Match only documents which all subqueries match.
Definition: query.h:84
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
static Xapian::TermIterator test_termlist3_helper()
Definition: api_anydb.cc:1750
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
bool operator()(const string &tname) const
Do we want this term in the ESet?
Definition: api_anydb.cc:569
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:283
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Match documents which at least one subquery matches.
Definition: query.h:92
void skip_to(Xapian::docid did)
Advance the iterator to document did.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:111
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Definition: omenquire.cc:206
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
Class representing a list of search results.
Definition: eset.h:43
Xapian::Document get_document() const
Get the Document object for the current position.
Definition: omenquire.cc:450
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Definition: mset.h:624
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
Definition: omenquire.cc:842
std::string get_value(Xapian::valueno slot) const
Get value by number.
Definition: omdocument.cc:64
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Definition: eset.h:350
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:535
A relevance set (R-Set).
Definition: enquire.h:60
std::string get_uuid() const
Get a UUID for the database.
Definition: omdatabase.cc:776
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:269
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Definition: omdatabase.cc:339
docids sort in descending order.
Definition: enquire.h:326