xapian-core  1.4.21
api_db.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2015,2016,2017,2019 Olly Betts
7  * Copyright 2006,2007,2008,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "api_db.h"
28 
29 #include <algorithm>
30 #include <fstream>
31 #include <map>
32 #include <string>
33 #include <vector>
34 #include "safenetdb.h" // For gai_strerror().
35 #include "safesysstat.h" // For mkdir().
36 #include "safeunistd.h" // For sleep().
37 
38 #include <xapian.h>
39 
40 #include "backendmanager.h"
41 #include "testsuite.h"
42 #include "testutils.h"
43 #include "unixcmds.h"
44 
45 #include "apitest.h"
46 
47 using namespace std;
48 
49 static Xapian::Query
50 query(const string &t)
51 {
52  return Xapian::Query(Xapian::Stem("english")(t));
53 }
54 
55 // #######################################################################
56 // # Tests start here
57 
58 // tests Xapian::Database::get_termfreq() and Xapian::Database::term_exists()
59 DEFINE_TESTCASE(termstats, backend) {
60  Xapian::Database db(get_database("apitest_simpledata"));
61 
62  TEST(!db.term_exists("corn"));
63  TEST_EQUAL(db.get_termfreq("corn"), 0);
64  TEST(db.term_exists("banana"));
65  TEST_EQUAL(db.get_termfreq("banana"), 1);
66  TEST(db.term_exists("paragraph"));
67  TEST_EQUAL(db.get_termfreq("paragraph"), 5);
68 }
69 
70 // Check that stub databases work: write a stub file containing a single
// "auto" entry for apitest_simpledata, then run a query through it.
71 DEFINE_TESTCASE(stubdb1, path) {
72  mkdir(".stub", 0755);
73  const char * dbpath = ".stub/stubdb1";
74  ofstream out(dbpath);
75  TEST(out.is_open());
// The database path is written with a "../" prefix.
// (presumably because it is resolved relative to the stub file's directory
// - TODO confirm)
76  out << "auto ../" << get_database_path("apitest_simpledata") << endl;
77  out.close();
78 
79  {
// NOTE(review): `db` is used in this scope but no declaration of it is
// visible in this listing - confirm against the upstream source.
81  Xapian::Enquire enquire(db);
82  enquire.set_query(Xapian::Query("word"));
83  enquire.get_mset(0, 10);
84  }
85  {
// Open the stub via the Xapian::Database constructor and run the same query.
86  Xapian::Database db(dbpath);
87  Xapian::Enquire enquire(db);
88  enquire.set_query(Xapian::Query("word"));
89  enquire.get_mset(0, 10);
90  }
91 
93 }
94 
95 // Check that stub databases work remotely.
//
// The stub file is rewritten several times, each time with a different
// "remote" entry: "remote :<xapian-progsrv command> <database path>",
// a bare "remote", "remote foo", "remote [::1]:65535" (only when
// XAPIAN_HAS_REMOTE_BACKEND is defined) and "remote [::1]".
//
// NOTE(review): in this listing a number of statements appear to be absent
// (the `try` blocks have no visible handlers, `db` is used in the first
// `try` without a visible declaration, and several bare ");" lines have no
// visible opening) - confirm against the upstream source.
96 DEFINE_TESTCASE(stubdb2, path) {
97  mkdir(".stub", 0755);
98  const char * dbpath = ".stub/stubdb2";
99  ofstream out(dbpath);
100  TEST(out.is_open());
101  out << "remote :" << BackendManager::get_xapian_progsrv_command()
102  << ' ' << get_database_path("apitest_simpledata") << endl;
103  out.close();
104 
105  try {
107  Xapian::Enquire enquire(db);
108  enquire.set_query(Xapian::Query("word"));
109  enquire.get_mset(0, 10);
111 #ifdef XAPIAN_HAS_REMOTE_BACKEND
112  throw;
113 #endif
114  }
115 
116  try {
117  Xapian::Database db(dbpath);
118  Xapian::Enquire enquire(db);
119  enquire.set_query(Xapian::Query("word"));
120  enquire.get_mset(0, 10);
122 #ifdef XAPIAN_HAS_REMOTE_BACKEND
123  throw;
124 #endif
125  }
126 
// Replace the stub's contents with a "remote" entry which has no arguments.
127  out.open(dbpath);
128  TEST(out.is_open());
129  out << "remote" << endl;
130  out.close();
131 
132  // Quietly ignored prior to 1.4.1.
135  );
136 
137  // Quietly ignored prior to 1.4.1.
140  );
141 
// The exception type named by EXPECTED_EXCEPTION depends on whether the
// remote backend is compiled in.
142 #ifdef XAPIAN_HAS_REMOTE_BACKEND
143 # define EXPECTED_EXCEPTION Xapian::DatabaseOpeningError
144 #else
145 # define EXPECTED_EXCEPTION Xapian::FeatureUnavailableError
146 #endif
147 
148  out.open(dbpath);
149  TEST(out.is_open());
150  out << "remote foo" << endl;
151  out.close();
152 
153  // Quietly ignored prior to 1.4.1.
156  );
157 
158  // Quietly ignored prior to 1.4.1.
161  );
162 
163 #ifdef XAPIAN_HAS_REMOTE_BACKEND
164  out.open(dbpath);
165  TEST(out.is_open());
166  out << "remote [::1]:65535" << endl;
167  out.close();
168 
169  try {
171  } catch (const Xapian::NetworkError& e) {
172  // 1.4.0 threw (on Linux) the confusing message:
173  // NetworkError: Couldn't resolve host [ (context: remote:tcp([:0)) (No address associated with hostname)
174  // 1.4.1 throws (because we don't actually support IPv6 yet) on Linux (EAI_ADDRFAMILY):
175  // NetworkError: Couldn't resolve host ::1 (context: remote:tcp(::1:65535)) (nodename nor servname provided, or not known)
176  // or on macOS (EAI_NONAME):
177  // NetworkError: Couldn't resolve host ::1 (context: remote:tcp(::1:65535)) (Address family for hostname not supported)
178  //
179  // But NetBSD seems to resolve ::1 to an IPv4 address and then tries
180  // to connect to it (which hopefully fails), so just test the message
181  // doesn't match the bad 1.4.0 result.
182  TEST(e.get_msg().find("host [") == string::npos);
183  }
184 
185  try {
187  } catch (const Xapian::NetworkError& e) {
188  // 1.4.0 threw (Linux):
189  // NetworkError: Couldn't resolve host [ (context: remote:tcp([:0)) (No address associated with hostname)
190  // 1.4.1 throws (because we don't actually support IPv6 yet) on Linux (EAI_ADDRFAMILY):
191  // NetworkError: Couldn't resolve host ::1 (context: remote:tcp(::1:65535)) (nodename nor servname provided, or not known)
192  // or on macOS (EAI_NONAME):
193  // NetworkError: Couldn't resolve host ::1 (context: remote:tcp(::1:65535)) (Address family for hostname not supported)
194  // So we test the message instead of the error string for portability.
195  //
196  // But NetBSD seems to resolve ::1 to an IPv4 address and then tries
197  // to connect to it (which hopefully fails), so just test the message
198  // doesn't match the bad 1.4.0 result.
199  TEST(e.get_msg().find("host [") == string::npos);
200  }
201 #endif
202 
203  out.open(dbpath);
204  TEST(out.is_open());
205  // Invalid - the port number is required.
206  out << "remote [::1]" << endl;
207  out.close();
208 
209  // 1.4.0 threw:
210  // NetworkError: Couldn't resolve host [ (context: remote:tcp([:0)) (No address associated with hostname)
213  );
214 
215  // 1.4.0 threw:
216  // NetworkError: Couldn't resolve host [ (context: remote:tcp([:0)) (No address associated with hostname)
219  );
220 }
221 
222 // Regression test - bad entries were ignored after a good entry prior to 1.0.8.
//
// The stub file written here has a valid "auto" entry followed by the line
// "bad line here".
223 DEFINE_TESTCASE(stubdb3, path) {
224  mkdir(".stub", 0755);
225  const char * dbpath = ".stub/stubdb3";
226  ofstream out(dbpath);
227  TEST(out.is_open());
228  out << "auto ../" << get_database_path("apitest_simpledata") << "\n"
229  "bad line here\n";
230  out.close();
231 
234 
// NOTE(review): the line below ends with an unmatched ")" and so looks like
// the tail of a statement whose start is not visible in this listing -
// confirm against the upstream source.
236  Xapian::Database db(dbpath));
237 }
238 
239 // Test a stub database with just a bad entry: the stub file's only
// contents are the line "bad line here".
240 DEFINE_TESTCASE(stubdb4, !backend) {
241  mkdir(".stub", 0755);
242  const char * dbpath = ".stub/stubdb4";
243  ofstream out(dbpath);
244  TEST(out.is_open());
245  out << "bad line here\n";
246  out.close();
247 
250 
// NOTE(review): the line below ends with an unmatched ")" and so looks like
// the tail of a statement whose start is not visible in this listing -
// confirm against the upstream source.
252  Xapian::Database db(dbpath));
253 }
254 
255 // Test a stub database with a bad entry with no spaces (prior to 1.1.0 this
256 // was deliberately allowed, though not documented).
//
// The stub file written here has the line "bad" followed by a valid "auto"
// entry.
257 DEFINE_TESTCASE(stubdb5, path) {
258  mkdir(".stub", 0755);
259  const char * dbpath = ".stub/stubdb5";
260  ofstream out(dbpath);
261  TEST(out.is_open());
262  out << "bad\n"
263  "auto ../" << get_database_path("apitest_simpledata") << endl;
264  out.close();
265 
268 
// NOTE(review): the line below ends with an unmatched ")" and so looks like
// the tail of a statement whose start is not visible in this listing -
// confirm against the upstream source.
270  Xapian::Database db(dbpath));
271 }
272 
273 // Test a stub database with an inmemory database (new feature in 1.1.0).
//
// The stub file contains just the line "inmemory".  The read-only scopes
// check the database is empty and that a query on it returns an empty MSet;
// the writable scopes check the document count goes from 0 to 1.
274 DEFINE_TESTCASE(stubdb6, inmemory) {
275  mkdir(".stub", 0755);
276  const char * dbpath = ".stub/stubdb6";
277  ofstream out(dbpath);
278  TEST(out.is_open());
279  out << "inmemory\n";
280  out.close();
281 
282  // Read-only tests:
283  {
// NOTE(review): `db` is used in this scope but no declaration of it is
// visible in this listing - confirm against the upstream source.
285  TEST_EQUAL(db.get_doccount(), 0);
286  Xapian::Enquire enquire(db);
287  enquire.set_query(Xapian::Query("word"));
288  Xapian::MSet mset = enquire.get_mset(0, 10);
289  TEST(mset.empty());
290  }
291  {
292  Xapian::Database db(dbpath);
293  TEST_EQUAL(db.get_doccount(), 0);
294  Xapian::Enquire enquire(db);
295  enquire.set_query(Xapian::Query("word"));
296  Xapian::MSet mset = enquire.get_mset(0, 10);
297  TEST(mset.empty());
298  }
299 
300  // Writable tests:
// NOTE(review): in both scopes below the remaining constructor arguments and
// the statement which takes the document count from 0 to 1 are not visible
// in this listing - confirm against the upstream source.
301  {
302  Xapian::WritableDatabase db(dbpath,
304  TEST_EQUAL(db.get_doccount(), 0);
306  TEST_EQUAL(db.get_doccount(), 1);
307  }
308  {
309  Xapian::WritableDatabase db(dbpath,
311  TEST_EQUAL(db.get_doccount(), 0);
313  TEST_EQUAL(db.get_doccount(), 1);
314  }
315 }
316 
318 // Regression test - in 1.4.3 and earlier this threw
319 // Xapian::DatabaseError.
//
// Calls Xapian::Database::check() on a stub file containing just "inmemory"
// and expects Xapian::UnimplementedError to be thrown.
320 DEFINE_TESTCASE(stubdb8, inmemory) {
321  mkdir(".stub", 0755);
322  const char * dbpath = ".stub/stubdb8";
323  ofstream out(dbpath);
324  TEST(out.is_open());
325  out << "inmemory\n";
326  out.close();
327 
328  try {
329  Xapian::Database::check(dbpath);
// Only reached if check() returned without throwing.
330  FAIL_TEST("Managed to check inmemory stub");
331  } catch (const Xapian::UnimplementedError& e) {
332  // Check the message is appropriate.
// NOTE(review): the start of the statement which uses the string below is
// not visible in this listing - confirm against the upstream source.
334  "InMemory database checking not implemented");
335  }
336 }
337 
// Calls Xapian::Database::check() on a stub file containing a
// "remote :<xapian-progsrv command> <database path>" entry and expects
// Xapian::UnimplementedError to be thrown.
339 DEFINE_TESTCASE(stubdb9, path) {
340  mkdir(".stub", 0755);
341  const char * dbpath = ".stub/stubdb9";
342  ofstream out(dbpath);
343  TEST(out.is_open());
344  out << "remote :" << BackendManager::get_xapian_progsrv_command()
345  << ' ' << get_database_path("apitest_simpledata") << endl;
346  out.close();
347 
348  try {
349  Xapian::Database::check(dbpath);
// Only reached if check() returned without throwing.
350  FAIL_TEST("Managed to check remote stub");
351  } catch (const Xapian::UnimplementedError& e) {
352  // Check the message is appropriate.
// NOTE(review): the start of the statement which uses the string below is
// not visible in this listing - confirm against the upstream source.
354  "Remote database checking not implemented");
355  }
356 }
357 
358 #if 0 // the "force error" mechanism is no longer in place...
359 class MyErrorHandler : public Xapian::ErrorHandler {
360  public:
361  int count;
362 
363  bool handle_error(Xapian::Error & error) {
364  ++count;
365  tout << "Error handling caught: " << error.get_description()
366  << ", count is now " << count << "\n";
367  return true;
368  }
369 
370  MyErrorHandler() : count (0) {}
371 };
372 
373 // tests error handler in multimatch().
//
// Each of the 14 iterations builds a combined database from a different
// selection and ordering of sub-databases, runs the same query with
// MyErrorHandler attached, and checks both the order of the results and how
// many errors the handler has seen so far.
374 DEFINE_TESTCASE(multierrhandler1, backend) {
375  MyErrorHandler myhandler;
376 
377  Xapian::Database mydb2(get_database("apitest_simpledata"));
378  Xapian::Database mydb3(get_database("apitest_simpledata2"));
// Expected value of myhandler.count after the current iteration.
379  int errcount = 1;
380  for (int testcount = 0; testcount < 14; testcount ++) {
381  tout << "testcount=" << testcount << "\n";
382  Xapian::Database mydb4(get_database("-e", "apitest_termorder"));
383  Xapian::Database mydb5(get_network_database("apitest_termorder", 1));
384  Xapian::Database mydb6(get_database("-e2", "apitest_termorder"));
385  Xapian::Database mydb7(get_database("-e3", "apitest_simpledata"));
386 
// Cases 0-2, 3-5, 6-8 and 9-11 combine mydb2 and mydb3 with mydb4, mydb5,
// mydb6 and mydb7 respectively, placing the extra database last, first and
// in the middle.  Cases 12 and 13 combine mydb2 with both mydb6 and mydb7.
387  Xapian::Database dbs;
388  switch (testcount) {
389  case 0:
390  dbs.add_database(mydb2);
391  dbs.add_database(mydb3);
392  dbs.add_database(mydb4);
393  break;
394  case 1:
395  dbs.add_database(mydb4);
396  dbs.add_database(mydb2);
397  dbs.add_database(mydb3);
398  break;
399  case 2:
400  dbs.add_database(mydb3);
401  dbs.add_database(mydb4);
402  dbs.add_database(mydb2);
403  break;
404  case 3:
405  dbs.add_database(mydb2);
406  dbs.add_database(mydb3);
407  dbs.add_database(mydb5);
408  sleep(1);
409  break;
410  case 4:
411  dbs.add_database(mydb5);
412  dbs.add_database(mydb2);
413  dbs.add_database(mydb3);
414  sleep(1);
415  break;
416  case 5:
417  dbs.add_database(mydb3);
418  dbs.add_database(mydb5);
419  dbs.add_database(mydb2);
420  sleep(1);
421  break;
422  case 6:
423  dbs.add_database(mydb2);
424  dbs.add_database(mydb3);
425  dbs.add_database(mydb6);
426  break;
427  case 7:
428  dbs.add_database(mydb6);
429  dbs.add_database(mydb2);
430  dbs.add_database(mydb3);
431  break;
432  case 8:
433  dbs.add_database(mydb3);
434  dbs.add_database(mydb6);
435  dbs.add_database(mydb2);
436  break;
437  case 9:
438  dbs.add_database(mydb2);
439  dbs.add_database(mydb3);
440  dbs.add_database(mydb7);
441  break;
442  case 10:
443  dbs.add_database(mydb7);
444  dbs.add_database(mydb2);
445  dbs.add_database(mydb3);
446  break;
447  case 11:
448  dbs.add_database(mydb3);
449  dbs.add_database(mydb7);
450  dbs.add_database(mydb2);
451  break;
452  case 12:
453  dbs.add_database(mydb2);
454  dbs.add_database(mydb6);
455  dbs.add_database(mydb7);
456  break;
457  case 13:
458  dbs.add_database(mydb2);
459  dbs.add_database(mydb7);
460  dbs.add_database(mydb6);
461  break;
462  }
463  tout << "db=" << dbs << "\n";
464  Xapian::Enquire enquire(dbs, &myhandler);
465 
466  // make a query
467  Xapian::Query myquery = query(Xapian::Query::OP_OR, "inmemory", "word");
469  enquire.set_query(myquery);
470 
471  tout << "query=" << myquery << "\n";
472  // retrieve the top ten results
473  Xapian::MSet mymset = enquire.get_mset(0, 10);
474 
// The expected result order depends only on where the extra database was
// added; cases 12 and 13 expect one more error than the other cases.
475  switch (testcount) {
476  case 0: case 3: case 6: case 9:
477  mset_expect_order(mymset, 2, 4, 10);
478  break;
479  case 1: case 4: case 7: case 10:
480  mset_expect_order(mymset, 3, 5, 11);
481  break;
482  case 2: case 5: case 8: case 11:
483  mset_expect_order(mymset, 1, 6, 12);
484  break;
485  case 12:
486  case 13:
487  mset_expect_order(mymset, 4, 10);
488  errcount += 1;
489  break;
490  }
491  TEST_EQUAL(myhandler.count, errcount);
492  errcount += 1;
493  }
494 }
495 #endif
496 
// Functor which accepts a document if its data contains a given substring.
// NOTE(review): the line which opens this class definition is not visible in
// this listing - confirm against the upstream source.
498  string needle;
499  public:
// needle_ is the substring to look for in each document's data.
500  explicit GrepMatchDecider(const string& needle_)
501  : needle(needle_) {}
502 
// Returns true if doc's data contains needle.
503  bool operator()(const Xapian::Document &doc) const {
504  // Note that this is not recommended usage of get_data()
505  return doc.get_data().find(needle) != string::npos;
506  }
507 };
508 
509 // Test Xapian::MatchDecider functor.
//
// Runs the query "this" with a GrepMatchDecider for "This is" and checks the
// size of the MSet and its match-count bounds for several combinations of
// requested size, collapse key and percentage cutoff.
510 DEFINE_TESTCASE(matchdecider1, backend && !remote) {
511  Xapian::Database db(get_database("apitest_simpledata"));
512  Xapian::Enquire enquire(db);
513  enquire.set_query(Xapian::Query("this"));
514 
515  GrepMatchDecider myfunctor("This is");
516 
517  Xapian::MSet mymset = enquire.get_mset(0, 100, 0, &myfunctor);
518 
// NOTE(review): this vector has get_lastdocid() elements but is indexed by
// docid below; if the highest docid equals get_lastdocid() that index is out
// of range, and the final loop (did < size()) never visits it - TODO confirm.
519  vector<bool> docid_checked(db.get_lastdocid());
520 
521  // Check that we get the expected number of matches, and that they
522  // satisfy the condition.
523  Xapian::MSetIterator i = mymset.begin();
524  TEST(i != mymset.end());
525  TEST_EQUAL(mymset.size(), 3);
526  TEST_EQUAL(mymset.get_matches_lower_bound(), 3);
527  TEST_EQUAL(mymset.get_matches_upper_bound(), 3);
528  TEST_EQUAL(mymset.get_matches_estimated(), 3);
532  for ( ; i != mymset.end(); ++i) {
533  const Xapian::Document doc(i.get_document());
534  TEST(myfunctor(doc));
535  docid_checked[*i] = true;
536  }
537 
538  // Check that there are some documents which aren't accepted by the match
539  // decider.
540  mymset = enquire.get_mset(0, 100);
541  TEST(mymset.size() > 3);
542 
543  // Check that the bounds are appropriate even if we don't ask for any
544  // actual matches.
545  mymset = enquire.get_mset(0, 0, 0, &myfunctor);
546  TEST_EQUAL(mymset.size(), 0);
547  TEST_EQUAL(mymset.get_matches_lower_bound(), 0);
548  TEST_EQUAL(mymset.get_matches_upper_bound(), 6);
549  TEST_REL(mymset.get_matches_estimated(),>,0);
550  TEST_REL(mymset.get_matches_estimated(),<=,6);
551  TEST_EQUAL(mymset.get_uncollapsed_matches_lower_bound(), 0);
552  TEST_EQUAL(mymset.get_uncollapsed_matches_upper_bound(), 6);
553  TEST_REL(mymset.get_uncollapsed_matches_estimated(),>,0);
554  TEST_REL(mymset.get_uncollapsed_matches_estimated(),<=,6);
555 
556  // Check that the bounds are appropriate if we ask for only one hit.
557  // (Regression test - until SVN 10256, we didn't reduce the lower_bound
558  // appropriately, and returned 6 here.)
559  mymset = enquire.get_mset(0, 1, 0, &myfunctor);
560  TEST_EQUAL(mymset.size(), 1);
561  TEST_REL(mymset.get_matches_lower_bound(),>=,1);
562  TEST_REL(mymset.get_matches_lower_bound(),<=,3);
563  TEST_REL(mymset.get_matches_upper_bound(),>=,3);
564  TEST_REL(mymset.get_matches_upper_bound(),<=,6);
565  TEST_REL(mymset.get_matches_estimated(),>,0);
566  TEST_REL(mymset.get_matches_estimated(),<=,6);
567  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,1);
568  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),<=,3);
569  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),>=,3);
570  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),<=,6);
571  TEST_REL(mymset.get_uncollapsed_matches_estimated(),>,0);
572  TEST_REL(mymset.get_uncollapsed_matches_estimated(),<=,6);
573 
574  // Check that the other documents don't satisfy the condition.
575  for (Xapian::docid did = 1; did < docid_checked.size(); ++did) {
576  if (!docid_checked[did]) {
577  TEST(!myfunctor(db.get_document(did)));
578  }
579  }
580 
581  // Check that the bounds are appropriate if a collapse key is used.
582  // Use a value which is never set so we don't actually discard anything.
583  enquire.set_collapse_key(99);
584  mymset = enquire.get_mset(0, 1, 0, &myfunctor);
585  TEST_EQUAL(mymset.size(), 1);
586  TEST_REL(mymset.get_matches_lower_bound(),>=,1);
587  TEST_REL(mymset.get_matches_lower_bound(),<=,3);
588  TEST_REL(mymset.get_matches_upper_bound(),>=,3);
589  TEST_REL(mymset.get_matches_upper_bound(),<=,6);
590  TEST_REL(mymset.get_matches_estimated(),>,0);
591  TEST_REL(mymset.get_matches_estimated(),<=,6);
592  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,1);
593  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),<=,3);
594  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),>=,3);
595  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),<=,6);
596  TEST_REL(mymset.get_uncollapsed_matches_estimated(),>,0);
597  TEST_REL(mymset.get_uncollapsed_matches_estimated(),<=,6);
598 
599  // Check that the bounds are appropriate if a percentage cutoff is in
600  // use. Set a 1% threshold so we don't actually discard anything.
// NOTE(review): one line of this listing is absent just before set_cutoff()
// - confirm against the upstream source.
602  enquire.set_cutoff(1);
603  mymset = enquire.get_mset(0, 1, 0, &myfunctor);
604  TEST_EQUAL(mymset.size(), 1);
605  TEST_REL(mymset.get_matches_lower_bound(),>=,1);
606  TEST_REL(mymset.get_matches_lower_bound(),<=,3);
607  TEST_REL(mymset.get_matches_upper_bound(),>=,3);
608  TEST_REL(mymset.get_matches_upper_bound(),<=,6);
609  TEST_REL(mymset.get_matches_estimated(),>,0);
610  TEST_REL(mymset.get_matches_estimated(),<=,6);
611  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,1);
612  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),<=,3);
613  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),>=,3);
614  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),<=,6);
615  TEST_REL(mymset.get_uncollapsed_matches_estimated(),>,0);
616  TEST_REL(mymset.get_uncollapsed_matches_estimated(),<=,6);
617 
618  // And now with both a collapse key and percentage cutoff.
619  enquire.set_collapse_key(99);
620  mymset = enquire.get_mset(0, 1, 0, &myfunctor);
621  TEST_EQUAL(mymset.size(), 1);
622  TEST_REL(mymset.get_matches_lower_bound(),>=,1);
623  TEST_REL(mymset.get_matches_lower_bound(),<=,3);
624  TEST_REL(mymset.get_matches_upper_bound(),>=,3);
625  TEST_REL(mymset.get_matches_upper_bound(),<=,6);
626  TEST_REL(mymset.get_matches_estimated(),>,0);
627  TEST_REL(mymset.get_matches_estimated(),<=,6);
628  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),>=,1);
629  TEST_REL(mymset.get_uncollapsed_matches_lower_bound(),<=,3);
630  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),>=,3);
631  TEST_REL(mymset.get_uncollapsed_matches_upper_bound(),<=,6);
632  TEST_REL(mymset.get_uncollapsed_matches_estimated(),>,0);
633  TEST_REL(mymset.get_uncollapsed_matches_estimated(),<=,6);
634 }
635 
636 // Test Xapian::MatchDecider functor used as a match spy.
637 DEFINE_TESTCASE(matchdecider2, backend && !remote) {
638  Xapian::Database db(get_database("apitest_simpledata"));
639  Xapian::Enquire enquire(db);
640  enquire.set_query(Xapian::Query("this"));
641 
642  GrepMatchDecider myfunctor("This is");
643 
644  Xapian::MSet mymset = enquire.get_mset(0, 100, 0, NULL, &myfunctor);
645 
646  vector<bool> docid_checked(db.get_lastdocid());
647 
648  // Check that we get the expected number of matches, and that they
649  // satisfy the condition.
650  Xapian::MSetIterator i = mymset.begin();
651  TEST(i != mymset.end());
652  TEST_EQUAL(mymset.size(), 3);
653  for ( ; i != mymset.end(); ++i) {
654  const Xapian::Document doc(i.get_document());
655  TEST(myfunctor(doc));
656  docid_checked[*i] = true;
657  }
658 
659  // Check that the other documents don't satisfy the condition.
660  for (Xapian::docid did = 1; did < docid_checked.size(); ++did) {
661  if (!docid_checked[did]) {
662  TEST(!myfunctor(db.get_document(did)));
663  }
664  }
665 }
666 
667 // Regression test for lower bound using functor, sorting and collapsing.
//
// Runs the same query twice with a GrepMatchDecider for "We produce", once
// asking for 2 hits (mset1) and once asking for 1000 (mset2), then compares
// the bounds reported for the two.
// NOTE(review): several lines of this listing are absent between the checks
// below - confirm against the upstream source.
668 DEFINE_TESTCASE(matchdecider3, backend && !remote) {
669  Xapian::Database db(get_database("etext"));
670  Xapian::Enquire enquire(db);
671  enquire.set_query(Xapian::Query(""));
672  enquire.set_collapse_key(12);
673  enquire.set_sort_by_value(11, true);
674 
675  GrepMatchDecider myfunctor("We produce");
676 
677  Xapian::MSet mset1 = enquire.get_mset(0, 2, 0, NULL, &myfunctor);
678  Xapian::MSet mset2 = enquire.get_mset(0, 1000, 0, NULL, &myfunctor);
679 
680  // mset2 should contain all the hits, so the statistics should be exact.
681  TEST_EQUAL(mset2.get_matches_estimated(), mset2.size());
684 
687 
688  // Check that the lower bound in mset1 is not greater than the known
689  // number of hits. This failed until revision 10811.
690  TEST_REL(mset1.get_matches_lower_bound(),<=,mset2.size());
691 
692  // Check that the bounds for mset1 make sense.
695  TEST_REL(mset1.size(),<=,mset1.get_matches_upper_bound());
696 
699 
700  // The uncollapsed match would match all documents but the one the
701  // matchdecider rejects.
706 }
707 
708 // tests that mset iterators on msets compare correctly.
//
// Uses an MSet of at most two items and checks equality and inequality of
// iterators obtained from begin() and end(), including after copying and
// incrementing them.
// NOTE(review): the declarations of `j`, `l` and `m` are not visible in this
// listing - confirm against the upstream source.
709 DEFINE_TESTCASE(msetiterator1, backend) {
710  Xapian::Enquire enquire(get_database("apitest_simpledata"));
711  enquire.set_query(Xapian::Query("this"));
712  Xapian::MSet mymset = enquire.get_mset(0, 2);
713 
715  j = mymset.begin();
716  Xapian::MSetIterator k = mymset.end();
719  Xapian::MSetIterator n = mymset.begin();
720  Xapian::MSetIterator o = mymset.begin();
721  TEST_NOT_EQUAL(j, k);
722  TEST_NOT_EQUAL(l, m);
723  TEST_EQUAL(k, m);
724  TEST_EQUAL(j, l);
725  TEST_EQUAL(j, j);
726  TEST_EQUAL(k, k);
727 
// Advance a copy of the begin iterator one step at a time; after two
// increments it is expected to compare equal to m.
728  k = j;
729  TEST_EQUAL(j, k);
730  TEST_EQUAL(j, o);
731  k++;
732  TEST_NOT_EQUAL(j, k);
733  TEST_NOT_EQUAL(k, l);
734  TEST_NOT_EQUAL(k, m);
735  TEST_NOT_EQUAL(k, o);
736  o++;
737  TEST_EQUAL(k, o);
738  k++;
739  TEST_NOT_EQUAL(j, k);
740  TEST_NOT_EQUAL(k, l);
741  TEST_EQUAL(k, m);
742  TEST_EQUAL(n, l);
743 
// Assigning m to n should make n compare equal to end().
744  n = m;
745  TEST_NOT_EQUAL(n, l);
746  TEST_EQUAL(n, m);
747  TEST_NOT_EQUAL(n, mymset.begin());
748  TEST_EQUAL(n, mymset.end());
749 }
750 
751 // tests that mset iterators on empty msets compare equal.
//
// The MSet is requested with a maximum of 0 items.
// NOTE(review): the declarations of `l` and `m` are not visible in this
// listing - confirm against the upstream source.
752 DEFINE_TESTCASE(msetiterator2, backend) {
753  Xapian::Enquire enquire(get_database("apitest_simpledata"));
754  enquire.set_query(Xapian::Query("this"));
755  Xapian::MSet mymset = enquire.get_mset(0, 0);
756 
757  Xapian::MSetIterator j = mymset.begin();
758  Xapian::MSetIterator k = mymset.end();
761  TEST_EQUAL(j, k);
762  TEST_EQUAL(l, m);
763  TEST_EQUAL(k, m);
764  TEST_EQUAL(j, l);
765  TEST_EQUAL(j, j);
766  TEST_EQUAL(k, k);
767 }
768 
769 // tests that begin().get_document() works when first != 0
770 DEFINE_TESTCASE(msetiterator3, backend) {
771  Xapian::Database mydb(get_database("apitest_simpledata"));
772  Xapian::Enquire enquire(mydb);
773  enquire.set_query(Xapian::Query("this"));
774 
775  Xapian::MSet mymset = enquire.get_mset(2, 10);
776 
777  TEST(!mymset.empty());
778  Xapian::Document doc(mymset.begin().get_document());
779  TEST(!doc.get_data().empty());
780 }
781 
782 // tests that eset iterators on esets compare correctly.
//
// Builds an RSet from the first two hits for "this", asks for an ESet of at
// most two items, and checks equality and inequality of iterators obtained
// from begin() and end(), including after copying and incrementing them.
// NOTE(review): the declarations of `j`, `l` and `m` are not visible in this
// listing - confirm against the upstream source.
783 DEFINE_TESTCASE(esetiterator1, backend) {
784  Xapian::Enquire enquire(get_database("apitest_simpledata"));
785  enquire.set_query(Xapian::Query("this"));
786 
787  Xapian::MSet mymset = enquire.get_mset(0, 10);
788  TEST(mymset.size() >= 2);
789 
790  Xapian::RSet myrset;
791  Xapian::MSetIterator i = mymset.begin();
792  myrset.add_document(*i);
793  myrset.add_document(*(++i));
794 
795  Xapian::ESet myeset = enquire.get_eset(2, myrset);
797  j = myeset.begin();
798  Xapian::ESetIterator k = myeset.end();
801  Xapian::ESetIterator n = myeset.begin();
802 
803  TEST_NOT_EQUAL(j, k);
804  TEST_NOT_EQUAL(l, m);
805  TEST_EQUAL(k, m);
806  TEST_EQUAL(j, l);
807  TEST_EQUAL(j, j);
808  TEST_EQUAL(k, k);
809 
// Advance a copy of the begin iterator one step at a time; after two
// increments it is expected to compare equal to m.
810  k = j;
811  TEST_EQUAL(j, k);
812  k++;
813  TEST_NOT_EQUAL(j, k);
814  TEST_NOT_EQUAL(k, l);
815  TEST_NOT_EQUAL(k, m);
816  k++;
817  TEST_NOT_EQUAL(j, k);
818  TEST_NOT_EQUAL(k, l);
819  TEST_EQUAL(k, m);
820  TEST_EQUAL(n, l);
821 
// Assigning m to n should make n compare equal to end().
822  n = m;
823  TEST_NOT_EQUAL(n, l);
824  TEST_EQUAL(n, m);
825  TEST_NOT_EQUAL(n, myeset.begin());
826  TEST_EQUAL(n, myeset.end());
827 }
828 
829 // tests that eset iterators on empty esets compare equal.
//
// The ESet is requested with a maximum of 0 items, using an RSet built from
// the first two hits for "this".
// NOTE(review): the declarations of `l` and `m` are not visible in this
// listing - confirm against the upstream source.
830 DEFINE_TESTCASE(esetiterator2, backend) {
831  Xapian::Enquire enquire(get_database("apitest_simpledata"));
832  enquire.set_query(Xapian::Query("this"));
833 
834  Xapian::MSet mymset = enquire.get_mset(0, 10);
835  TEST(mymset.size() >= 2);
836 
837  Xapian::RSet myrset;
838  Xapian::MSetIterator i = mymset.begin();
839  myrset.add_document(*i);
840  myrset.add_document(*(++i));
841 
842  Xapian::ESet myeset = enquire.get_eset(0, myrset);
843  Xapian::ESetIterator j = myeset.begin();
844  Xapian::ESetIterator k = myeset.end();
847  TEST_EQUAL(j, k);
848  TEST_EQUAL(l, m);
849  TEST_EQUAL(k, m);
850  TEST_EQUAL(j, l);
851  TEST_EQUAL(j, j);
852  TEST_EQUAL(k, k);
853 }
854 
855 // tests the collapse-on-key
856 DEFINE_TESTCASE(collapsekey1, backend) {
857  Xapian::Enquire enquire(get_database("apitest_simpledata"));
858  enquire.set_query(Xapian::Query("this"));
859 
860  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
861  Xapian::doccount mymsize1 = mymset1.size();
862 
863  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
864  enquire.set_collapse_key(value_no);
865  Xapian::MSet mymset = enquire.get_mset(0, 100);
866 
867  TEST_AND_EXPLAIN(mymsize1 > mymset.size(),
868  "Had no fewer items when performing collapse: don't know whether it worked.");
869 
870  map<string, Xapian::docid> values;
871  Xapian::MSetIterator i = mymset.begin();
872  for ( ; i != mymset.end(); ++i) {
873  string value = i.get_document().get_value(value_no);
874  TEST(values[value] == 0 || value.empty());
875  values[value] = *i;
876  }
877  }
878 }
879 
880 // tests that collapse-on-key modifies the predicted bounds for the number of
881 // matches appropriately.
//
// Currently this test only calls SKIP_TEST() - see the FIXME below.
882 DEFINE_TESTCASE(collapsekey2, backend) {
// NOTE(review): presumably SKIP_TEST() does not return, making the rest of
// this function unreachable - TODO confirm.
883  SKIP_TEST("Don't have a suitable database currently");
884  // FIXME: this needs an appropriate database creating, but that's quite
885  // subtle to do it seems.
886  Xapian::Enquire enquire(get_database("apitest_simpledata2"));
887  enquire.set_query(Xapian::Query("this"));
888 
889  Xapian::MSet mset1 = enquire.get_mset(0, 1);
890 
891  // Test that if no duplicates are found, then the upper bound remains
892  // unchanged and the lower bound drops.
893  {
894  enquire.set_query(Xapian::Query("this"));
895  Xapian::valueno value_no = 3;
896  enquire.set_collapse_key(value_no);
897  Xapian::MSet mset = enquire.get_mset(0, 1);
898 
// NOTE(review): the checks on mset are not visible in this listing - confirm
// against the upstream source.
901  }
902 }
903 
904 // tests that collapse-on-key modifies the predicted bounds for the number of
905 // matches appropriately.
//
// Asks for 3 hits with each of value slots 1 to 6 as the collapse key and
// then with slot 1000 (expected to always be empty - see below).
// NOTE(review): the statements which compare the bounds (of which only the
// message strings are visible) are incomplete in this listing - confirm
// against the upstream source.
906 DEFINE_TESTCASE(collapsekey3, backend) {
907  Xapian::Enquire enquire(get_database("apitest_simpledata"));
908  enquire.set_query(Xapian::Query("this"));
909 
910  Xapian::MSet mymset1 = enquire.get_mset(0, 3);
911 
912  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
913  enquire.set_collapse_key(value_no);
914  Xapian::MSet mymset = enquire.get_mset(0, 3);
915 
917  "Lower bound was not lower when performing collapse: don't know whether it worked.");
919  "Upper bound was not lower when performing collapse: don't know whether it worked.");
920 
// Check that no non-empty collapse value appears on more than one result.
921  map<string, Xapian::docid> values;
922  Xapian::MSetIterator i = mymset.begin();
923  for ( ; i != mymset.end(); ++i) {
924  string value = i.get_document().get_value(value_no);
925  TEST(values[value] == 0 || value.empty());
926  values[value] = *i;
927  }
928  }
929 
930  // Test that if the collapse value is always empty, then the upper bound
931  // remains unchanged, and the lower bound is the same or lower (it can be
932  // lower because the matcher counts the number of documents with empty
933  // collapse keys, but may have rejected a document because its weight is
934  // too low for the proto-MSet before it even looks at its collapse key).
935  {
936  Xapian::valueno value_no = 1000;
937  enquire.set_collapse_key(value_no);
938  Xapian::MSet mymset = enquire.get_mset(0, 3);
939 
942 
943  map<string, Xapian::docid> values;
944  Xapian::MSetIterator i = mymset.begin();
945  for ( ; i != mymset.end(); ++i) {
946  string value = i.get_document().get_value(value_no);
947  TEST(values[value] == 0 || value.empty());
948  values[value] = *i;
949  }
950  }
951 }
952 
953 // tests that collapse-on-key modifies the predicted bounds for the number of
954 // matches appropriately even when no results are requested.
//
// Asks for 0 hits with each of value slots 1 to 6 as the collapse key.
// NOTE(review): the statements which compare the bounds (of which only the
// message strings are visible) are incomplete in this listing - confirm
// against the upstream source.
955 DEFINE_TESTCASE(collapsekey4, backend) {
956  Xapian::Enquire enquire(get_database("apitest_simpledata"));
957  enquire.set_query(Xapian::Query("this"));
958 
959  Xapian::MSet mymset1 = enquire.get_mset(0, 0);
960 
961  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
962  enquire.set_collapse_key(value_no);
963  Xapian::MSet mymset = enquire.get_mset(0, 0);
964 
966  "Lower bound was not 1 when performing collapse but not asking for any results.");
968  "Upper bound was changed when performing collapse but not asking for any results.");
969 
// Check that no non-empty collapse value appears on more than one result.
970  map<string, Xapian::docid> values;
971  Xapian::MSetIterator i = mymset.begin();
972  for ( ; i != mymset.end(); ++i) {
973  string value = i.get_document().get_value(value_no);
974  TEST(values[value] == 0 || value.empty());
975  values[value] = *i;
976  }
977  }
978 }
979 
980 // test for keepalives
//
// Opens a remote database (passing 5000 as the second argument to
// get_remote_database()), calls keep_alive() every 2 seconds for 20 seconds
// and checks a query still works, then sleeps for 10 seconds without calling
// keep_alive() before querying again.
981 DEFINE_TESTCASE(keepalive1, remote) {
// NOTE(review): 5000 is presumably a timeout in milliseconds - TODO confirm.
982  Xapian::Database db(get_remote_database("apitest_simpledata", 5000));
983 
984  /* Test that keep-alives work */
985  for (int i = 0; i < 10; ++i) {
986  sleep(2);
987  db.keep_alive();
988  }
989  Xapian::Enquire enquire(db);
990  enquire.set_query(Xapian::Query("word"));
991  enquire.get_mset(0, 10);
992 
993  /* Test that things break without keepalives */
994  sleep(10);
995  enquire.set_query(Xapian::Query("word"));
996  /* Currently this can throw NetworkError or NetworkTimeoutError (which is
997  * a subclass of NetworkError).
998  */
// NOTE(review): the line below ends with an unmatched ")" and so looks like
// the tail of a statement whose start is not visible in this listing -
// confirm against the upstream source.
1000  enquire.get_mset(0, 10));
1001 }
1002 
1003 // test that iterating through all terms in a database works.
//
// Expects apitest_allterms to yield exactly the terms "one", "three" and
// "two" in that order, with term frequencies 1, 3 and 2 respectively.
// NOTE(review): the declaration of `ati` is not visible in this listing -
// confirm against the upstream source.
1004 DEFINE_TESTCASE(allterms1, backend) {
1005  Xapian::Database db(get_database("apitest_allterms"));
1007  TEST(ati != db.allterms_end());
1008  TEST_EQUAL(*ati, "one");
1009  TEST_EQUAL(ati.get_termfreq(), 1);
1010 
// ati2 is only used by the checks disabled with #if 0 below.
1011  Xapian::TermIterator ati2 = ati;
1012 
1013  ati++;
1014  TEST(ati != db.allterms_end());
1015  if (verbose) {
1016  tout << "*ati = '" << *ati << "'\n";
1017  tout << "*ati.length = '" << (*ati).length() << "'\n";
1018  tout << "*ati == \"one\" = " << (*ati == "one") << "\n";
1019  tout << "*ati[3] = " << ((*ati)[3]) << "\n";
1020  tout << "*ati = '" << *ati << "'\n";
1021  }
1022  TEST(*ati == "three");
1023  TEST(ati.get_termfreq() == 3);
1024 
1025 #if 0
1026  TEST(ati2 != db.allterms_end());
1027  TEST(*ati2 == "one");
1028  TEST(ati2.get_termfreq() == 1);
1029 #endif
1030 
1031  ++ati;
1032 #if 0
1033  ++ati2;
1034 #endif
1035  TEST(ati != db.allterms_end());
1036  TEST(*ati == "two");
1037  TEST(ati.get_termfreq() == 2);
1038 
1039 #if 0
1040  TEST(ati2 != db.allterms_end());
1041  TEST(*ati2 == "three");
1042  TEST(ati2.get_termfreq() == 3);
1043 #endif
1044 
1045  ati++;
1046  TEST(ati == db.allterms_end());
1047 }
1048 
1049 // Test that iterating through all terms in two databases combined with
     // add_database() works: the terms come back as one sequence in sorted order.
1050 DEFINE_TESTCASE(allterms2, backend) {
1051  Xapian::Database db;
1052  db.add_database(get_database("apitest_allterms"));
1053  db.add_database(get_database("apitest_allterms2"));
     // NOTE(review): listing line 1054 is missing from this extract.  `ati` is
     // used below without a visible declaration, so it was presumably declared
     // there - restore it from the original source.
1055 
1056  TEST(ati != db.allterms_end());
1057  TEST(*ati == "five");
1058  TEST(ati.get_termfreq() == 2);
1059  ati++;
1060 
1061  TEST(ati != db.allterms_end());
1062  TEST(*ati == "four");
1063  TEST(ati.get_termfreq() == 1);
1064 
1065  ati++;
1066  TEST(ati != db.allterms_end());
1067  TEST(*ati == "one");
1068  TEST(ati.get_termfreq() == 1);
1069 
1070  ++ati;
1071  TEST(ati != db.allterms_end());
1072  TEST(*ati == "six");
1073  TEST(ati.get_termfreq() == 3);
1074 
1075  ati++;
1076  TEST(ati != db.allterms_end());
1077  TEST(*ati == "three");
1078  TEST(ati.get_termfreq() == 3);
1079 
1080  ati++;
1081  TEST(ati != db.allterms_end());
1082  TEST(*ati == "two");
1083  TEST(ati.get_termfreq() == 2);
1084 
     // Six distinct terms in total, so the next step must reach the end.
1085  ati++;
1086  TEST(ati == db.allterms_end());
1087 }
1088 
1089 // Test that skip_to() past the last term leaves the iterator equal to
     // allterms_end() (regression test).
1090 DEFINE_TESTCASE(allterms3, backend) {
1091  Xapian::Database db;
1092  db.add_database(get_database("apitest_allterms"));
     // NOTE(review): listing line 1093 is missing from this extract.  `ati` is
     // used below without a visible declaration, so it was presumably declared
     // there - restore it from the original source.
1094 
1095  ati.skip_to(string("zzzzzz"));
1096  TEST(ati == db.allterms_end());
1097 }
1098 
1099 // Test that advancing an allterms iterator ignores extra entries due to long
1100 // posting lists being chunked (regression test for quartz).
1101 DEFINE_TESTCASE(allterms4, backend) {
1102  // apitest_allterms4 contains 682 documents each containing just the word
1103  // "foo". 682 was the magic number which started to cause Quartz problems.
1104  Xapian::Database db = get_database("apitest_allterms4");
1105 
     // NOTE(review): listing line 1106 is missing from this extract.  `i` is
     // used below without a visible declaration, so it was presumably declared
     // there - restore it from the original source.
1107  TEST(i != db.allterms_end());
1108  TEST(*i == "foo");
1109  TEST(i.get_termfreq() == 682);
     // "foo" must be reported exactly once.
1110  ++i;
1111  TEST(i == db.allterms_end());
1112 }
1113 
1114 // Test that skip_to() with an exact match positions the iterator on that term
1115 // (regression test for quartz).
1116 DEFINE_TESTCASE(allterms5, backend) {
1117  Xapian::Database db;
1118  db.add_database(get_database("apitest_allterms"));
     // NOTE(review): listing line 1119 is missing from this extract.  `ati` is
     // used below without a visible declaration, so it was presumably declared
     // there - restore it from the original source.
1120  ati.skip_to("three");
1121  TEST(ati != db.allterms_end());
1122  TEST_EQUAL(*ati, "three");
1123 }
1124 
1125 // test allterms iterators with prefixes
1126 DEFINE_TESTCASE(allterms6, backend) {
1127  Xapian::Database db;
1128  db.add_database(get_database("apitest_allterms"));
1129  db.add_database(get_database("apitest_allterms2"));
1130 
1131  Xapian::TermIterator ati = db.allterms_begin("three");
1132  TEST(ati != db.allterms_end("three"));
1133  TEST_EQUAL(*ati, "three");
1134  ati.skip_to("three");
1135  TEST(ati != db.allterms_end("three"));
1136  TEST_EQUAL(*ati, "three");
1137  ati++;
1138  TEST(ati == db.allterms_end("three"));
1139 
1140  ati = db.allterms_begin("thre");
1141  TEST(ati != db.allterms_end("thre"));
1142  TEST_EQUAL(*ati, "three");
1143  ati.skip_to("three");
1144  TEST(ati != db.allterms_end("thre"));
1145  TEST_EQUAL(*ati, "three");
1146  ati++;
1147  TEST(ati == db.allterms_end("thre"));
1148 
1149  ati = db.allterms_begin("f");
1150  TEST(ati != db.allterms_end("f"));
1151  TEST_EQUAL(*ati, "five");
1152  TEST(ati != db.allterms_end("f"));
1153  ati.skip_to("three");
1154  TEST(ati == db.allterms_end("f"));
1155 
1156  ati = db.allterms_begin("f");
1157  TEST(ati != db.allterms_end("f"));
1158  TEST_EQUAL(*ati, "five");
1159  ati++;
1160  TEST(ati != db.allterms_end("f"));
1161  TEST_EQUAL(*ati, "four");
1162  ati++;
1163  TEST(ati == db.allterms_end("f"));
1164 
1165  ati = db.allterms_begin("absent");
1166  TEST(ati == db.allterms_end("absent"));
1167 }
1168 
1169 // Test that searching for terms containing special characters (a newline, a
     // zero byte) works.
1170 DEFINE_TESTCASE(specialterms1, backend) {
1171  Xapian::Enquire enquire(get_database("apitest_space"));
1172  Xapian::MSet mymset;
1173  Xapian::doccount count;
     // NOTE(review): listing line 1174 is missing from this extract.  `m` is
     // used as the MSet iterator below without a visible declaration, so it was
     // presumably declared there - restore it from the original source.
1175  Xapian::Stem stemmer("english");
1176 
1177  enquire.set_query(stemmer("new\nline"));
1178  mymset = enquire.get_mset(0, 10);
1179  TEST_MSET_SIZE(mymset, 1);
     // Check that iterating the MSet agrees with its reported size.
1180  count = 0;
1181  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1182  TEST_EQUAL(count, 1);
1183 
     // Value slots 0 to 6 of the matching document must all be non-empty.
1184  for (Xapian::valueno value_no = 0; value_no < 7; ++value_no) {
1185  string value = mymset.begin().get_document().get_value(value_no);
1186  TEST_NOT_EQUAL(value, "");
1187  if (value_no == 0) {
     // Slot 0 must hold every byte value 0..255 in order, starting at
     // offset 7 (so byte 255 is at offset 262).
1188  TEST(value.size() > 263);
1189  TEST_EQUAL(static_cast<unsigned char>(value[262]), 255);
1190  for (int k = 0; k < 256; ++k) {
1191  TEST_EQUAL(static_cast<unsigned char>(value[k + 7]), k);
1192  }
1193  }
1194  }
1195 
     // The length is passed explicitly so the embedded zero byte is kept.
1196  enquire.set_query(stemmer(string("big\0zero", 8)));
1197  mymset = enquire.get_mset(0, 10);
1198  TEST_MSET_SIZE(mymset, 1);
1199  count = 0;
1200  for (m = mymset.begin(); m != mymset.end(); ++m) ++count;
1201  TEST_EQUAL(count, 1);
1202 }
1203 
1204 // Test that terms containing special characters appear correctly when
1205 // iterating allterms.
1206 DEFINE_TESTCASE(specialterms2, backend) {
1207  Xapian::Database db(get_database("apitest_space"));
1208 
1209  // Check the terms are all as expected (after stemming) and that allterms
1210  // copes with iterating over them.
     // NOTE(review): listing line 1211 is missing from this extract.  `t` is
     // assigned below without a visible declaration, so it was presumably
     // declared there - restore it from the original source.
1212  t = db.allterms_begin();
1213  TEST_EQUAL(*t, "back\\slash"); ++t; TEST_NOT_EQUAL(t, db.allterms_end());
1214  TEST_EQUAL(*t, string("big\0zero", 8)); ++t; TEST_NOT_EQUAL(t, db.allterms_end());
1215  TEST_EQUAL(*t, "new\nlin"); ++t; TEST_NOT_EQUAL(t, db.allterms_end());
1216  TEST_EQUAL(*t, "one\x01on"); ++t; TEST_NOT_EQUAL(t, db.allterms_end());
1217  TEST_EQUAL(*t, "space man"); ++t; TEST_NOT_EQUAL(t, db.allterms_end());
1218  TEST_EQUAL(*t, "tab\tbi"); ++t; TEST_NOT_EQUAL(t, db.allterms_end());
     // The seventh term is the last one, so this step must reach the end.
1219  TEST_EQUAL(*t, "tu\x02tu"); ++t; TEST_EQUAL(t, db.allterms_end());
1220 
1221  // Now check that skip_to exactly a term containing a zero byte works.
1222  // This is a regression test for flint and quartz - an Assert() used to
1223  // fire in debug builds (the Assert was wrong - the actual code handled
1224  // this OK).
1225  t = db.allterms_begin();
1226  t.skip_to(string("big\0zero", 8));
1227  TEST_NOT_EQUAL(t, db.allterms_end());
1228  TEST_EQUAL(*t, string("big\0zero", 8));
1229 }
1230 
1231 // test that rsets behave correctly with multiDBs
1232 DEFINE_TESTCASE(rsetmultidb2, backend && !multi) {
1233  Xapian::Database mydb1(get_database("apitest_rset", "apitest_simpledata2"));
1234  Xapian::Database mydb2(get_database("apitest_rset"));
1235  mydb2.add_database(get_database("apitest_simpledata2"));
1236 
1237  Xapian::Enquire enquire1(mydb1);
1238  Xapian::Enquire enquire2(mydb2);
1239 
1240  Xapian::Query myquery = query("is");
1241 
1242  enquire1.set_query(myquery);
1243  enquire2.set_query(myquery);
1244 
1245  Xapian::RSet myrset1;
1246  Xapian::RSet myrset2;
1247  myrset1.add_document(4);
1248  myrset2.add_document(2);
1249 
1250  Xapian::MSet mymset1a = enquire1.get_mset(0, 10);
1251  Xapian::MSet mymset1b = enquire1.get_mset(0, 10, &myrset1);
1252  Xapian::MSet mymset2a = enquire2.get_mset(0, 10);
1253  Xapian::MSet mymset2b = enquire2.get_mset(0, 10, &myrset2);
1254 
1255  mset_expect_order(mymset1a, 4, 3);
1256  mset_expect_order(mymset1b, 4, 3);
1257  mset_expect_order(mymset2a, 2, 5);
1258  mset_expect_order(mymset2b, 2, 5);
1259 
1260  TEST(mset_range_is_same_weights(mymset1a, 0, mymset2a, 0, 2));
1261  TEST(mset_range_is_same_weights(mymset1b, 0, mymset2b, 0, 2));
1262  TEST_NOT_EQUAL(mymset1a, mymset1b);
1263  TEST_NOT_EQUAL(mymset2a, mymset2b);
1264 }
1265 
1266 // tests an expand across multiple databases
1267 DEFINE_TESTCASE(multiexpand1, backend && !multi) {
1268  Xapian::Database mydb1(get_database("apitest_simpledata", "apitest_simpledata2"));
1269  Xapian::Enquire enquire1(mydb1);
1270 
1271  Xapian::Database mydb2(get_database("apitest_simpledata"));
1272  mydb2.add_database(get_database("apitest_simpledata2"));
1273  Xapian::Enquire enquire2(mydb2);
1274 
1275  // make simple equivalent rsets, with a document from each database in each.
1276  Xapian::RSet rset1;
1277  Xapian::RSet rset2;
1278  rset1.add_document(1);
1279  rset1.add_document(7);
1280  rset2.add_document(1);
1281  rset2.add_document(2);
1282 
1283  // Retrieve all the ESet results in each of the three setups:
1284 
1285  // This is the single database one.
1286  Xapian::ESet eset1 = enquire1.get_eset(1000, rset1);
1287 
1288  // This is the multi database with approximation
1289  Xapian::ESet eset2 = enquire2.get_eset(1000, rset2);
1290 
1291  // This is the multi database without approximation
1292  Xapian::ESet eset3 = enquire2.get_eset(1000, rset2, Xapian::Enquire::USE_EXACT_TERMFREQ);
1293 
1294  TEST_EQUAL(eset1.size(), eset3.size());
1295 
1296  Xapian::ESetIterator i = eset1.begin();
1297  Xapian::ESetIterator j = eset3.begin();
1298  while (i != eset1.end() && j != eset3.end()) {
1299  TEST_EQUAL(*i, *j);
1300  TEST_EQUAL(i.get_weight(), j.get_weight());
1301  ++i;
1302  ++j;
1303  }
1304  TEST(i == eset1.end());
1305  TEST(j == eset3.end());
1306 
1307  bool eset1_eq_eset2 = true;
1308  i = eset1.begin();
1309  j = eset2.begin();
1310  while (i != eset1.end() && j != eset2.end()) {
1311  if (i.get_weight() != j.get_weight()) {
1312  eset1_eq_eset2 = false;
1313  break;
1314  }
1315  ++i;
1316  ++j;
1317  }
1318  TEST(!eset1_eq_eset2);
1319 }
1320 
1321 // tests that opening a non-existent postlist returns an empty list
1322 DEFINE_TESTCASE(postlist1, backend) {
1323  Xapian::Database db(get_database("apitest_simpledata"));
1324 
1325  TEST_EQUAL(db.postlist_begin("rosebud"), db.postlist_end("rosebud"));
1326 
1327  string s = "let_us_see_if_we_can_break_it_with_a_really_really_long_term.";
1328  for (int i = 0; i < 8; ++i) {
1329  s += s;
1330  TEST_EQUAL(db.postlist_begin(s), db.postlist_end(s));
1331  }
1332 
1333  // A regression test (no, really!)
1334  TEST_NOT_EQUAL(db.postlist_begin("a"), db.postlist_end("a"));
1335 }
1336 
1337 // Test that a Xapian::PostingIterator works as an STL iterator.
1338 DEFINE_TESTCASE(postlist2, backend) {
1339  Xapian::Database db(get_database("apitest_simpledata"));
     // NOTE(review): listing line 1340 is missing from this extract.  `p` is
     // assigned below without a visible declaration, so it was presumably
     // declared there - restore it from the original source.
1341  p = db.postlist_begin("this");
1342  Xapian::PostingIterator pend = db.postlist_end("this");
1343 
     // An iterator which isn't at the end must not describe itself as empty.
1344  TEST(p.get_description() != "PostingIterator()");
1345 
1346  // test operator= creates a copy which compares equal
1347  Xapian::PostingIterator p_copy = p;
1348  TEST_EQUAL(p, p_copy);
1349 
1350  TEST(p_copy.get_description() != "PostingIterator()");
1351 
1352  // test copy constructor creates a copy which compares equal
1353  Xapian::PostingIterator p_clone(p);
1354  TEST_EQUAL(p, p_clone);
1355 
1356  TEST(p_clone.get_description() != "PostingIterator()");
1357 
     // Build a vector from the iterator pair, then check that walking the
     // posting list again yields the same docids in the same order.
1358  vector<Xapian::docid> v(p, pend);
1359 
1360  p = db.postlist_begin("this");
1361  pend = db.postlist_end("this");
1362  vector<Xapian::docid>::const_iterator i;
1363  for (i = v.begin(); i != v.end(); ++i) {
1364  TEST_NOT_EQUAL(p, pend);
1365  TEST_EQUAL(*i, *p);
1366  p++;
1367  }
1368  TEST_EQUAL(p, pend);
1369 
     // Once at the end, both iterators must describe themselves as empty.
1370  TEST_STRINGS_EQUAL(p.get_description(), "PostingIterator()");
1371  TEST_STRINGS_EQUAL(pend.get_description(), "PostingIterator()");
1372 }
1373 
1374 // Test that a Xapian::PostingIterator still works after the Database object
     // it was obtained from has been destroyed.
1375 DEFINE_TESTCASE(postlist3, backend) {
     // NOTE(review): listing line 1376 is missing from this extract.  `u` is
     // assigned in the inner scope below and used after it, so it was presumably
     // declared there - restore it from the original source.
1377  {
     // db_temp goes out of scope at the closing brace; `u` outlives it.
1378  Xapian::Database db_temp(get_database("apitest_simpledata"));
1379  u = db_temp.postlist_begin("this");
1380  }
1381 
1382  Xapian::Database db(get_database("apitest_simpledata"));
1383  Xapian::PostingIterator p = db.postlist_begin("this");
1384  Xapian::PostingIterator pend = db.postlist_end("this");
1385 
     // Walk both iterators in step; they must yield the same docids.
     // NOTE(review): only `p` is compared against the end, so `u` ending early
     // or late isn't checked.
1386  while (p != pend) {
1387  TEST_EQUAL(*p, *u);
1388  p++;
1389  u++;
1390  }
1391 }
1392 
1393 // tests skip_to
1394 DEFINE_TESTCASE(postlist4, backend) {
1395  Xapian::Database db(get_database("apitest_simpledata"));
1396  Xapian::PostingIterator i = db.postlist_begin("this");
1397  i.skip_to(1);
1398  i.skip_to(999999999);
1399  TEST(i == db.postlist_end("this"));
1400 }
1401 
1402 // Test long posting lists: "this" must occur in docids 1 to 512 with no gaps.
1403 DEFINE_TESTCASE(postlist5, backend) {
1404  Xapian::Database db(get_database("apitest_manydocs"));
     // NOTE(review): listing line 1405 is missing from this extract - restore it
     // from the original source.
1406  Xapian::PostingIterator i = db.postlist_begin("this");
     // j tracks the docid expected next.
1407  unsigned int j = 1;
1408  while (i != db.postlist_end("this")) {
1409  TEST_EQUAL(*i, j);
1410  i++;
1411  j++;
1412  }
     // j is one past the last docid seen, so 513 means 512 postings.
1413  TEST_EQUAL(j, 513);
1414 }
1415 
1416 // Test the document length, wdf and unique-term statistics which a posting
     // list reports for each entry.
1417 DEFINE_TESTCASE(postlist6, backend) {
1418  Xapian::Database db(get_database("apitest_simpledata"));
1419  Xapian::PostingIterator i = db.postlist_begin("this");
1420  TEST(i != db.postlist_end("this"));
1421  while (i != db.postlist_end("this")) {
     // The length from the posting list must match the database's own figure.
1422  TEST_EQUAL(i.get_doclength(), db.get_doclength(*i));
     // NOTE(review): listing line 1423 is missing from this extract - restore it
     // from the original source.
1424  TEST_REL(i.get_wdf(),<=,i.get_doclength());
1425  TEST_REL(1,<=,i.get_unique_terms());
1426  // The next two aren't necessarily true if there are terms with wdf=0
1427  // in the document, but that isn't the case here.
     // NOTE(review): listing line 1428 is missing from this extract; it was
     // presumably the first of the "next two" checks mentioned above.
1429  TEST_REL(i.get_wdf() + i.get_unique_terms() - 1,<=,i.get_doclength());
1430  ++i;
1431  }
1432 }
1433 
1434 // tests collection frequency
1435 DEFINE_TESTCASE(collfreq1, backend) {
1436  Xapian::Database db(get_database("apitest_simpledata"));
1437 
1438  TEST_EQUAL(db.get_collection_freq("this"), 11);
1439  TEST_EQUAL(db.get_collection_freq("first"), 1);
1440  TEST_EQUAL(db.get_collection_freq("last"), 0);
1441  TEST_EQUAL(db.get_collection_freq("word"), 9);
1442 
1443  Xapian::Database db1(get_database("apitest_simpledata", "apitest_simpledata2"));
1444  Xapian::Database db2(get_database("apitest_simpledata"));
1445  db2.add_database(get_database("apitest_simpledata2"));
1446 
1447  TEST_EQUAL(db1.get_collection_freq("this"), 15);
1448  TEST_EQUAL(db1.get_collection_freq("first"), 1);
1449  TEST_EQUAL(db1.get_collection_freq("last"), 0);
1450  TEST_EQUAL(db1.get_collection_freq("word"), 11);
1451  TEST_EQUAL(db2.get_collection_freq("this"), 15);
1452  TEST_EQUAL(db2.get_collection_freq("first"), 1);
1453  TEST_EQUAL(db2.get_collection_freq("last"), 0);
1454  TEST_EQUAL(db2.get_collection_freq("word"), 11);
1455 }
1456 
1457 // Regression test for split msets being incorrect when sorting: fetching the
     // results as [0,3) plus [3,100) must give the same sequence as [0,100).
1458 DEFINE_TESTCASE(sortvalue1, backend) {
1459  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1460  enquire.set_query(Xapian::Query("this"));
1461 
1462  for (int pass = 1; pass <= 2; ++pass) {
1463  for (Xapian::valueno value_no = 1; value_no < 7; ++value_no) {
1464  tout << "Sorting on value " << value_no << endl;
1465  enquire.set_sort_by_value(value_no, true);
1466  Xapian::MSet allbset = enquire.get_mset(0, 100);
1467  Xapian::MSet partbset1 = enquire.get_mset(0, 3);
1468  Xapian::MSet partbset2 = enquire.get_mset(3, 97);
1469  TEST_EQUAL(allbset.size(), partbset1.size() + partbset2.size());
1470 
     // `j` walks the unsplit MSet while `i` walks each part in turn; `ok` is
     // only checked at the end so that every entry gets logged first.
1471  bool ok = true;
1472  int n = 0;
1473  Xapian::MSetIterator i, j;
1474  j = allbset.begin();
1475  for (i = partbset1.begin(); i != partbset1.end(); ++i) {
     // NOTE(review): `*j` is streamed here before the end check on the next
     // line - consider swapping the two statements.
1476  tout << "Entry " << n << ": " << *i << " | " << *j << endl;
1477  TEST(j != allbset.end());
1478  if (*i != *j) ok = false;
1479  ++j;
1480  ++n;
1481  }
1482  tout << "===\n";
1483  for (i = partbset2.begin(); i != partbset2.end(); ++i) {
     // NOTE(review): same ordering issue as in the first loop.
1484  tout << "Entry " << n << ": " << *i << " | " << *j << endl;
1485  TEST(j != allbset.end());
1486  if (*i != *j) ok = false;
1487  ++j;
1488  ++n;
1489  }
1490  TEST(j == allbset.end());
1491  if (!ok)
1492  FAIL_TEST("Split msets aren't consistent with unsplit");
1493  }
     // NOTE(review): listing line 1494 is missing from this extract.  As shown,
     // both passes of the outer loop are identical, so the missing line
     // presumably changes an Enquire setting for the second pass - restore it
     // from the original source.
1495  }
1496 }
1497 
1498 // Consistency check match - vary the mset start and size and check that the
     // results agree with the corresponding slice of one big mset.
1499 // consistency1 will run on the remote backend, but it's particularly slow
1500 // with that, and testing it there doesn't actually improve the test
1501 // coverage really.
1502 DEFINE_TESTCASE(consistency1, backend && !remote) {
1503  Xapian::Database db(get_database("etext"));
1504  Xapian::Enquire enquire(db);
     // NOTE(review): listing line 1505 is missing from this extract.  No query
     // is set on `enquire` in the visible code, so it was presumably set there -
     // restore it from the original source.
1506  Xapian::doccount lots = 214;
1507  Xapian::MSet bigmset = enquire.get_mset(0, lots);
1508  TEST_EQUAL(bigmset.size(), lots);
1509  try {
1510  for (Xapian::doccount start = 0; start < lots; ++start) {
1511  for (Xapian::doccount size = 0; size < lots - start; ++size) {
1512  Xapian::MSet mset = enquire.get_mset(start, size);
1513  if (mset.size()) {
     // A non-empty slice must end where requested, or at the end of the
     // full result set if that comes first.
1514  TEST_EQUAL(start + mset.size(),
1515  min(start + size, bigmset.size()));
1516  } else if (size) {
1517 // tout << start << mset.size() << bigmset.size() << endl;
     // An empty slice of non-zero requested size is only acceptable past
     // the end of the full result set.
1518  TEST(start >= bigmset.size());
1519  }
     // Every entry must match the corresponding entry of the big mset in
     // both docid and weight.
1520  for (Xapian::doccount i = 0; i < mset.size(); ++i) {
1521  TEST_EQUAL(*mset[i], *bigmset[start + i]);
1522  TEST_EQUAL_DOUBLE(mset[i].get_weight(),
1523  bigmset[start + i].get_weight());
1524  }
1525  }
1526  }
     // NOTE(review): the testcase condition above excludes the remote backend,
     // so this handler looks like a leftover from when it was included.
1527  } catch (const Xapian::NetworkTimeoutError &) {
1528  // consistency1 is a long test - may timeout with the remote backend...
1529  SKIP_TEST("Test taking too long");
1530  }
1531 }
1532 
1533 // Test that specifying a nonexistent input file throws an exception
1534 // (chert-specific cases).
     // NOTE(review): this extract is missing many listing lines in this testcase
     // (1539, 1541-1542, 1544, 1547, 1549, 1552, 1554-1555, 1557-1558,
     // 1560-1561, 1563-1564, 1566).  Every database constructor call below is
     // cut off after its first argument and the surrounding check is not
     // visible - restore the body from the original source before editing.
1535 DEFINE_TESTCASE(chertdatabasenotfounderror1, chert) {
1536 #ifdef XAPIAN_HAS_CHERT_BACKEND
1537  mkdir(".chert", 0755);
1538 
     // Case 1: a path which doesn't exist.
1540  Xapian::Database(".chert/nosuchdirectory",
1543  Xapian::WritableDatabase(".chert/nosuchdirectory",
1545 
     // Case 2: a directory which exists but is empty.
1546  mkdir(".chert/emptydirectory", 0700);
1548  Xapian::Database(".chert/emptydirectory",
1550 
     // Case 3: a path naming a file created with touch().
1551  touch(".chert/somefile");
1553  Xapian::Database(".chert/somefile",
1556  Xapian::WritableDatabase(".chert/somefile",
1559  Xapian::WritableDatabase(".chert/somefile",
1562  Xapian::WritableDatabase(".chert/somefile",
1565  Xapian::WritableDatabase(".chert/somefile",
1567 #endif
1568 
1569 }
1570 
1571 // Test that specifying a nonexistent input file throws an exception
1572 // (glass-specific cases).
     // NOTE(review): this extract is missing many listing lines in this testcase
     // (1577, 1579-1580, 1582, 1585, 1587, 1590, 1592-1593, 1595-1596,
     // 1598-1599, 1601-1602, 1604).  Every database constructor call below is
     // cut off after its first argument and the surrounding check is not
     // visible - restore the body from the original source before editing.
1573 DEFINE_TESTCASE(glassdatabasenotfounderror1, glass) {
1574 #ifdef XAPIAN_HAS_GLASS_BACKEND
1575  mkdir(".glass", 0755);
1576 
     // Case 1: a path which doesn't exist.
1578  Xapian::Database(".glass/nosuchdirectory",
1581  Xapian::WritableDatabase(".glass/nosuchdirectory",
1583 
     // Case 2: a directory which exists but is empty.
1584  mkdir(".glass/emptydirectory", 0700);
1586  Xapian::Database(".glass/emptydirectory",
1588 
     // Case 3: a path naming a file created with touch().
1589  touch(".glass/somefile");
1591  Xapian::Database(".glass/somefile",
1594  Xapian::WritableDatabase(".glass/somefile",
1597  Xapian::WritableDatabase(".glass/somefile",
1600  Xapian::WritableDatabase(".glass/somefile",
1603  Xapian::WritableDatabase(".glass/somefile",
1605 #endif
1606 }
1607 
1608 // Test that specifying a nonexistent input file throws an exception
1609 // (non-backend-specific cases).
     // NOTE(review): listing lines 1611, 1613, 1616, 1618 and 1623 are missing
     // from this extract.  Each visible statement ends with an unmatched ')' so
     // the opening of a wrapping macro call was presumably on the missing line
     // before it - restore them from the original source.
1610 DEFINE_TESTCASE(databasenotfounderror2, !backend) {
     // Read-only opens of a missing relative path and a missing nested path.
1612  Xapian::Database("nosuchdirectory"));
1614  Xapian::Database("no/such/directory"));
1615 
     // The same two paths opened for writing with DB_OPEN.
1617  Xapian::WritableDatabase("nosuchdirectory", Xapian::DB_OPEN));
1619  Xapian::WritableDatabase("no/such/directory", Xapian::DB_OPEN));
1620 
     // A directory which exists but is empty.
1621  string empty_dir = "emptydirectory";
1622  mkdir(empty_dir.c_str(), 0700);
1624  Xapian::Database{empty_dir});
1625 }
1626 
     // Exercises opening a chert WritableDatabase in a series of scoped blocks.
     // NOTE(review): most of this testcase is missing from the extract - listing
     // lines 1627, 1636-1640, 1646-1650, 1656-1660, 1664-1666, 1668-1669,
     // 1674-1675 and 1680-1681 are absent, so every WritableDatabase constructor
     // call below is cut off after its first argument and none of the checks are
     // visible.  Restore the body from the original source before editing.
1628 DEFINE_TESTCASE(chertdatabaseopen1, chert) {
1629 #ifdef XAPIAN_HAS_CHERT_BACKEND
1630  const string dbdir = ".chert/test_chertdatabaseopen1";
1631  mkdir(".chert", 0755);
1632 
     // The first three blocks each remove any existing database directory
     // before opening.
1633  {
1634  rm_rf(dbdir);
1635  Xapian::WritableDatabase wdb(dbdir,
1641  }
1642 
1643  {
1644  rm_rf(dbdir);
1645  Xapian::WritableDatabase wdb(dbdir,
1651  }
1652 
1653  {
1654  rm_rf(dbdir);
1655  Xapian::WritableDatabase wdb(dbdir,
1661  }
1662 
     // The remaining blocks show no rm_rf(), so they presumably operate on
     // whatever the previous block left behind.
1663  {
1667  Xapian::WritableDatabase wdb(dbdir,
1670  }
1671 
1672  {
1673  Xapian::WritableDatabase wdb(dbdir,
1676  }
1677 
1678  {
1679  Xapian::WritableDatabase wdb(dbdir,
1682  }
1683 #endif
1684 }
1685 
1686 // feature test for Enquire:
1687 // set_sort_by_value
1688 // set_sort_by_value_then_relevance
1689 // set_sort_by_relevance_then_value
1690 // Prior to 1.2.17 and 1.3.2, order8 and order9 were swapped, and
1691 // set_sort_by_relevance_then_value was buggy, so this testcase now serves as
1692 // a regression test for that bug.
     //
     // NOTE(review): listing lines 1734, 1743, 1752, 1760, 1768, 1776, 1784,
     // 1792, 1800 and 1808 are missing from this extract, each directly after a
     // set_sort_by_* call.  The same visible call is followed by different
     // expected orders (e.g. set_sort_by_value(1, true) expects order1 at line
     // 1728 but order3 at line 1746), so each missing line presumably changes
     // another Enquire setting - restore them from the original source.
1693 DEFINE_TESTCASE(sortrel1, backend) {
1694  Xapian::Enquire enquire(get_database("apitest_sortrel"));
1695  enquire.set_sort_by_value(1, true);
1696  enquire.set_query(Xapian::Query("woman"));
1697 
     // Expected docid orders for the setting combinations tested below.
     // Note that order6 and order7 are identical.
1698  static const Xapian::docid order1[] = { 1,2,3,4,5,6,7,8,9 };
1699  static const Xapian::docid order2[] = { 2,1,3,6,5,4,7,9,8 };
1700  static const Xapian::docid order3[] = { 3,2,1,6,5,4,9,8,7 };
1701  static const Xapian::docid order4[] = { 7,8,9,4,5,6,1,2,3 };
1702  static const Xapian::docid order5[] = { 9,8,7,6,5,4,3,2,1 };
1703  static const Xapian::docid order6[] = { 7,9,8,6,5,4,2,1,3 };
1704  static const Xapian::docid order7[] = { 7,9,8,6,5,4,2,1,3 };
1705  static const Xapian::docid order8[] = { 2,6,7,1,5,9,3,4,8 };
1706  static const Xapian::docid order9[] = { 7,6,2,9,5,1,8,4,3 };
1707 
1708  Xapian::MSet mset;
1709  size_t i;
1710 
     // Each stanza below requests up to 10 results, checks that the MSet has
     // as many entries as the expected array, then compares docids in order.
1711  mset = enquire.get_mset(0, 10);
1712  TEST_EQUAL(mset.size(), sizeof(order1) / sizeof(Xapian::docid));
1713  for (i = 0; i < sizeof(order1) / sizeof(Xapian::docid); ++i) {
1714  TEST_EQUAL(*mset[i], order1[i]);
1715  }
1716 
1717  enquire.set_sort_by_value_then_relevance(1, true);
1718 
1719  mset = enquire.get_mset(0, 10);
1720  TEST_EQUAL(mset.size(), sizeof(order2) / sizeof(Xapian::docid));
1721  for (i = 0; i < sizeof(order2) / sizeof(Xapian::docid); ++i) {
1722  TEST_EQUAL(*mset[i], order2[i]);
1723  }
1724 
     // Switching back to the first setting must give the first order again.
1725  enquire.set_sort_by_value(1, true);
1726 
1727  mset = enquire.get_mset(0, 10);
1728  TEST_EQUAL(mset.size(), sizeof(order1) / sizeof(Xapian::docid));
1729  for (i = 0; i < sizeof(order1) / sizeof(Xapian::docid); ++i) {
1730  TEST_EQUAL(*mset[i], order1[i]);
1731  }
1732 
1733  enquire.set_sort_by_value_then_relevance(1, true);
1735 
1736  mset = enquire.get_mset(0, 10);
1737  TEST_EQUAL(mset.size(), sizeof(order2) / sizeof(Xapian::docid));
1738  for (i = 0; i < sizeof(order2) / sizeof(Xapian::docid); ++i) {
1739  TEST_EQUAL(*mset[i], order2[i]);
1740  }
1741 
1742  enquire.set_sort_by_value(1, true);
1744 
1745  mset = enquire.get_mset(0, 10);
1746  TEST_EQUAL(mset.size(), sizeof(order3) / sizeof(Xapian::docid));
1747  for (i = 0; i < sizeof(order3) / sizeof(Xapian::docid); ++i) {
1748  TEST_EQUAL(*mset[i], order3[i]);
1749  }
1750 
1751  enquire.set_sort_by_value(1, false);
1753  mset = enquire.get_mset(0, 10);
1754  TEST_EQUAL(mset.size(), sizeof(order4) / sizeof(Xapian::docid));
1755  for (i = 0; i < sizeof(order4) / sizeof(Xapian::docid); ++i) {
1756  TEST_EQUAL(*mset[i], order4[i]);
1757  }
1758 
1759  enquire.set_sort_by_value(1, false);
1761  mset = enquire.get_mset(0, 10);
1762  TEST_EQUAL(mset.size(), sizeof(order5) / sizeof(Xapian::docid));
1763  for (i = 0; i < sizeof(order5) / sizeof(Xapian::docid); ++i) {
1764  TEST_EQUAL(*mset[i], order5[i]);
1765  }
1766 
1767  enquire.set_sort_by_value_then_relevance(1, false);
1769  mset = enquire.get_mset(0, 10);
1770  TEST_EQUAL(mset.size(), sizeof(order6) / sizeof(Xapian::docid));
1771  for (i = 0; i < sizeof(order6) / sizeof(Xapian::docid); ++i) {
1772  TEST_EQUAL(*mset[i], order6[i]);
1773  }
1774 
1775  enquire.set_sort_by_value_then_relevance(1, false);
1777  mset = enquire.get_mset(0, 10);
1778  TEST_EQUAL(mset.size(), sizeof(order7) / sizeof(Xapian::docid));
1779  for (i = 0; i < sizeof(order7) / sizeof(Xapian::docid); ++i) {
1780  TEST_EQUAL(*mset[i], order7[i]);
1781  }
1782 
     // For set_sort_by_relevance_then_value() the two stanzas for each
     // `reverse` value expect the same order (order8 twice, then order9 twice).
1783  enquire.set_sort_by_relevance_then_value(1, true);
1785  mset = enquire.get_mset(0, 10);
1786  TEST_EQUAL(mset.size(), sizeof(order8) / sizeof(Xapian::docid));
1787  for (i = 0; i < sizeof(order8) / sizeof(Xapian::docid); ++i) {
1788  TEST_EQUAL(*mset[i], order8[i]);
1789  }
1790 
1791  enquire.set_sort_by_relevance_then_value(1, true);
1793  mset = enquire.get_mset(0, 10);
1794  TEST_EQUAL(mset.size(), sizeof(order8) / sizeof(Xapian::docid));
1795  for (i = 0; i < sizeof(order8) / sizeof(Xapian::docid); ++i) {
1796  TEST_EQUAL(*mset[i], order8[i]);
1797  }
1798 
1799  enquire.set_sort_by_relevance_then_value(1, false);
1801  mset = enquire.get_mset(0, 10);
1802  TEST_EQUAL(mset.size(), sizeof(order9) / sizeof(Xapian::docid));
1803  for (i = 0; i < sizeof(order9) / sizeof(Xapian::docid); ++i) {
1804  TEST_EQUAL(*mset[i], order9[i]);
1805  }
1806 
1807  enquire.set_sort_by_relevance_then_value(1, false);
1809  mset = enquire.get_mset(0, 10);
1810  TEST_EQUAL(mset.size(), sizeof(order9) / sizeof(Xapian::docid));
1811  for (i = 0; i < sizeof(order9) / sizeof(Xapian::docid); ++i) {
1812  TEST_EQUAL(*mset[i], order9[i]);
1813  }
1814 }
1815 
1816 static void
1818 {
1819  static const struct { Xapian::docid did; const char* text; } content[] = {
1820  {1, "This is a test document used with the API test. This paragraph "
1821  "must be at least three lines (including the blank line) to be "
1822  "counted as a \"paragraph\"."},
1823  {2, "This is a second simple data test, used to test multiple "
1824  "(inmemory anyway) databases. The text in this file is "
1825  "unimportant, although I suppose it ought to include the "
1826  "standard word \"word\" in a few places."},
1827  {3, "This file will be indexed by paragraph, and the simple query will "
1828  "search for the word \"word\". Well expect the mset to contain "
1829  "two documents, including this paragraph and the fourth, below. "
1830  "Since this paragraph uses the word \"word\" so much, this "
1831  "should be the first one in the match set. Ill just say the word "
1832  "a few more times (word!) to make sure of that. If this doesnt "
1833  "word (typo, I meant work), then there may be fourletter words "
1834  "spoken."},
1835  {4, "Ill leave this at two paragraphs. This one hasnt got any useful "
1836  "information in it either."},
1837  {5, "This paragraph only has a load of absolute rubbish, and nothing "
1838  "of any use whatsoever."},
1839  {7, "This is the other paragraph with the word in the simple query "
1840  "in it. For simplicity, all paragraphs are at least two lines, "
1841  "due to how the hacked up indexer works."},
1842  {9, "This is another paragraph which wont be returned. Well, not "
1843  "with the simple query, anyway."},
1844  {11, "And yet another. This one does mention banana splits, though, "
1845  "so cant be that bad."}
1846  };
1847 
1848  Xapian::TermGenerator indexer;
1849  indexer.set_stemmer(Xapian::Stem("english"));
1850  indexer.set_stemming_strategy(indexer.STEM_ALL);
1851 
1852  for (auto& i : content) {
1853  Xapian::Document doc;
1854  indexer.set_document(doc);
1855  indexer.index_text(i.text);
1856  db.replace_document(i.did, doc);
1857  }
1858 
1859  db.commit();
1860 }
1861 
1862 // Test network stats and local stats give the same results.
     // The expected figures are hard-coded, so every backend this runs on must
     // reproduce them.
1863 DEFINE_TESTCASE(netstats1, generated) {
1864  static const char * const words[] = { "paragraph", "word" };
1865  Xapian::Query query(Xapian::Query::OP_OR, words, words + 2);
1866  const size_t MSET_SIZE = 10;
1867 
1868  Xapian::RSet rset;
1869  rset.add_document(4);
1870  rset.add_document(9);
1871 
1872  {
     // The database is generated on demand by make_netstats1_db().
1873  Xapian::Database db = get_database("netstats1", make_netstats1_db);
1874 
1875  Xapian::Enquire enq(db);
1876  enq.set_query(query);
1877  Xapian::MSet mset = enq.get_mset(0, MSET_SIZE, &rset);
     // NOTE(review): listing lines 1878-1879 are missing from this extract -
     // restore them from the original source.
1880  TEST_EQUAL(mset.get_matches_estimated(), 7);
     // NOTE(review): this compares a floating point weight for exact equality,
     // whereas consistency1 in this file uses TEST_EQUAL_DOUBLE for weights -
     // confirm that exact equality is intended.
1881  TEST_EQUAL(mset.get_max_attained(), 1.445962071042388164);
1882  TEST_EQUAL(mset.size(), 7);
1883 
     // Expected (docid, weight) pairs in rank order.
1884  static const pair<Xapian::docid, double> to_compare[] = {
1885  {7, 1.445962071042388164},
1886  {3, 1.4140112748017070743},
1887  {1, 1.3747698831232337824},
1888  {5, 1.1654938419498412916},
1889  {9, 1.1654938419498412916},
1890  {4, 1.1543806706320836053},
1891  {2, 0.12268031290495594321}
1892  };
1893 
1894  TEST(mset_range_is_same(mset, 0, to_compare, mset.size()));
1895  }
1896 }
1897 
1898 // Coordinate matching - scores 1 for each matching term
     //
     // NOTE(review): listing lines 1900, 1910 and 1914 are missing from this
     // extract - restore them from the original source.  Line 1900 presumably
     // declares the `scale_factor` member used below; the cross-reference index
     // at the end of this listing places ~MyWeight() at line 1910; line 1914 is
     // presumably the header of the method whose body is at line 1915.
1899 class MyWeight : public Xapian::Weight {
1901 
1902  public:
     // Return a new, default-constructed MyWeight allocated with new.
1903  MyWeight * clone() const {
1904  return new MyWeight;
1905  }
     // Store the factor which the weight-returning methods below report.
1906  void init(double factor) {
1907  scale_factor = factor;
1908  }
1909  MyWeight() { }
1911  std::string name() const { return "MyWeight"; }
     // There's no state to serialise, so the serialised form is empty.
1912  string serialise() const { return string(); }
1913  MyWeight * unserialise(const string &) const { return new MyWeight; }
1915  return scale_factor;
1916  }
1917  double get_maxpart() const { return scale_factor; }
1918 
     // No extra per-document weight is ever added.
1919  double get_sumextra(Xapian::termcount, Xapian::termcount) const { return 0; }
1920  double get_maxextra() const { return 0; }
1921 };
1922 
1923 // Test a user-defined weighting scheme (MyWeight).
1924 // Would work with remote if we registered the weighting scheme.
1925 // FIXME: do this so we also test that functionality...
1926 DEFINE_TESTCASE(userweight1, backend && !remote) {
1927  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1928  enquire.set_weighting_scheme(MyWeight());
1929  static const char * const query[] = {
1930  "this", "line", "paragraph", "rubbish"
1931  };
     // NOTE(review): listing line 1932 is missing from this extract.  Line 1933
     // is the tail of a call taking the end of the `query` array, so the start
     // of that call was presumably there - restore it from the original source.
1933  query + sizeof(query) / sizeof(query[0])));
1934  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
1935  // MyWeight scores 1 for each matching term, so the weight should equal
1936  // the number of matching terms.
1937  for (Xapian::MSetIterator i = mymset1.begin(); i != mymset1.end(); ++i) {
1938  Xapian::termcount matching_terms = 0;
     // NOTE(review): listing line 1939 is missing from this extract.  `t` is
     // used below without a visible declaration, so it was presumably declared
     // there - restore it from the original source.
1940  while (t != enquire.get_matching_terms_end(i)) {
1941  ++matching_terms;
1942  ++t;
1943  }
1944  TEST_EQUAL(i.get_weight(), matching_terms);
1945  }
1946 }
1947 
1948 // Test MatchAll queries.
1949 // This is a regression test, which failed with assertion failures in
1950 // revision 9094. Also check that the results aren't ranked by relevance
1951 // (regression test for bug fixed in 1.0.9).
     //
     // NOTE(review): listing lines 1955, 1957-1958, 1960, 1962 and 1964-1965 are
     // missing from this extract, so the queries being set and the checks on the
     // first MSet are not visible - restore them from the original source.
1952 DEFINE_TESTCASE(matchall1, backend) {
1953  Xapian::Database db(get_database("apitest_simpledata"));
1954  Xapian::Enquire enquire(db);
1956  Xapian::MSet mset = enquire.get_mset(0, 10);
1959 
     // Fragment of a query which combines a term named "nosuchterm" with
     // something on the missing lines.
1961  Xapian::Query("nosuchterm"),
1963  mset = enquire.get_mset(0, 10);
1966 
1967  // Check that the results aren't ranked by relevance (fixed in 1.0.9).
1968  TEST(mset.size() > 1);
     // The last entry must have weight 0, and the entries must start at
     // docid 1 and end at a docid equal to the number of matches.
1969  TEST_EQUAL(mset[mset.size() - 1].get_weight(), 0);
1970  TEST_EQUAL(*mset[0], 1);
1971  TEST_EQUAL(*mset[mset.size() - 1], mset.size());
1972 }
1973 
1974 // Test using a ValueSetMatchDecider
1975 DEFINE_TESTCASE(valuesetmatchdecider2, backend && !remote) {
1976  Xapian::Database db(get_database("apitest_phrase"));
1977  Xapian::Enquire enq(db);
1978  enq.set_query(Xapian::Query("leav"));
1979 
1980  Xapian::ValueSetMatchDecider vsmd1(1, true);
1981  vsmd1.add_value("n");
1982  Xapian::ValueSetMatchDecider vsmd2(1, false);
1983  vsmd2.add_value("n");
1984 
1985  Xapian::MSet mymset = enq.get_mset(0, 20);
1986  mset_expect_order(mymset, 8, 6, 4, 5, 7, 10, 12, 11, 13, 9, 14);
1987  mymset = enq.get_mset(0, 20, 0, NULL, &vsmd1);
1988  mset_expect_order(mymset, 6, 12);
1989  mymset = enq.get_mset(0, 20, 0, NULL, &vsmd2);
1990  mset_expect_order(mymset, 8, 4, 5, 7, 10, 11, 13, 9, 14);
1991 }
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Definition: testutils.h:78
static void make_netstats1_db(Xapian::WritableDatabase &db, const string &)
Definition: api_db.cc:1817
static const char * get_xapian_progsrv_command()
Get the command line required to run xapian-progsrv.
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
MatchDecider filtering results based on whether document values are in a user-defined set...
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: omenquire.cc:878
static size_t check(const std::string &path, int opts=0, std::ostream *out=NULL)
Check the integrity of a database or database table.
Definition: database.h:560
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:850
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
~MyWeight()
Definition: api_db.cc:1910
#define TEST_EXCEPTION_BASE_CLASS(TYPE, CODE)
Check that CODE throws Xapian exception derived from TYPE.
Definition: testutils.h:106
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:869
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
Definition: enquire.h:713
static const Xapian::Query MatchAll
A query matching all documents.
Definition: query.h:75
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:856
const int DB_CREATE
Create a new database.
Definition: constants.h:44
DatabaseOpeningError indicates failure to open a database.
Definition: error.h:581
Class representing a stemming algorithm.
Definition: stem.h:62
bool operator()(const Xapian::Document &doc) const
Decide whether we want this document to be in the MSet.
Definition: api_db.cc:503
void set_document(const Xapian::Document &doc)
Set the current document.
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:46
Parses a piece of text and generates terms.
Definition: termgenerator.h:48
Indicates a timeout expired while communicating with a remote database.
Definition: error.h:845
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails...
Definition: testsuite.h:267
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:283
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: omenquire.cc:246
#define EXPECTED_EXCEPTION
MyWeight * unserialise(const string &) const
Unserialise parameters.
Definition: api_db.cc:1913
TermIterator allterms_end(const std::string &=std::string()) const
Corresponding end iterator to allterms_begin(prefix).
Definition: database.h:265
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
double get_max_attained() const
The maximum weight attained by any document.
Definition: omenquire.cc:297
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
Definition: error.h:122
string needle
Definition: api_db.cc:498
void sleep(double t)
Sleep until the time represented by this object.
Definition: realtime.h:127
a generic test suite engine
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
Definition: enquire.h:601
void add_value(const std::string &value)
Add a value to the test set.
C++ function versions of useful Unix commands.
Class representing a list of search results.
Definition: mset.h:44
void skip_to(const std::string &term)
Advance the iterator to term term.
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
const int DB_CREATE_OR_OPEN
Create database if it doesn't already exist.
Definition: constants.h:35
include <netdb.h>, with portability workarounds.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
static Xapian::Stem stemmer
Definition: stemtest.cc:41
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
double get_maxextra() const
Return an upper bound on what get_sumextra() can return for any document.
Definition: api_db.cc:1920
void init(double factor)
Allow the subclass to perform any initialisation it needs to.
Definition: api_db.cc:1906
std::string name() const
Return the name of this weighting scheme.
Definition: api_db.cc:1911
include <sys/stat.h> with portability enhancements
double get_weight() const
Get the weight for the current position.
void index_text(const Xapian::Utf8Iterator &itor, Xapian::termcount wdf_inc=1, const std::string &prefix=std::string())
Index some text.
MyWeight()
Definition: api_db.cc:1909
const int DB_BACKEND_GLASS
Use the glass backend.
Definition: constants.h:158
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:956
MyWeight * clone() const
Clone this object.
Definition: api_db.cc:1903
test functionality of the Xapian API
void rm_rf(const string &filename)
Remove a directory and contents, just like the Unix "rm -rf" command.
Definition: unixcmds.cc:111
double get_maxpart() const
Return an upper bound on what get_sumpart() can return for any document.
Definition: api_db.cc:1917
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: omenquire.cc:262
Xapian::doclength get_avlength() const
Get the average length of the documents in the database.
Definition: omdatabase.cc:293
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_REL(A, REL, B)
Test a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
Definition: eset.h:345
Class for iterating over a list of terms.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Definition: omenquire.cc:941
Decide if a Xapian::Error exception should be ignored.
Definition: errorhandler.h:50
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
Xapian::doccount size() const
Return number of items in this ESet object.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: omenquire.cc:276
Base class for backend handling in test harness.
string get_database_path(const string &dbname)
Definition: apitest.cc:72
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
static int verbose
Definition: xapian-delve.cc:47
DatabaseLockError indicates failure to lock a database.
Definition: error.h:493
const int DB_OPEN
Open an existing database.
Definition: constants.h:50
This class provides read/write access to a database.
Definition: database.h:785
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const
Calculate the weight contribution for this object's term to a document.
Definition: api_db.cc:1914
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:351
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:719
DatabaseCreateError indicates a failure to create a database.
Definition: error.h:451
Public interfaces for the Xapian library.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Definition: omenquire.cc:887
docids sort in ascending order (default)
Definition: enquire.h:324
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
double scale_factor
Definition: api_db.cc:1900
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:607
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:612
void set_stemmer(const Xapian::Stem &stemmer)
Set the Xapian::Stem object to be used for generating stemmed terms.
Xapian::termcount get_doclength(Xapian::docid did) const
Get the length of a document.
Definition: omdatabase.cc:461
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
double get_sumextra(Xapian::termcount, Xapian::termcount) const
Calculate the term-independent weight component for a document.
Definition: api_db.cc:1919
Indicates an attempt to access a database not present.
Definition: error.h:1055
TermIterator allterms_begin(const std::string &prefix=std::string()) const
An iterator which runs across all terms with a given prefix.
Definition: omdatabase.cc:223
Iterator over a Xapian::ESet.
Definition: eset.h:160
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Definition: omdatabase.cc:148
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
bool term_exists(const std::string &tname) const
Check if a given term exists in the database.
Definition: omdatabase.cc:524
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
Base class for matcher decision functor.
Definition: enquire.h:118
void add_document(Xapian::docid did)
Add a document to the relevance set.
Definition: omenquire.cc:104
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
const int DB_BACKEND_STUB
Open a stub database file.
Definition: constants.h:179
#define SKIP_TEST(MSG)
Skip the current testcase with message MSG.
Definition: testsuite.h:74
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:283
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:43
Indicates a problem communicating with a remote database.
Definition: error.h:803
Match documents which at least one subquery matches.
Definition: query.h:92
void skip_to(Xapian::docid did)
Advance the iterator to document did.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
DEFINE_TESTCASE(termstats, backend)
Definition: api_db.cc:59
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Definition: testsuite.h:287
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:111
void keep_alive()
Send a "keep-alive" to remote databases to stop them timing out.
Definition: omdatabase.cc:538
<unistd.h>, but with compat.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
Class representing a list of search results.
Definition: eset.h:43
Xapian::Document get_document() const
Get the Document object for the current position.
Definition: omenquire.cc:450
const int DB_CREATE_OR_OVERWRITE
Create database if it doesn't already exist, or overwrite if it does.
Definition: constants.h:38
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:125
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in document.
Definition: omdatabase.cc:476
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
Xapian::Database get_remote_database(const string &dbname, unsigned int timeout)
Definition: apitest.cc:111
string serialise() const
Return this object's parameters serialised as a single string.
Definition: api_db.cc:1912
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
Definition: omenquire.cc:842
std::string get_value(Xapian::valueno slot) const
Get value by number.
Definition: omdocument.cc:64
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Definition: eset.h:350
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323
A handle representing a document in a Xapian database.
Definition: document.h:61
const int DB_BACKEND_CHERT
Use the chert backend.
Definition: constants.h:170
GrepMatchDecider(const string &needle_)
Definition: api_db.cc:500
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
static Xapian::Query query(const string &t)
Definition: api_db.cc:50
A relevance set (R-Set).
Definition: enquire.h:60
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
void touch(const string &filename)
Touch a file, just like the Unix "touch" command.
Definition: unixcmds.cc:155
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:269
Abstract base class for weighting schemes.
Definition: weight.h:35
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Definition: omdatabase.cc:339
docids sort in descending order.
Definition: enquire.h:326
std::string get_description() const
Return a string describing this object.