xapian-core  2.0.0
api_query.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008-2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "api_query.h"
24 
25 #include <xapian.h>
26 
27 #include "testsuite.h"
28 #include "testutils.h"
29 
30 #include "apitest.h"
31 
32 using namespace std;
33 
// Test case queryterms1 (registered with condition `!backend`): checks the
// terms reported by a query's term iterators.
// NOTE(review): the embedded line numbers jump (34 -> 40 -> 42 -> 44), so some
// lines of this listing appear to be missing; the declarations of `query` and
// `qp` are not visible here.
34 DEFINE_TESTCASE(queryterms1, !backend) {
40  TEST_EQUAL(*query.get_terms_begin(), "fair");
42 
44  Xapian::Query q = qp.parse_query("\"the the the\"");
45  {
    // Walk get_terms_begin()..get_terms_end(): each entry must be "the" and
    // three entries are expected in total.
46  auto t = q.get_terms_begin();
47  size_t count = 0;
48  while (t != q.get_terms_end()) {
49  TEST_EQUAL(*t, "the");
50  ++count;
51  ++t;
52  }
53  TEST_EQUAL(count, 3);
54  }
55  {
    // Walk get_unique_terms_begin()..get_unique_terms_end(): "the" must be
    // reported exactly once.
56  auto t = q.get_unique_terms_begin();
57  size_t count = 0;
58  while (t != q.get_unique_terms_end()) {
59  TEST_EQUAL(*t, "the");
60  ++count;
61  ++t;
62  }
63  TEST_EQUAL(count, 1);
64  }
65 }
66 
// Test case matchall2: the visible part expects the string
// "Query(<alldocuments>)".
// NOTE(review): original line 68 (the start of the check whose second argument
// is shown below) is missing from this listing.
67 DEFINE_TESTCASE(matchall2, !backend) {
69  "Query(<alldocuments>)");
70 }
71 
// Test case matchnothing1: checks get_description() for queries built with
// Xapian::Query::MatchNothing as a subquery.
// NOTE(review): the embedded numbering has gaps (73, 81-82, 85-86, 89-90,
// 93-94, 97-98, 101-102), so the construction of q2..q7 is not visible here.
72 DEFINE_TESTCASE(matchnothing1, !backend) {
74  "Query()");
    // OP_AND over {"foo", MatchNothing} is expected to describe as "Query()".
75  vector<Xapian::Query> subqs;
76  subqs.push_back(Xapian::Query("foo"));
77  subqs.push_back(Xapian::Query::MatchNothing);
78  Xapian::Query q(Xapian::Query::OP_AND, subqs.begin(), subqs.end());
79  TEST_STRINGS_EQUAL(q.get_description(), "Query()");
80 
83  TEST_STRINGS_EQUAL(q2.get_description(), "Query()");
84 
    // NOTE(review): q2 is checked a second time here while q4..q7 follow; a
    // q3 may have been intended - confirm against the missing lines 85-86.
87  TEST_STRINGS_EQUAL(q2.get_description(), "Query()");
88 
91  TEST_STRINGS_EQUAL(q4.get_description(), "Query(foo)");
92 
95  TEST_STRINGS_EQUAL(q5.get_description(), "Query()");
96 
99  TEST_STRINGS_EQUAL(q6.get_description(), "Query(foo)");
100 
103  TEST_STRINGS_EQUAL(q7.get_description(), "Query()");
104 }
105 
// Test case overload1: exercises the overloaded operators on Xapian::Query
// (&, |, ^, ~, &=, |=, ^=, * and /) and checks the resulting structure via
// get_description().
// NOTE(review): the embedded numbering skips lines 142, 181, 227, 242, 246 and
// 250, so the statement preceding each corresponding check is not visible in
// this listing.
106 DEFINE_TESTCASE(overload1, !backend) {
107  Xapian::Query q;
108  q = Xapian::Query("foo") & Xapian::Query("bar");
109  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar))");
110 
111  // Test &= appends a same-type subquery (since Xapian 1.4.10).
112  q &= Xapian::Query("baz");
113  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar AND baz))");
114  // But not if the RHS is the same query:
115  q = Xapian::Query("foo") & Xapian::Query("bar");
116 #ifdef __has_warning
117 # if __has_warning("-Wself-assign-overloaded")
118  // Suppress warning from newer clang about self-assignment so we can
119  // test that self-assignment works!
120 # pragma clang diagnostic push
121 # pragma clang diagnostic ignored "-Wself-assign-overloaded"
122 # endif
123 #endif
124  q &= q;
125 #ifdef __has_warning
126 # if __has_warning("-Wself-assign-overloaded")
127 # pragma clang diagnostic pop
128 # endif
129 #endif
130  TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo AND bar) AND (foo AND bar)))");
131  {
132  // Also not if the query has a refcount > 1.
133  q = Xapian::Query("foo") & Xapian::Query("bar");
134  Xapian::Query qcopy = q;
135  qcopy &= Xapian::Query("baz");
136  TEST_STRINGS_EQUAL(qcopy.get_description(), "Query(((foo AND bar) AND baz))");
137  // And q shouldn't change.
138  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar))");
139  }
140  // Check that MatchNothing still results in MatchNothing:
141  q = Xapian::Query("foo") & Xapian::Query("bar");
    // NOTE(review): line 142 (presumably the &= with MatchNothing) is missing.
143  TEST_STRINGS_EQUAL(q.get_description(), "Query()");
144  // Check we don't combine for other operators:
145  q = Xapian::Query("foo") | Xapian::Query("bar");
146  q &= Xapian::Query("baz");
147  TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo OR bar) AND baz))");
148 
149  // Test |= appends a same-type subquery (since Xapian 1.4.10).
150  q = Xapian::Query("foo") | Xapian::Query("bar");
151  q |= Xapian::Query("baz");
152  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar OR baz))");
153  // But not if the RHS is the same query:
154  q = Xapian::Query("foo") | Xapian::Query("bar");
155 #ifdef __has_warning
156 # if __has_warning("-Wself-assign-overloaded")
157  // Suppress warning from newer clang about self-assignment so we can
158  // test that self-assignment works!
159 # pragma clang diagnostic push
160 # pragma clang diagnostic ignored "-Wself-assign-overloaded"
161 # endif
162 #endif
163  q |= q;
164 #ifdef __has_warning
165 # if __has_warning("-Wself-assign-overloaded")
166 # pragma clang diagnostic pop
167 # endif
168 #endif
169  TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo OR bar) OR (foo OR bar)))");
170  {
171  // Also not if the query has a refcount > 1.
172  q = Xapian::Query("foo") | Xapian::Query("bar");
173  Xapian::Query qcopy = q;
174  qcopy |= Xapian::Query("baz");
175  TEST_STRINGS_EQUAL(qcopy.get_description(), "Query(((foo OR bar) OR baz))");
176  // And q shouldn't change.
177  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
178  }
179  // Check that MatchNothing still results in no change:
180  q = Xapian::Query("foo") | Xapian::Query("bar");
    // NOTE(review): line 181 (presumably the |= with MatchNothing) is missing.
182  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
183  // Check we don't combine for other operators:
184  q = Xapian::Query("foo") & Xapian::Query("bar");
185  q |= Xapian::Query("baz");
186  TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo AND bar) OR baz))");
187 
188  // Test ^= appends a same-type subquery (since Xapian 1.4.10).
189  q = Xapian::Query("foo") ^ Xapian::Query("bar");
190  q ^= Xapian::Query("baz");
191  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar XOR baz))");
192  // But a query ^= itself gives an empty query.
193  q = Xapian::Query("foo") ^ Xapian::Query("bar");
194 #ifdef __has_warning
195 # if __has_warning("-Wself-assign-overloaded")
196  // Suppress warning from newer clang about self-assignment so we can
197  // test that self-assignment works!
198 # pragma clang diagnostic push
199 # pragma clang diagnostic ignored "-Wself-assign-overloaded"
200 # endif
201 #endif
202  q ^= q;
203 #ifdef __has_warning
204 # if __has_warning("-Wself-assign-overloaded")
205 # pragma clang diagnostic pop
206 # endif
207 #endif
208  TEST_STRINGS_EQUAL(q.get_description(), "Query()");
209  {
210  // Even if the reference count > 1.
211  q = Xapian::Query("foo") ^ Xapian::Query("bar");
212  Xapian::Query qcopy = q;
213  q ^= qcopy;
214  TEST_STRINGS_EQUAL(q.get_description(), "Query()");
215  }
216  {
217  // Also not if the query has a refcount > 1.
218  q = Xapian::Query("foo") ^ Xapian::Query("bar");
219  Xapian::Query qcopy = q;
220  qcopy ^= Xapian::Query("baz");
221  TEST_STRINGS_EQUAL(qcopy.get_description(), "Query(((foo XOR bar) XOR baz))");
222  // And q shouldn't change.
223  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
224  }
225  // Check that MatchNothing still results in no change:
226  q = Xapian::Query("foo") ^ Xapian::Query("bar");
    // NOTE(review): line 227 (presumably the ^= with MatchNothing) is missing.
228  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
229  // Check we don't combine for other operators:
230  q = Xapian::Query("foo") & Xapian::Query("bar");
231  q ^= Xapian::Query("baz");
232  TEST_STRINGS_EQUAL(q.get_description(), "Query(((foo AND bar) XOR baz))");
233 
    // `&~` combines into a single AND_NOT.
234  q = Xapian::Query("foo") &~ Xapian::Query("bar");
235  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND_NOT bar))");
236  // In 1.4.9 and earlier this gave (foo AND (<alldocuments> AND_NOT bar)).
237  q = Xapian::Query("foo");
238  q &= ~Xapian::Query("bar");
239  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND_NOT bar))");
    // Unary ~ on its own is described relative to <alldocuments>.
240  q = ~Xapian::Query("bar");
241  TEST_STRINGS_EQUAL(q.get_description(), "Query((<alldocuments> AND_NOT bar))");
    // NOTE(review): lines 242, 246 and 250 (the assignments checked by the
    // next three single-argument expectations) are missing from this listing.
243  TEST_STRINGS_EQUAL(q.get_description(), "Query()");
244  q = Xapian::Query("foo") | Xapian::Query("bar");
245  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
247  TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
248  q = Xapian::Query("foo") ^ Xapian::Query("bar");
249  TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
251  TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
    // Scaling: `factor * query`, `query * factor` and `query / divisor` are all
    // described as "<factor> * <subquery>".
252  q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
253  TEST_STRINGS_EQUAL(q.get_description(), "Query(1.25 * (one OR two))");
254  q = (Xapian::Query("one") & Xapian::Query("two")) * 42;
255  TEST_STRINGS_EQUAL(q.get_description(), "Query(42 * (one AND two))");
256  q = Xapian::Query("one") / 2.0;
257  TEST_STRINGS_EQUAL(q.get_description(), "Query(0.5 * one)");
258 }
259 
// Test case possubqueries1: builds a one-document database with postings
// "a"@1, "b"@2, "c"@3 and checks positional operators (OP_NEAR / OP_PHRASE)
// whose subqueries are themselves compound queries.
// NOTE(review): many lines are missing from this listing (279, 285, 288, 291,
// 294, 297, 306, 311, 316, 321-322, 326, 331).  The declarations of a_or_b,
// a_and_b, a_near_b and a_phrs_b are only partly visible, and each
// `(void)enq.get_mset(0, 10));` has an unmatched ')' - presumably closing a
// macro call opened on a missing line.
267 DEFINE_TESTCASE(possubqueries1, backend) {
268  Xapian::Database db = get_database("possubqueries1",
269  [](Xapian::WritableDatabase& wdb,
270  const string&)
271  {
272  Xapian::Document doc;
273  doc.add_posting("a", 1);
274  doc.add_posting("b", 2);
275  doc.add_posting("c", 3);
276  wdb.add_document(doc);
277  });
278 
280  Xapian::Query("a"),
281  Xapian::Query("b"));
282  Xapian::Query near(Xapian::Query::OP_NEAR, a_or_b, a_or_b);
283  // As of 1.3.0, we no longer rearrange queries at this point, so check
284  // that we don't.
286  "Query(((a OR b) NEAR 2 (a OR b)))");
287  Xapian::Query phrase(Xapian::Query::OP_PHRASE, a_or_b, a_or_b);
289  "Query(((a OR b) PHRASE 2 (a OR b)))");
290 
292  Xapian::Query("a"),
293  Xapian::Query("b"));
295  Xapian::Query("a"),
296  Xapian::Query("b"));
298  Xapian::Query("a"),
299  Xapian::Query("b"));
300  Xapian::Query c("c");
301 
302  // FIXME: The plan is to actually try to support the cases below, but
303  // for now at least ensure they are cleanly rejected.
304  Xapian::Enquire enq(db);
305 
    // OP_NEAR with each compound subquery paired with the term "c".
307  Xapian::Query q(Xapian::Query::OP_NEAR, a_and_b, c);
308  enq.set_query(q);
309  (void)enq.get_mset(0, 10));
310 
312  Xapian::Query q(Xapian::Query::OP_NEAR, a_near_b, c);
313  enq.set_query(q);
314  (void)enq.get_mset(0, 10));
315 
317  Xapian::Query q(Xapian::Query::OP_NEAR, a_phrs_b, c);
318  enq.set_query(q);
319  (void)enq.get_mset(0, 10));
320 
    // The same combinations with OP_PHRASE (the first construction line is
    // missing from this listing).
323  enq.set_query(q);
324  (void)enq.get_mset(0, 10));
325 
327  Xapian::Query q(Xapian::Query::OP_PHRASE, a_near_b, c);
328  enq.set_query(q);
329  (void)enq.get_mset(0, 10));
330 
332  Xapian::Query q(Xapian::Query::OP_PHRASE, a_phrs_b, c);
333  enq.set_query(q);
334  (void)enq.get_mset(0, 10));
335 }
336 
338 // time.
// Test case xor3: runs OP_XOR queries built from slices of the `subqs` term
// table against the "apitest_simpledata" database and checks which documents
// match, and in which order.
// NOTE(review): lines 373-375, 381-383 and 389-391 are missing from this
// listing; they follow each get_mset(0, 0) call, so further checks on those
// msets are presumably not visible here.
339 DEFINE_TESTCASE(xor3, backend) {
340  Xapian::Database db = get_database("apitest_simpledata");
341 
342  static const char * const subqs[] = {
343  "this", "hack", "which", "paragraph", "is", "return", "this", "this"
344  };
345  // Document where the subqueries run out *does* match XOR:
    // Uses subqs[1..5]: "hack", "which", "paragraph", "is", "return".
346  Xapian::Query q(Xapian::Query::OP_XOR, subqs + 1, subqs + 6);
347  Xapian::Enquire enq(db);
348  enq.set_query(q);
349  Xapian::MSet mset = enq.get_mset(0, 10);
350 
351  TEST_EQUAL(mset.size(), 3);
352  TEST_EQUAL(*mset[0], 4);
353  TEST_EQUAL(*mset[1], 2);
354  TEST_EQUAL(*mset[2], 3);
355 
356  // Document where the subqueries run out *does not* match XOR:
    // Uses subqs[1..4]: "hack", "which", "paragraph", "is".
357  q = Xapian::Query(Xapian::Query::OP_XOR, subqs + 1, subqs + 5);
358  enq.set_query(q);
359  mset = enq.get_mset(0, 10);
360 
361  TEST_EQUAL(mset.size(), 4);
362  TEST_EQUAL(*mset[0], 5);
363  TEST_EQUAL(*mset[1], 4);
364  TEST_EQUAL(*mset[2], 2);
365  TEST_EQUAL(*mset[3], 3);
366 
367  // Tests that XOR subqueries that match all docs are handled well when
368  // calculating min/est/max match counts.
    // Requesting zero results (get_mset(0, 0)) means mset.size() must be 0.
369  q = Xapian::Query(Xapian::Query::OP_XOR, subqs, subqs + 2);
370  enq.set_query(q);
371  mset = enq.get_mset(0, 0);
372  TEST_EQUAL(mset.size(), 0);
376 
    // subqs[5..6]: "return", "this".
377  q = Xapian::Query(Xapian::Query::OP_XOR, subqs + 5, subqs + 7);
378  enq.set_query(q);
379  mset = enq.get_mset(0, 0);
380  TEST_EQUAL(mset.size(), 0);
384 
    // subqs[5..7]: "return", "this", "this".
385  q = Xapian::Query(Xapian::Query::OP_XOR, subqs + 5, subqs + 8);
386  enq.set_query(q);
387  mset = enq.get_mset(0, 0);
388  TEST_EQUAL(mset.size(), 0);
392 }
393 
395 DEFINE_TESTCASE(nonutf8termdesc1, !backend) {
396  TEST_EQUAL(Xapian::Query("\xc0\x80\xf5\x80\x80\x80\xfe\xff").get_description(),
397  "Query(\\xc0\\x80\\xf5\\x80\\x80\\x80\\xfe\\xff)");
398  TEST_EQUAL(Xapian::Query(string("\x00\x1f", 2)).get_description(),
399  "Query(\\x00\\x1f)");
400  // Check that backslashes are encoded so output isn't ambiguous.
401  TEST_EQUAL(Xapian::Query("back\\slash").get_description(),
402  "Query(back\\x5cslash)");
403  // Check that \x7f is escaped.
404  TEST_EQUAL(Xapian::Query("D\x7f_\x7f~").get_description(),
405  "Query(D\\x7f_\\x7f~)");
406 }
407 
// Test case queryintro1: checks query introspection accessors
// (get_num_subqueries(), get_leaf_wqf(), get_leaf_pos(), get_type()) on
// queries built in various ways.
// NOTE(review): many lines are missing from this listing (410, 412, 416-420,
// 434, 437, 446-448, 451-453, 455, 457-459, 461, 463-465), so several
// assignments below have no visible check, and the OP_NEAR / OP_PHRASE checks
// have no visible construction.
409 DEFINE_TESTCASE(queryintro1, !backend) {
    // MatchAll and MatchNothing both report zero subqueries.
411  TEST_EQUAL(Xapian::Query::MatchAll.get_num_subqueries(), 0);
413  TEST_EQUAL(Xapian::Query::MatchNothing.get_num_subqueries(), 0);
414 
415  Xapian::Query q;
421 
    // A leaf built as ("foo", 2, 1) reports wqf 2 and position 1.
422  q = Xapian::Query("foo", 2, 1);
423  TEST_EQUAL(q.get_leaf_wqf(), 2);
424  TEST_EQUAL(q.get_leaf_pos(), 1);
425 
    // A leaf built from just a term reports wqf 1 and position 0.
426  q = Xapian::Query("bar");
427  TEST_EQUAL(q.get_leaf_wqf(), 1);
428  TEST_EQUAL(q.get_leaf_pos(), 0);
429 
430  q = Xapian::Query("foo") & Xapian::Query("bar");
431  TEST_EQUAL(q.get_type(), q.OP_AND);
432 
433  q = Xapian::Query("foo") &~ Xapian::Query("bar");
435 
436  q = ~Xapian::Query("bar");
438 
439  q = Xapian::Query("foo") | Xapian::Query("bar");
440  TEST_EQUAL(q.get_type(), q.OP_OR);
441 
442  q = Xapian::Query("foo") ^ Xapian::Query("bar");
443  TEST_EQUAL(q.get_type(), q.OP_XOR);
444 
445  q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
449 
450  q = Xapian::Query("one") / 2.0;
454 
456  TEST_EQUAL(q.get_type(), q.OP_NEAR);
460 
462  TEST_EQUAL(q.get_type(), q.OP_PHRASE);
466 }
467 
469 // We were incorrectly converting a term which indexed all docs and was used
470 // in an unweighted phrase into an all docs postlist, so check that this
471 // case actually works.
// Test case phrasealldocs1: combines the term "paragraph" with the phrase
// "this is the" and expects 3 matches from the "apitest_declen" database.
// NOTE(review): line 476 (the start of the assignment to `q`, including the
// operator joining the two subqueries) is missing from this listing.
472 DEFINE_TESTCASE(phrasealldocs1, backend) {
473  Xapian::Database db = get_database("apitest_declen");
474  Xapian::Query q;
475  static const char * const phrase[] = { "this", "is", "the" };
477  Xapian::Query("paragraph"),
478  Xapian::Query(q.OP_PHRASE, phrase, phrase + 3));
479  Xapian::Enquire enq(db);
480  enq.set_query(q);
481  Xapian::MSet mset = enq.get_mset(0, 10);
482  TEST_EQUAL(mset.size(), 3);
483 }
484 
// Data table driving the wildcard1 test case.
// NOTE(review): lines 485, 487 and 494 are missing from this listing - the
// struct header, one field (wildcard1 reads `test.max_expansion`, which
// matches the second initialiser in each row) and the array declarator.
    // Pattern passed to the Xapian::Query constructor by wildcard1.
486  const char * pattern;
    // 'E', 'F' or 'M', selecting a branch of the switch in wildcard1; any
    // other non-zero value makes the test fail.
488  char max_type;
    // Terms the pattern is expected to expand to; unused trailing slots are 0.
489  const char * terms[4];
490 };
491 
// Terms initialiser marking a row where wildcard1 expects
// Xapian::WildcardError: all four slots are counted and the last one is "".
492 #define WILDCARD_EXCEPTION { 0, 0, 0, "" }
493 static const
495  // Tries to expand to 7 terms.
496  { "th", 6, 'E', WILDCARD_EXCEPTION },
497  { "thou", 1, 'E', { "though", 0, 0, 0 } },
498  { "s", 2, 'F', { "say", "search", 0, 0 } },
499  { "s", 2, 'M', { "simpl", "so", 0, 0 } }
500 };
501 
// Test case wildcard1: for each row of wildcard1_testcases, runs the wildcard
// query and checks it gives the same results as an OP_SYNONYM query over the
// row's expected terms, or that Xapian::WildcardError is thrown for rows
// marked with WILDCARD_EXCEPTION.
// NOTE(review): lines 511, 523, 526 and 529 are missing from this listing, so
// the declaration of `o` and the assignment to `max_type` in each switch case
// are not visible.
502 DEFINE_TESTCASE(wildcard1, backend) {
503  // FIXME: The counting of terms the wildcard expands to is per subdatabase,
504  // so the wildcard may expand to more terms than the limit if some aren't
505  // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
506  // frequency from the subdatabase, and so may select different terms in
507  // each subdatabase.
508  SKIP_TEST_FOR_BACKEND("multi");
509  Xapian::Database db = get_database("apitest_simpledata");
510  Xapian::Enquire enq(db);
512 
513  for (auto&& test : wildcard1_testcases) {
514  tout << test.pattern << '\n';
    // Trim unused trailing slots from the expected terms.
    // NOTE(review): this loop has no lower bound, so it relies on every row
    // having at least one non-null slot (editdist1 guards with
    // `tend > test.terms`).
515  auto tend = test.terms + 4;
516  while (tend[-1] == NULL) --tend;
    // An exception row has all four slots counted with "" in the last one.
517  bool expect_exception = (tend - test.terms == 4 && tend[-1][0] == '\0');
518  Xapian::Query q;
519  if (test.max_type) {
520  int max_type;
521  switch (test.max_type) {
522  case 'E':
524  break;
525  case 'F':
527  break;
528  case 'M':
530  break;
531  default:
532  FAIL_TEST("Unexpected max_type value");
533  }
534  q = Xapian::Query(o, test.pattern, test.max_expansion, max_type);
535  } else {
536  q = Xapian::Query(o, test.pattern, test.max_expansion);
537  }
538  enq.set_query(q);
539  try {
540  Xapian::MSet mset = enq.get_mset(0, 10);
    // Reaching this point means no exception was thrown.
541  TEST(!expect_exception);
    // Compare against an explicit OP_SYNONYM of the expected expansion.
542  q = Xapian::Query(q.OP_SYNONYM, test.terms, tend);
543  enq.set_query(q);
544  Xapian::MSet mset2 = enq.get_mset(0, 10);
545  TEST_EQUAL(mset.size(), mset2.size());
546  TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
547  } catch (const Xapian::WildcardError &) {
548  TEST(expect_exception);
549  }
550  }
551 }
552 
// Test case wildcard2: ORs together three queries for "w", "s" and "t", each
// built with an expansion limit of 2 and WILDCARD_LIMIT_MOST_FREQUENT, and
// expects 6 matches from "apitest_simpledata".
// NOTE(review): line 563 (presumably the declaration of `o`) is missing from
// this listing.
554 DEFINE_TESTCASE(wildcard2, backend) {
555  // FIXME: The counting of terms the wildcard expands to is per subdatabase,
556  // so the wildcard may expand to more terms than the limit if some aren't
557  // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
558  // frequency from the subdatabase, and so may select different terms in
559  // each subdatabase.
560  SKIP_TEST_FOR_BACKEND("multi");
561  Xapian::Database db = get_database("apitest_simpledata");
562  Xapian::Enquire enq(db);
564 
565  const int max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
566  Xapian::Query q0(o, "w", 2, max_type);
567  Xapian::Query q(o, "s", 2, max_type);
568  Xapian::Query q2(o, "t", 2, max_type);
    // Build (q0 OR q) OR q2 in two steps, reusing `q` as the accumulator.
569  q = Xapian::Query(q.OP_OR, q0, q);
570  q = Xapian::Query(q.OP_OR, q, q2);
571  enq.set_query(q);
572  Xapian::MSet mset = enq.get_mset(0, 10);
573  TEST_EQUAL(mset.size(), 6);
574 }
575 
// Test case wildcard3: builds a two-document database and expects a query to
// match both documents.
// NOTE(review): lines 595-596 (the construction of `q`) are missing from this
// listing.
581 DEFINE_TESTCASE(wildcard3, backend) {
582  Xapian::Database db = get_database("wildcard3",
583  [](Xapian::WritableDatabase& wdb,
584  const string&)
585  {
    // The first document has terms "Zfoo" and "a"; the same Document object is
    // then extended with "abc" and added again as the second document.
586  Xapian::Document doc;
587  doc.add_term("Zfoo");
588  doc.add_term("a");
589  wdb.add_document(doc);
590  doc.add_term("abc");
591  wdb.add_document(doc);
592  });
593 
594  Xapian::Enquire enq(db);
597  enq.set_query(q);
598  Xapian::MSet mset = enq.get_mset(0, 10);
599  TEST_EQUAL(mset.size(), 2);
600 }
601 
// Test case wildcard4: ORs the explicit terms "xyzzy" and "use" onto an
// existing query `q`, then checks the match count, percentage, term weights
// and matching terms.
// NOTE(review): lines 609-611 (the initial construction of `q`) are missing
// from this listing.
606 DEFINE_TESTCASE(wildcard4, backend) {
607  Xapian::Database db = get_database("apitest_simpledata");
608  Xapian::Enquire enq(db);
612  q |= Xapian::Query("xyzzy");
613  q |= Xapian::Query("use");
614  enq.set_query(q);
615  Xapian::MSet mset = enq.get_mset(0, 10);
616  TEST_EQUAL(mset.size(), 4);
617  TEST_EQUAL(mset[0].get_percent(), 25);
618  TEST_EQUAL_DOUBLE(mset.get_termweight("up"), 1.48489483900601);
619  // The exact termweight value here depends on the backend, but before the
620  // bug fix we were doubling the termweight of "use".
621  TEST_REL(mset.get_termweight("use"), <, 0.9);
622  TEST_EQUAL(mset.get_termweight("xyzzy"), 0.0);
623  // Enquire::get_matching_terms_begin() doesn't report terms from wildcard
624  // expansion, but it should report an explicit query term which also
625  // happens be in a wildcard expansion.
    // Join the matching terms of the second-ranked document with spaces.
626  string terms;
627  for (auto t = enq.get_matching_terms_begin(*mset[1]);
628  t != enq.get_matching_terms_end(*mset[1]);
629  ++t) {
630  if (!terms.empty()) terms += ' ';
631  terms += *t;
632  }
633  TEST_EQUAL(terms, "use");
634 }
635 
// Test case dualprefixwildcard1: logs the description of a query `q` and
// expects it to match 2 documents in "apitest_simpledata".
// NOTE(review): lines 638-640 (the construction of `q`) are missing from this
// listing.
636 DEFINE_TESTCASE(dualprefixwildcard1, backend) {
637  Xapian::Database db = get_database("apitest_simpledata");
641  tout << q.get_description() << '\n';
642  Xapian::Enquire enq(db);
643  enq.set_query(q);
644  TEST_EQUAL(enq.get_mset(0, 5).size(), 2);
645 }
646 
// Test case specialwildcard1: checks that special wildcard patterns are
// described as MatchNothing ("Query()") or MatchAll
// ("Query(<alldocuments>)").
// NOTE(review): lines 649-650 (presumably the declarations of `o` and `f`) are
// missing from this listing.
648 DEFINE_TESTCASE(specialwildcard1, !backend) {
651 
652  // Empty wildcard -> MatchNothing.
653  TEST_EQUAL(Xapian::Query(o, "", 0, f).get_description(), "Query()");
654 
655  // "*", "?*", etc -> MatchAll.
656 #define QUERY_ALLDOCS "Query(<alldocuments>)"
657  TEST_EQUAL(Xapian::Query(o, "*", 0, f).get_description(), QUERY_ALLDOCS);
658  TEST_EQUAL(Xapian::Query(o, "**", 0, f).get_description(), QUERY_ALLDOCS);
659  TEST_EQUAL(Xapian::Query(o, "?*", 0, f).get_description(), QUERY_ALLDOCS);
660  TEST_EQUAL(Xapian::Query(o, "*?", 0, f).get_description(), QUERY_ALLDOCS);
661  TEST_EQUAL(Xapian::Query(o, "*?*", 0, f).get_description(), QUERY_ALLDOCS);
662 }
663 
// File-local helper which adds six single-term documents to `db`, in this
// order: "test", "t\xc3\xaast", "t\xe1\x80\x80st", "t\xf3\x80\x80\x80st",
// "toast", "t*t".
// NOTE(review): line 665 (the function name and parameter list, which
// declares `db`) is missing from this listing; presumably this is the
// generator used by singlecharwildcard1 - confirm.
664 static void
666 {
667  {
668  Xapian::Document doc;
669  doc.add_term("test");
670  db.add_document(doc);
671  }
672  {
    // 't', a two-byte sequence, then "st".
673  Xapian::Document doc;
674  doc.add_term("t\xc3\xaast");
675  db.add_document(doc);
676  }
677  {
    // 't', a three-byte sequence, then "st".
678  Xapian::Document doc;
679  doc.add_term("t\xe1\x80\x80st");
680  db.add_document(doc);
681  }
682  {
    // 't', a four-byte sequence, then "st".
683  Xapian::Document doc;
684  doc.add_term("t\xf3\x80\x80\x80st");
685  db.add_document(doc);
686  }
687  {
688  Xapian::Document doc;
689  doc.add_term("toast");
690  db.add_document(doc);
691  }
692  {
    // A term containing a literal '*'.
693  Xapian::Document doc;
694  doc.add_term("t*t");
695  db.add_document(doc);
696  }
697 }
698 
// Test case singlecharwildcard1: checks matching of patterns containing `?`
// against the "singlecharwildcard1" database.
// NOTE(review): lines 702, 704 and 706-707 are missing from this listing, so
// the rest of the get_database() call and the declarations of `o` and `f` are
// not visible.
700 DEFINE_TESTCASE(singlecharwildcard1, backend) {
701  Xapian::Database db = get_database("singlecharwildcard1",
703  Xapian::Enquire enq(db);
705 
708 
709  {
710  // Check that `?` matches one Unicode character.
711  enq.set_query(Xapian::Query(o, "t?st", 0, f));
712  Xapian::MSet mset = enq.get_mset(0, 100);
713  mset_expect_order(mset, 1, 2, 3, 4);
714  }
715 
716  {
717  // Check that `??` doesn't match a single two-byte UTF-8 character.
718  enq.set_query(Xapian::Query(o, "t??st", 0, f));
719  Xapian::MSet mset = enq.get_mset(0, 100);
720  mset_expect_order(mset, 5);
721  }
722 
723  {
724  // Check that `*` is handled as a literal character not a wildcard.
725  enq.set_query(Xapian::Query(o, "t*t", 0, f));
726  Xapian::MSet mset = enq.get_mset(0, 100);
727  mset_expect_order(mset, 6);
728  }
729 }
730 
// File-local helper which adds five single-term documents to `db`, in this
// order: "ananas", "annas", "bananas", "banannas", "b?nanas".
// NOTE(review): line 732 (the function name and parameter list, which
// declares `db`) is missing from this listing; presumably this is the
// generator used by multicharwildcard1 - confirm.
731 static void
733 {
734  {
735  Xapian::Document doc;
736  doc.add_term("ananas");
737  db.add_document(doc);
738  }
739  {
740  Xapian::Document doc;
741  doc.add_term("annas");
742  db.add_document(doc);
743  }
744  {
745  Xapian::Document doc;
746  doc.add_term("bananas");
747  db.add_document(doc);
748  }
749  {
750  Xapian::Document doc;
751  doc.add_term("banannas");
752  db.add_document(doc);
753  }
754  {
    // A term containing a literal '?'.
755  Xapian::Document doc;
756  doc.add_term("b?nanas");
757  db.add_document(doc);
758  }
759 }
760 
// Test case multicharwildcard1: checks matching of patterns containing `*`
// against the "multicharwildcard1" database.
// NOTE(review): lines 764, 766 and 768-769 are missing from this listing, so
// the rest of the get_database() call and the declarations of `o` and `f` are
// not visible.
762 DEFINE_TESTCASE(multicharwildcard1, backend) {
763  Xapian::Database db = get_database("multicharwildcard1",
765  Xapian::Enquire enq(db);
767 
770 
771  {
772  // Check `*` can handle partial matches before and after.
773  enq.set_query(Xapian::Query(o, "b*anas", 0, f));
774  Xapian::MSet mset = enq.get_mset(0, 100);
775  mset_expect_order(mset, 3, 5);
776  }
777 
778  {
779  // Check leading `*` works.
780  enq.set_query(Xapian::Query(o, "*anas", 0, f));
781  Xapian::MSet mset = enq.get_mset(0, 100);
782  mset_expect_order(mset, 1, 3, 5);
783  }
784 
785  {
786  // Check more than one `*` works.
787  enq.set_query(Xapian::Query(o, "*ann*", 0, f));
788  Xapian::MSet mset = enq.get_mset(0, 100);
789  mset_expect_order(mset, 2, 4);
790  }
791 
792  {
793  // Check that `?` is handled as a literal character not a wildcard.
794  enq.set_query(Xapian::Query(o, "b?n*", 0, f));
795  Xapian::MSet mset = enq.get_mset(0, 100);
796  mset_expect_order(mset, 5);
797  }
798 }
799 
// Data table driving the editdist1 test case.
// NOTE(review): lines 800, 803 and 810 are missing from this listing - the
// struct header, one field (editdist1 reads `test.max_expansion`, which
// matches the third initialiser in each row) and the array declarator.
    // Target string passed to the Xapian::Query constructor by editdist1.
801  const char* target;
    // Passed as the last constructor argument (`test.edit_distance`).
802  unsigned edit_distance;
    // 'E', 'F' or 'M', selecting a branch of the switch in editdist1.
804  char max_type;
    // Terms the query is expected to expand to; unused trailing slots are 0.
805  const char* terms[4];
806 };
807 
// Terms initialiser marking a row where editdist1 expects
// Xapian::WildcardError: all four slots are counted and the last one is "".
808 #define EDITDIST_EXCEPTION { 0, 0, 0, "" }
809 static const
811  // Tries to expand to 9 terms.
812  { "muse", 2, 8, 'E', EDITDIST_EXCEPTION },
813  { "museum", 3, 3, 'E', { "mset", "must", "use", 0 } },
814  { "thou", 0, 9, 'E', { 0, 0, 0, 0 } },
815  { "though", 0, 9, 'E', { "though", 0, 0, 0 } },
816  { "museum", 3, 1, 'F', { "mset", 0, 0, 0 } },
817  { "museum", 3, 1, 'M', { "use", 0, 0, 0 } },
818 };
819 
// Test case editdist1: for each row of editdist1_testcases, runs the query
// and checks it gives the same results as an OP_SYNONYM query over the row's
// expected terms, or that Xapian::WildcardError is thrown for rows marked
// with EDITDIST_EXCEPTION.
// NOTE(review): lines 828, 839, 842 and 845 are missing from this listing, so
// the declaration of `o` and the assignment to `max_type` in each switch case
// are not visible.
820 DEFINE_TESTCASE(editdist1, backend) {
821  // FIXME: The counting of terms the subquery expands to is per subdatabase,
822  // so it may expand to more terms than the limit if some aren't in all
823  // subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the frequency from
824  // the subdatabase, and so may select different terms in each subdatabase.
825  SKIP_TEST_FOR_BACKEND("multi");
826  Xapian::Database db = get_database("apitest_simpledata");
827  Xapian::Enquire enq(db);
829 
830  for (auto&& test : editdist1_testcases) {
831  tout << test.target << '\n';
    // Trim unused trailing slots; the `tend > test.terms` bound copes with
    // rows whose terms are all null (e.g. the "thou" row).
832  auto tend = test.terms + 4;
833  while (tend > test.terms && tend[-1] == NULL) --tend;
    // An exception row has all four slots counted with "" in the last one.
834  bool expect_exception = (tend - test.terms == 4 && tend[-1][0] == '\0');
835  Xapian::Query q;
836  int max_type;
837  switch (test.max_type) {
838  case 'E':
840  break;
841  case 'F':
843  break;
844  case 'M':
846  break;
847  default:
848  FAIL_TEST("Unexpected max_type value");
849  }
850  q = Xapian::Query(o, test.target, test.max_expansion, max_type,
851  q.OP_SYNONYM, test.edit_distance);
852  enq.set_query(q);
853  tout << q.get_description() << '\n';
854  try {
855  Xapian::MSet mset = enq.get_mset(0, 10);
    // Reaching this point means no exception was thrown.
856  TEST(!expect_exception);
    // Compare against an explicit OP_SYNONYM of the expected expansion.
857  q = Xapian::Query(q.OP_SYNONYM, test.terms, tend);
858  enq.set_query(q);
859  Xapian::MSet mset2 = enq.get_mset(0, 10);
860  TEST_EQUAL(mset.size(), mset2.size());
861  TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
862  } catch (const Xapian::WildcardError&) {
863  TEST(expect_exception);
864  }
865  }
866 }
867 
868 // u8"foo" is const char8_t[] in C++20 and later.
// UTF8(X) concatenates the string literal X between u8"" and "" so the whole
// literal takes the u8 prefix, then casts the result to const char* so it can
// be used where a plain char string is expected.
869 #define UTF8(X) reinterpret_cast<const char*>(u8"" X "")
870 
// Data table driving the editdist2 test case: a single row whose target is
// U+10000 and whose one expected term is "a" followed by U+10000.
// NOTE(review): line 872 (the element type and array name) is missing from
// this listing; the row layout matches editdist1_testcases.
871 static const
873  { UTF8("\U00010000"), 1, 8, 'E', { UTF8("a\U00010000"), 0, 0, 0 } },
874 };
875 
// Test case editdist2: same procedure as editdist1, but run over
// editdist2_testcases against a database holding one document with the single
// term "a" followed by U+10000.
// NOTE(review): lines 887, 898, 901 and 904 are missing from this listing, so
// the declaration of `o` and the assignment to `max_type` in each switch case
// are not visible.
877 DEFINE_TESTCASE(editdist2, backend) {
878  Xapian::Database db = get_database("editdist2",
879  [](Xapian::WritableDatabase& wdb,
880  const string&)
881  {
882  Xapian::Document doc;
883  doc.add_term(UTF8("a\U00010000"));
884  wdb.add_document(doc);
885  });
886  Xapian::Enquire enq(db);
888 
889  for (auto&& test : editdist2_testcases) {
890  tout << test.target << '\n';
    // Trim unused trailing slots from the expected terms.
891  auto tend = test.terms + 4;
892  while (tend > test.terms && tend[-1] == NULL) --tend;
    // An exception row has all four slots counted with "" in the last one.
893  bool expect_exception = (tend - test.terms == 4 && tend[-1][0] == '\0');
894  Xapian::Query q;
895  int max_type;
896  switch (test.max_type) {
897  case 'E':
899  break;
900  case 'F':
902  break;
903  case 'M':
905  break;
906  default:
907  FAIL_TEST("Unexpected max_type value");
908  }
909  q = Xapian::Query(o, test.target, test.max_expansion, max_type,
910  q.OP_SYNONYM, test.edit_distance);
911  enq.set_query(q);
912  tout << q.get_description() << '\n';
913  try {
914  Xapian::MSet mset = enq.get_mset(0, 10);
    // Reaching this point means no exception was thrown.
915  TEST(!expect_exception);
    // Compare against an explicit OP_SYNONYM of the expected expansion.
916  q = Xapian::Query(q.OP_SYNONYM, test.terms, tend);
917  enq.set_query(q);
918  Xapian::MSet mset2 = enq.get_mset(0, 10);
919  TEST_EQUAL(mset.size(), mset2.size());
920  TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
921  } catch (const Xapian::WildcardError&) {
922  TEST(expect_exception);
923  }
924  }
925 }
926 
927 DEFINE_TESTCASE(dualprefixeditdist1, backend) {
928  Xapian::Database db = get_database("dualprefixeditdist1",
929  [](Xapian::WritableDatabase& wdb,
930  const string&)
931  {
932  Xapian::Document doc;
933  doc.add_term("opossum");
934  doc.add_term("possum");
935  wdb.add_document(doc);
936  doc.clear_terms();
937  doc.add_term("Spossums");
938  wdb.add_document(doc);
939  });
940 
941  auto OP_EDIT_DISTANCE = Xapian::Query::OP_EDIT_DISTANCE;
942  auto OP_SYNONYM = Xapian::Query::OP_SYNONYM;
943  Xapian::Query q0(OP_EDIT_DISTANCE, "possum");
944  Xapian::Query q1(OP_EDIT_DISTANCE, "Spossum", 0, 0, OP_SYNONYM, 2, 1);
945  Xapian::Query q(OP_SYNONYM, q0, q1);
946  tout << q.get_description() << '\n';
947  Xapian::Enquire enq(db);
948  enq.set_query(q0);
949  Xapian::MSet mset = enq.get_mset(0, 5);
950  TEST_EQUAL(mset.size(), 1);
951  TEST_EQUAL(*mset[0], 1);
952  enq.set_query(q1);
953  mset = enq.get_mset(0, 5);
954  TEST_EQUAL(mset.size(), 1);
955  TEST_EQUAL(*mset[0], 2);
956  enq.set_query(q);
957  mset = enq.get_mset(0, 5);
958  TEST_EQUAL(mset.size(), 2);
959 }
960 
// Data table driving the loosephrase1 test case.
// NOTE(review): lines 961, 964 and 968 are missing from this listing - the
// struct header, one field (the tests read `test.result`, which matches the
// last initialiser in each row) and the array declarator.
    // Window size passed to the positional query constructor.
962  int window;
    // Up to four terms; unused trailing slots are 0.
963  const char * terms[4];
965 };
966 
// In each row the last value is the expected result: loosephrase1 treats 0 as
// "no matches" and any other value as the single matching document id.
967 static const
969  { 5, { "expect", "to", "mset", 0 }, 0 },
970  { 5, { "word", "well", "the", 0 }, 2 },
971  { 5, { "if", "word", "doesnt", 0 }, 0 },
972  { 5, { "at", "line", "three", 0 }, 0 },
973  { 5, { "paragraph", "other", "the", 0 }, 0 },
974  { 5, { "other", "the", "with", 0 }, 0 }
975 };
976 
978 DEFINE_TESTCASE(loosephrase1, backend) {
979  Xapian::Database db = get_database("apitest_simpledata");
980  Xapian::Enquire enq(db);
981 
982  for (auto&& test : loosephrase1_testcases) {
983  auto tend = test.terms + 4;
984  while (tend[-1] == NULL) --tend;
985  auto OP_PHRASE = Xapian::Query::OP_PHRASE;
986  Xapian::Query q(OP_PHRASE, test.terms, tend, test.window);
987  enq.set_query(q);
988  Xapian::MSet mset = enq.get_mset(0, 10);
989  if (test.result == 0) {
990  TEST(mset.empty());
991  } else {
992  TEST_EQUAL(mset.size(), 1);
993  TEST_EQUAL(*mset[0], test.result);
994  }
995  }
996 }
997 
// Data table driving the loosenear1 test case.  Each row is
// { window, { up to four terms, unused trailing slots 0 }, expected result };
// loosenear1 treats a result of 0 as "no matches" and any other value as the
// single matching document id.
// NOTE(review): line 999 (the element type and array name) is missing from
// this listing.
998 static const
1000  { 4, { "test", "the", "with", 0 }, 1 },
1001  { 4, { "expect", "word", "the", 0 }, 2 },
1002  { 4, { "line", "be", "blank", 0 }, 1 },
1003  { 2, { "banana", "banana", 0, 0 }, 0 },
1004  { 3, { "banana", "banana", 0, 0 }, 0 },
1005  { 2, { "word", "word", 0, 0 }, 2 },
1006  { 4, { "work", "meant", "work", 0 }, 0 },
1007  { 4, { "this", "one", "yet", "one" }, 0 }
1008 };
1009 
1011 DEFINE_TESTCASE(loosenear1, backend) {
1012  Xapian::Database db = get_database("apitest_simpledata");
1013  Xapian::Enquire enq(db);
1014 
1015  for (auto&& test : loosenear1_testcases) {
1016  auto tend = test.terms + 4;
1017  while (tend[-1] == NULL) --tend;
1018  Xapian::Query q(Xapian::Query::OP_NEAR, test.terms, tend, test.window);
1019  enq.set_query(q);
1020  Xapian::MSet mset = enq.get_mset(0, 10);
1021  if (test.result == 0) {
1022  TEST(mset.empty());
1023  } else {
1024  TEST_EQUAL(mset.size(), 1);
1025  TEST_EQUAL(*mset[0], test.result);
1026  }
1027  }
1028 }
1029 
// Test case complexphrase1: two queries pairing (a OR b) with a single term
// ("i", then "c") are each expected to match nothing in "apitest_simpledata".
// NOTE(review): lines 1034 and 1039 (the start of the declarations of `query`
// and `query2`, including the operator used) are missing from this listing.
1031 DEFINE_TESTCASE(complexphrase1, backend) {
1032  Xapian::Database db = get_database("apitest_simpledata");
1033  Xapian::Enquire enq(db);
1035  Xapian::Query("a") | Xapian::Query("b"),
1036  Xapian::Query("i"));
1037  enq.set_query(query);
1038  TEST(enq.get_mset(0, 10).empty());
1040  Xapian::Query("a") | Xapian::Query("b"),
1041  Xapian::Query("c"));
1042  enq.set_query(query2);
1043  TEST(enq.get_mset(0, 10).empty());
1044 }
1045 
// Test case complexnear1: two queries pairing (a OR b) with a single term
// ("i", then "c") are each expected to match nothing in "apitest_simpledata".
// NOTE(review): lines 1050 and 1055 (the start of the declarations of `query`
// and `query2`, including the operator used) are missing from this listing.
1047 DEFINE_TESTCASE(complexnear1, backend) {
1048  Xapian::Database db = get_database("apitest_simpledata");
1049  Xapian::Enquire enq(db);
1051  Xapian::Query("a") | Xapian::Query("b"),
1052  Xapian::Query("i"));
1053  enq.set_query(query);
1054  TEST(enq.get_mset(0, 10).empty());
1056  Xapian::Query("a") | Xapian::Query("b"),
1057  Xapian::Query("c"));
1058  enq.set_query(query2);
1059  TEST(enq.get_mset(0, 10).empty());
1060 }
1061 
// Test case complexphrase2: ORs three subqueries together and runs the
// result; the mset is discarded, so the visible code only checks that
// get_mset() completes.
// NOTE(review): lines 1066, 1068, 1071, 1073-1074 and 1076 are missing from
// this listing, so the declaration of `ps` and most of each subquery's
// construction are not visible.
1063 DEFINE_TESTCASE(complexphrase2, backend) {
1064  Xapian::Database db = get_database("apitest_simpledata");
1065  Xapian::Enquire enq(db);
1067  Xapian::Query subqs[3] = {
1069  Xapian::Query("a"),
1070  Xapian::Query(&ps)),
1072  Xapian::Query("and"),
1075  Xapian::Query("at"),
1077  };
1078  Xapian::Query query(Xapian::Query::OP_OR, subqs, subqs + 3);
1079  enq.set_query(query);
1080  (void)enq.get_mset(0, 10);
1081 }
1082 
// Test case complexnear2: ORs three subqueries together and runs the result;
// the mset is discarded, so the visible code only checks that get_mset()
// completes.
// NOTE(review): lines 1087, 1089, 1092, 1094-1095 and 1097 are missing from
// this listing, so the declaration of `ps` and most of each subquery's
// construction are not visible.
1084 DEFINE_TESTCASE(complexnear2, backend) {
1085  Xapian::Database db = get_database("apitest_simpledata");
1086  Xapian::Enquire enq(db);
1088  Xapian::Query subqs[3] = {
1090  Xapian::Query("a"),
1091  Xapian::Query(&ps)),
1093  Xapian::Query("and"),
1096  Xapian::Query("at"),
1098  };
1099  Xapian::Query query(Xapian::Query::OP_OR, subqs, subqs + 3);
1100  enq.set_query(query);
1101  (void)enq.get_mset(0, 10);
1102 }
1103 
// Test case zeroestimate1: runs `phrase AND_NOT "queri"` with zero results
// requested and expects the estimated number of matches to be 0.
// NOTE(review): line 1107 (the start of the declaration of `phrase`, built
// from "absolute" and "rubbish") is missing from this listing.
1105 DEFINE_TESTCASE(zeroestimate1, backend) {
1106  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1108  Xapian::Query("absolute"),
1109  Xapian::Query("rubbish"));
1110  enquire.set_query(phrase &~ Xapian::Query("queri"));
1111  Xapian::MSet mset = enquire.get_mset(0, 0);
1112  TEST_EQUAL(mset.get_matches_estimated(), 0);
1113 }
1114 
// Test case complexphrase3: four queries pairing OR-groups of terms, with the
// two groups swapped between query/query2 and between query3/query4.  The
// expected document ids follow the mset in each mset_expect_order() call; the
// calls for query2 and query4 list no ids (presumably meaning no matches).
// NOTE(review): lines 1119, 1124, 1129 and 1134 (the start of each query
// declaration, including the operator used) are missing from this listing.
1116 DEFINE_TESTCASE(complexphrase3, backend) {
1117  Xapian::Database db = get_database("apitest_simpledata");
1118  Xapian::Enquire enq(db);
1120  Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"),
1121  Xapian::Query("a"));
1122  enq.set_query(query);
1123  mset_expect_order(enq.get_mset(0, 10), 1);
1125  Xapian::Query("a"),
1126  Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"));
1127  enq.set_query(query2);
1128  mset_expect_order(enq.get_mset(0, 10));
1130  Xapian::Query("one") | Xapian::Query("with"),
1131  Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"));
1132  enq.set_query(query3);
1133  mset_expect_order(enq.get_mset(0, 10), 1, 4, 5);
1135  Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"),
1136  Xapian::Query("one") | Xapian::Query("with"));
1137  enq.set_query(query4);
1138  mset_expect_order(enq.get_mset(0, 10));
1139 }
1140 
// Testcase complexnear3: runs four two-operand queries whose operands are ORs
// of terms, and checks which docids come back for each. `query2` and `query4`
// use the same operands as `query` and `query3` in swapped order, and are
// expected to return the same docids as their unswapped counterparts.
// NOTE(review): listing lines 1145, 1150, 1155 and 1160, which open the
// declarations of `query`..`query4`, are absent from this extract; judging by
// the testcase name they presumably use OP_NEAR - confirm against the
// original file.
1142 DEFINE_TESTCASE(complexnear3, backend) {
1143  Xapian::Database db = get_database("apitest_simpledata");
1144  Xapian::Enquire enq(db);
      // query: ("is" | "as" | "be") with "a" -> expects docid 1.
1146  Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"),
1147  Xapian::Query("a"));
1148  enq.set_query(query);
1149  mset_expect_order(enq.get_mset(0, 10), 1);
      // query2: operands swapped -> still expects docid 1.
1151  Xapian::Query("a"),
1152  Xapian::Query("is") | Xapian::Query("as") | Xapian::Query("be"));
1153  enq.set_query(query2);
1154  mset_expect_order(enq.get_mset(0, 10), 1);
      // query3: ("one" | "with") with ("the" | "of" | "line") -> expects
      // docids 1, 4, 5 in that order.
1156  Xapian::Query("one") | Xapian::Query("with"),
1157  Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"));
1158  enq.set_query(query3);
1159  mset_expect_order(enq.get_mset(0, 10), 1, 4, 5);
      // query4: operands swapped -> still expects docids 1, 4, 5.
1161  Xapian::Query("the") | Xapian::Query("of") | Xapian::Query("line"),
1162  Xapian::Query("one") | Xapian::Query("with"));
1163  enq.set_query(query4);
1164  mset_expect_order(enq.get_mset(0, 10), 1, 4, 5);
1165 }
1166 
// Database generator passed to get_database() by the subdbwithoutpos1
// testcase. Adds a single document whose terms are all added with add_term()
// (never add_posting()); that testcase asserts the resulting database
// reports !has_positions().
// NOTE(review): listing line 1168 (the function name and parameter list) is
// absent from this extract; the cross-reference index at the end of the page
// gives the signature as
// gen_subdbwithoutpos1_db(Xapian::WritableDatabase &db, const string &).
1167 static void
1169 {
1170  Xapian::Document doc;
1171  doc.add_term("this");
1172  doc.add_term("paragraph");
      // Second argument is add_term()'s wdf_inc (defaults to 1 above).
1173  doc.add_term("wibble", 5);
1174  db.add_document(doc);
1175 }
1176 
// Testcase subdbwithoutpos1: runs the same two positional queries against a
// database with positional data, a database without any, and the two
// combined, checking the number of matches in each case.
1177 DEFINE_TESTCASE(subdbwithoutpos1, backend) {
1178  Xapian::Database db(get_database("apitest_simpledata"));
1179  TEST(db.has_positions());
1180 
      // NOTE(review): listing lines 1181 and 1185, which open the declarations
      // of `q_near` and `q_phrase`, are absent from this extract; going by the
      // variable names they presumably use OP_NEAR and OP_PHRASE - confirm
      // against the original file. Both take the terms "this" and
      // "paragraph".
1182  Xapian::Query("this"),
1183  Xapian::Query("paragraph"));
1184 
1186  Xapian::Query("this"),
1187  Xapian::Query("paragraph"));
1188 
      // Baseline on the database with positional data: each query is expected
      // to match 3 documents.
1189  Xapian::Enquire enq1(db);
1190  enq1.set_query(q_near);
1191  Xapian::MSet mset1 = enq1.get_mset(0, 10);
1192  TEST_EQUAL(mset1.size(), 3);
1193 
1194  enq1.set_query(q_phrase);
1195  mset1 = enq1.get_mset(0, 10);
1196  TEST_EQUAL(mset1.size(), 3);
1197 
      // Second database is built by gen_subdbwithoutpos1_db and must report
      // that it has no positional data.
1198  Xapian::Database db2 =
1199  get_database("subdbwithoutpos1", gen_subdbwithoutpos1_db);
1200  TEST(!db2.has_positions());
1201 
1202  // If a database has no positional info, we used to map OP_PHRASE and
1203  // OP_NEAR to OP_AND, but since 2.0.0 we no longer do.
1204  Xapian::Enquire enq2(db2);
1205  enq2.set_query(q_near);
1206  Xapian::MSet mset2 = enq2.get_mset(0, 10);
1207  TEST_EQUAL(mset2.size(), 0);
1208 
1209  enq2.set_query(q_phrase);
1210  mset2 = enq2.get_mset(0, 10);
1211  TEST_EQUAL(mset2.size(), 0);
1212 
1213  // If one sub-database in a combined database has no positional info but
1214  // other sub-databases do, then we shouldn't convert OP_PHRASE to OP_AND
1215  // (but prior to 1.4.3 we did).
1216  db.add_database(db2);
1217  TEST(db.has_positions());
1218 
      // On the combined database the match counts must equal the baseline
      // counts (3), i.e. db2 contributes no matches to either query.
1219  Xapian::Enquire enq3(db);
1220  enq3.set_query(q_near);
1221  Xapian::MSet mset3 = enq3.get_mset(0, 10);
1222  TEST_EQUAL(mset3.size(), 3);
1223  // Regression test for bug introduced in 1.4.3 which led to a division by
1224  // zero and then (at least on Linux) we got 1% here.
1225  TEST_EQUAL(mset3[0].get_percent(), 100);
1226 
1227  enq3.set_query(q_phrase);
1228  mset3 = enq3.get_mset(0, 10);
1229  TEST_EQUAL(mset3.size(), 3);
1230  // Regression test for bug introduced in 1.4.3 which led to a division by
1231  // zero and then (at least on Linux) we got 1% here.
1232  TEST_EQUAL(mset3[0].get_percent(), 100);
1233 
1234  // Regression test for https://trac.xapian.org/ticket/752
      // Expected size is 4 - presumably the 3 phrase matches plus the
      // "wibble" document from db2; confirm against the test data.
1235  auto q = (Xapian::Query("this") & q_phrase) | Xapian::Query("wibble");
1236  enq3.set_query(q);
1237  mset3 = enq3.get_mset(0, 10);
1238  TEST_EQUAL(mset3.size(), 4);
1239 }
1240 
1241 // Regression test for bug fixed in 1.4.4 and 1.2.25.
// Query shape: "the" AND NOT ("friedrich" AND ("day" OR "night")), run over
// the "etext" database; the test pins the estimated number of matches.
1242 DEFINE_TESTCASE(notandor1, backend) {
1243  Xapian::Database db(get_database("etext"));
1244  using Xapian::Query;
      // `a &~ b` is Xapian's spelling of "a AND NOT b".
1245  Query q = Query("the") &~ (Query("friedrich") &
1246  (Query("day") | Query("night")));
1247  Xapian::Enquire enq(db);
1248  enq.set_query(q);
1249 
      // The third argument (checkatleast) is the full document count -
      // presumably so the estimate below is exact; confirm.
1250  Xapian::MSet mset = enq.get_mset(0, 10, db.get_doccount());
1251  TEST_EQUAL(mset.get_matches_estimated(), 344);
1252 }
1253 
1254 // Regression test for bug fixed in git master before 2.0.0.
// Query shape: "the" AND NOT (wildcard expansion of "pru"), run over the
// "etext" database; the test pins the estimated number of matches.
1255 DEFINE_TESTCASE(boolorbug1, backend) {
1256  Xapian::Database db(get_database("etext"));
1257  using Xapian::Query;
      // `a &~ b` is Xapian's spelling of "a AND NOT b".
1258  Query q = Query("the") &~ Query(Query::OP_WILDCARD, "pru");
1259  Xapian::Enquire enq(db);
1260  enq.set_query(q);
1261 
      // The third argument (checkatleast) is the full document count -
      // presumably so the estimate below is exact; confirm.
1262  Xapian::MSet mset = enq.get_mset(0, 10, db.get_doccount());
1263  // Due to a bug in BoolOrPostList this returned 330 results.
1264  TEST_EQUAL(mset.get_matches_estimated(), 331);
1265 }
1266 
1267 // Regression test for bug introduced in 1.4.13 and fixed in 1.4.14.
// Builds a phrase query for "the" "king", then ANDs on a negated term and a
// plain term in two separate steps, and pins the estimated number of matches
// over the "etext" database.
1268 DEFINE_TESTCASE(hoistnotbug1, backend) {
1269  Xapian::Database db(get_database("etext"));
1270  using Xapian::Query;
1271  Query q(Query::OP_PHRASE, Query("the"), Query("king"));
      // Exclude documents containing "worldtornado", then additionally
      // require "a".
1272  q &= ~Query("worldtornado");
1273  q &= Query("a");
1274  Xapian::Enquire enq(db);
1275  enq.set_query(q);
1276 
1277  // This reliably fails before the fix in an assertion build, and may crash
1278  // in other builds.
      // checkatleast is the full document count - presumably so the estimate
      // below is exact; confirm.
1279  Xapian::MSet mset = enq.get_mset(0, 10, db.get_doccount());
1280  TEST_EQUAL(mset.get_matches_estimated(), 42);
1281 }
1282 
1283 // Regression test for segfault optimising query on git master before 2.0.0.
// Three variants of the same check: `query` starts as "document" AND "api",
// is then recombined with a right-hand side that matches nothing (a value
// range, an unindexed term, a wildcard with no expansions), and must still
// give exactly one match each time.
// NOTE(review): listing lines 1287, 1291, 1293, 1300 and 1309 are absent from
// this extract, including the statements that open each recombination and
// the range subquery itself; judging by the testcase name the combining
// operator is presumably OP_AND_NOT - confirm against the original file.
1284 DEFINE_TESTCASE(emptynot1, backend) {
1285  Xapian::Database db(get_database("apitest_simpledata"));
1286  Xapian::Enquire enq(db);
1288  Xapian::Query query = Xapian::Query("document") & Xapian::Query("api");
1289  // This range won't match anything, so collapses to MatchNothing as we
1290  // optimise the query.
1292  query,
1294  enq.set_query(query);
1295  Xapian::MSet mset = enq.get_mset(0, 10);
1296  TEST_EQUAL(mset.size(), 1);
1297  // Essentially the same test but with a term which doesn't match anything
1298  // on the right side.
1299  query = Xapian::Query("document") & Xapian::Query("api");
1301  query,
1302  Xapian::Query("nosuchterm"));
1303  enq.set_query(query);
1304  mset = enq.get_mset(0, 10);
1305  TEST_EQUAL(mset.size(), 1);
1306  // Essentially the same test but with a wildcard which doesn't match
1307  // anything on right side.
1308  query = Xapian::Query("document") & Xapian::Query("api");
1310  query,
      // OP_WILDCARD is named through the `query` object rather than the
      // Xapian::Query class.
1311  Xapian::Query(query.OP_WILDCARD, "nosuchwildcard"));
1312  enq.set_query(query);
1313  mset = enq.get_mset(0, 10);
1314  TEST_EQUAL(mset.size(), 1);
1315 }
1316 
1317 // Similar case to emptynot1 but for OP_AND_MAYBE. This case wasn't failing,
1318 // so this isn't a regression test, but we do want to ensure it works.
// Three variants of the same check: `query` starts as "document" AND "api",
// is then recombined with a right-hand side that matches nothing (a value
// range, an unindexed term, a wildcard with no expansions), and must still
// give exactly one match each time.
// NOTE(review): listing lines 1322, 1326, 1328, 1335 and 1344 are absent from
// this extract, including the statements that open each recombination and
// the range subquery itself - confirm the details against the original file.
1319 DEFINE_TESTCASE(emptymaybe1, backend) {
1320  Xapian::Database db(get_database("apitest_simpledata"));
1321  Xapian::Enquire enq(db);
1323  Xapian::Query query = Xapian::Query("document") & Xapian::Query("api");
1324  // This range won't match anything, so collapses to MatchNothing as we
1325  // optimise the query.
1327  query,
1329  enq.set_query(query);
1330  Xapian::MSet mset = enq.get_mset(0, 10);
1331  TEST_EQUAL(mset.size(), 1);
1332  // Essentially the same test but with a term which doesn't match anything
1333  // on the right side.
1334  query = Xapian::Query("document") & Xapian::Query("api");
1336  query,
1337  Xapian::Query("nosuchterm"));
1338  enq.set_query(query);
1339  mset = enq.get_mset(0, 10);
1340  TEST_EQUAL(mset.size(), 1);
1341  // Essentially the same test but with a wildcard which doesn't match
1342  // anything on right side.
1343  query = Xapian::Query("document") & Xapian::Query("api");
1345  query,
      // OP_WILDCARD is named through the `query` object rather than the
      // Xapian::Query class.
1346  Xapian::Query(query.OP_WILDCARD, "nosuchwildcard"));
1347  enq.set_query(query);
1348  mset = enq.get_mset(0, 10);
1349  TEST_EQUAL(mset.size(), 1);
1350 }
1351 
1352 // Regression test for optimisation bug on git master before 2.0.0.
1353 // The query optimiser ignored the NOT part when the LHS contained
1354 // a MatchAll.
// Runs two brace-initialised queries whose second operand is the term "the";
// the first operand is the term "this" in one case and MatchAll in the other.
// Each is expected to match exactly 2 documents.
// NOTE(review): listing lines 1358, 1362 and 1368, which open the two
// assignments to `query` (and so name the operator), are absent from this
// extract; per the comment above the operator is presumably OP_AND_NOT -
// confirm against the original file.
1355 DEFINE_TESTCASE(allnot1, backend) {
1356  Xapian::Database db(get_database("apitest_simpledata"));
1357  Xapian::Enquire enq(db);
1359  // This case wasn't a problem, but would have been if the index-all term
1360  // was handled like MatchAll by this optimisation (which it might be in
1361  // future).
1363  Xapian::Query("this"),
1364  Xapian::Query("the")};
      // The query is multiplied by 0 before being run - presumably scaling
      // its weight contribution to zero (cf. OP_SCALE_WEIGHT); confirm.
1365  enq.set_query(0 * query);
1366  Xapian::MSet mset = enq.get_mset(0, 10);
1367  TEST_EQUAL(mset.size(), 2);
      // Same again with MatchAll (named through the `query` object) on the
      // left-hand side.
1369  query.MatchAll,
1370  Xapian::Query("the")};
1371  enq.set_query(0 * query);
1372  mset = enq.get_mset(0, 10);
1373  TEST_EQUAL(mset.size(), 2);
1374 }
1375 
1376 // Regression test for optimisation bug on git master before 2.0.0.
1377 // The query optimiser didn't handle the RHS of AND_MAYBE not matching
1378 // anything.
// Runs a query with "document" on the left and "xyzzy" on the right and
// expects exactly 2 matches.
// NOTE(review): listing line 1384, which opens the declaration of `query`
// (and names the operator), is absent from this extract; per the comment
// above it is presumably OP_AND_MAYBE - confirm against the original file.
1379 DEFINE_TESTCASE(emptymayberhs1, backend) {
1380  Xapian::Database db(get_database("apitest_simpledata"));
1381  Xapian::Enquire enq(db);
1382  // The RHS doesn't match anything, which now gives a NULL PostList*, and
1383  // we were trying to dereference that in this case.
1385  Xapian::Query("document"),
1386  Xapian::Query("xyzzy"));
1387  enq.set_query(query);
1388  Xapian::MSet mset = enq.get_mset(0, 10);
1389  TEST_EQUAL(mset.size(), 2);
1390 }
1391 
// Testcase phraseweightcheckbug1: runs a query involving an OP_PHRASE over
// "hello" "world" against the "phraseweightcheckbug1" database and expects
// the first three results requested to all be filled.
// NOTE(review): listing line 1397 is absent from this extract, so any
// statement between the construction of `query` and the debug output below
// is not visible here - confirm against the original file.
1392 DEFINE_TESTCASE(phraseweightcheckbug1, backend) {
1393  Xapian::Database db(get_database("phraseweightcheckbug1"));
1394  Xapian::Enquire enq(db);
1395  static const char* const words[] = {"hello", "world"};
      // Final argument 2 is presumably the phrase window size - confirm.
1396  Xapian::Query query{Xapian::Query::OP_PHRASE, begin(words), end(words), 2};
      // Log the query description to the test suite's debug stream.
1398  tout << query.get_description() << '\n';
1399  enq.set_query(query);
      // Ask for at most 3 results and require that all 3 come back.
1400  Xapian::MSet mset = enq.get_mset(0, 3);
1401  TEST_EQUAL(mset.size(), 3);
1402 }
1403 
// Testcase orphanedhint1: runs (wildcard "doc" AND wildcard "xyzzy") OR
// "test" over "apitest_simpledata" and expects exactly one match.
1404 DEFINE_TESTCASE(orphanedhint1, backend) {
1405  Xapian::Database db(get_database("apitest_simpledata"));
1406  Xapian::Enquire enq(db);
      // Local alias so the enumerator can be written unqualified below.
1407  auto OP_WILDCARD = Xapian::Query::OP_WILDCARD;
1408  Xapian::Query query = Xapian::Query(OP_WILDCARD, "doc") &
1409  Xapian::Query(OP_WILDCARD, "xyzzy");
1410  query |= Xapian::Query("test");
      // Log the query description to the test suite's debug stream.
1411  tout << query.get_description() << '\n';
1412  enq.set_query(query);
1413  Xapian::MSet mset = enq.get_mset(0, 3);
1414  TEST_EQUAL(mset.size(), 1);
1415 }
1416 
1417 // Regression test for bugs in initial implementation of query optimisation
1418 // based on docid range information.
// Runs four queries that pair the term "typo" with an OR of two terms
// ("this" plus either "rubbish" or "nosuchterm", in both orders). Each asks
// for a single result and expects to get one.
// NOTE(review): listing lines 1424, 1431, 1440 and 1447, which open the
// declarations of `query`..`query4` (and so name the operator joining the two
// operands), are absent from this extract - confirm against the original
// file.
1419 DEFINE_TESTCASE(docidrangebugs1, backend) {
1420  Xapian::Database db(get_database("apitest_simpledata"));
1421  Xapian::Enquire enq(db);
1422 
1423  // This triggered a bug in BoolOrPostList::get_docid_range().
1425  Xapian::Query("typo"),
1426  Xapian::Query("rubbish") | Xapian::Query("this"));
1427  enq.set_query(query);
1428  Xapian::MSet mset = enq.get_mset(0, 1);
1429  TEST_EQUAL(mset.size(), 1);
1430 
      // Same terms with the OR operands in the opposite order.
1432  Xapian::Query("typo"),
1433  Xapian::Query("this") | Xapian::Query("rubbish"));
1434  enq.set_query(query2);
1435  mset = enq.get_mset(0, 1);
1436  TEST_EQUAL(mset.size(), 1);
1437 
1438  // Alternative reproducer where the first term doesn't match any
1439  // documents.
1441  Xapian::Query("typo"),
1442  Xapian::Query("nosuchterm") | Xapian::Query("this"));
1443  enq.set_query(query3);
1444  mset = enq.get_mset(0, 1);
1445  TEST_EQUAL(mset.size(), 1);
1446 
      // Same terms with the OR operands in the opposite order.
1448  Xapian::Query("typo"),
1449  Xapian::Query("this") | Xapian::Query("nosuchterm"));
1450  enq.set_query(query4);
1451  mset = enq.get_mset(0, 1);
1452  TEST_EQUAL(mset.size(), 1);
1453 }
1454 
// Testcase estimateopbug1: builds a two-document database with an inline
// generator lambda, composes a nested query over its terms and runs the
// match. Nothing is asserted about the resulting MSet, so the only thing
// checked here is that get_mset() does not fail.
// NOTE(review): listing line 1469, which opens the declaration of `q` (and so
// names the operator joining "XSmember" and "XSchange"), is absent from this
// extract - confirm against the original file.
1455 DEFINE_TESTCASE(estimateopbug1, backend) {
1456  Xapian::Database db = get_database("estimateopbug1",
1457  [](Xapian::WritableDatabase& wdb,
1458  const string&)
1459  {
      // First document: three terms added with explicit positions.
1460  Xapian::Document doc;
1461  doc.add_posting("XFgroups", 7);
1462  doc.add_posting("XSchange", 216);
1463  doc.add_posting("XSmember", 214);
1464  wdb.add_document(doc);
      // Second document: only the boolean filter term "XEP".
1465  Xapian::Document doc2;
1466  doc2.add_boolean_term("XEP");
1467  wdb.add_document(doc2);
1468  });
1470  Xapian::Query{"XSmember"},
1471  Xapian::Query{"XSchange"}};
      // Wrap `q`: "XFgroups" AND (q OR "XSmember"), then exclude "XEP".
1472  q = Xapian::Query{"XFgroups"} & (q | Xapian::Query{"XSmember"});
1473  q &= ~Xapian::Query{"XEP"};
1474  Xapian::Enquire enquire(db);
1475  enquire.set_query(q);
1476  Xapian::MSet matches = enquire.get_mset(0, 10);
1477 }
1478 
// NOTE(review): the head of this class definition (listing lines 1479-1480)
// is absent from this extract; the estimateopbug2 testcase instantiates a
// type named MDecider_estimateop2, which is presumably this class - confirm
// against the original file.
// The call operator below never accepts or rejects a document: it
// unconditionally throws Exception_estimateop2.
1481  bool operator()(const Xapian::Document&) const override {
1482  throw Exception_estimateop2();
1483  }
1484 };
1485 
// Testcase estimateopbug2: runs a single-term query with an
// MDecider_estimateop2 match decider and requires Exception_estimateop2 to
// reach the caller of get_mset(). The `backend && !remote` condition
// excludes remote backends.
1486 DEFINE_TESTCASE(estimateopbug2, backend && !remote) {
1487  Xapian::Database db = get_database("apitest_simpledata");
1488  Xapian::Query query{"it"};
1489  Xapian::Enquire enq{db};
1490  enq.set_query(query);
1491  MDecider_estimateop2 mdecider;
1492  try {
      // No RSet is supplied (nullptr); the decider is passed by address.
1493  auto mset = enq.get_mset(0, 10, nullptr, &mdecider);
      // Only reached if get_mset() returned normally, which is a failure.
1494  FAIL_TEST("Expected exception Exception_estimateop2 not thrown");
1495  } catch (const Exception_estimateop2&) {
      // Expected path: the exception arrived, so there is nothing to do.
1496  }
1497 }
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
#define WILDCARD_EXCEPTION
Definition: api_query.cc:492
DEFINE_TESTCASE(queryterms1, !backend)
Definition: api_query.cc:34
static const editdist_testcase editdist1_testcases[]
Definition: api_query.cc:810
static const editdist_testcase editdist2_testcases[]
Definition: api_query.cc:872
static const positional_testcase loosephrase1_testcases[]
Definition: api_query.cc:968
#define QUERY_ALLDOCS
#define EDITDIST_EXCEPTION
Definition: api_query.cc:808
static const wildcard_testcase wildcard1_testcases[]
Definition: api_query.cc:494
#define UTF8(X)
Definition: api_query.cc:869
static void gen_singlecharwildcard1_db(Xapian::WritableDatabase &db, const string &)
Definition: api_query.cc:665
static void gen_multicharwildcard1_db(Xapian::WritableDatabase &db, const string &)
Definition: api_query.cc:732
static const positional_testcase loosenear1_testcases[]
Definition: api_query.cc:999
static void gen_subdbwithoutpos1_db(Xapian::WritableDatabase &db, const string &)
Definition: api_query.cc:1168
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
#define SKIP_TEST_FOR_BACKEND(B)
Definition: apitest.h:84
bool operator()(const Xapian::Document &) const override
Decide whether to accept a document.
Definition: api_query.cc:1481
Class implementing a "boolean" weighting scheme.
Definition: weight.h:678
An indexed database of documents.
Definition: database.h:75
void add_database(const Database &other)
Add shards from another Database.
Definition: database.h:109
bool has_positions() const
Does this database have any positional information?
Definition: database.cc:215
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: database.cc:233
Class representing a document.
Definition: document.h:64
void add_boolean_term(std::string_view term)
Add a boolean filter term to the document.
Definition: document.h:145
void add_term(std::string_view term, Xapian::termcount wdf_inc=1)
Add a term to this document.
Definition: document.cc:87
void clear_terms()
Clear all terms from the document.
Definition: document.cc:168
void add_posting(std::string_view term, Xapian::termpos term_pos, Xapian::termcount wdf_inc=1)
Add a posting for a term.
Definition: document.cc:111
Querying session.
Definition: enquire.h:57
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
Definition: enquire.cc:85
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
TermIterator get_matching_terms_begin(docid did) const
Iterate query terms matching a document.
Definition: enquire.cc:210
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
TermIterator get_matching_terms_end(docid) const noexcept
End iterator corresponding to get_matching_terms_begin().
Definition: enquire.h:435
Class representing a list of search results.
Definition: mset.h:46
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:467
double get_termweight(std::string_view term) const
Get the term weight of a term.
Definition: mset.cc:300
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: mset.cc:334
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: mset.cc:318
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: mset.cc:324
Abstract base class for match deciders.
Definition: matchdecider.h:37
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:516
Query parse_query(std::string_view query_string, unsigned flags=FLAG_DEFAULT, std::string_view default_prefix={})
Parse a query.
Definition: queryparser.cc:174
Class representing a query.
Definition: query.h:45
const Query get_subquery(size_t n) const
Read a top level subquery.
Definition: query.cc:289
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
Definition: query.cc:198
const TermIterator get_unique_terms_begin() const
Begin iterator for unique terms in the query object.
Definition: query.cc:223
Xapian::termcount get_leaf_wqf() const
Get the wqf parameter of a leaf node.
Definition: query.cc:295
std::string get_description() const
Return a string describing this object.
Definition: query.cc:307
op get_type() const noexcept
Get the type of the top level of the query.
Definition: query.cc:275
const TermIterator get_unique_terms_end() const noexcept
End iterator for unique terms in the query object.
Definition: query.h:653
const TermIterator get_terms_end() const noexcept
End iterator for terms in the query object.
Definition: query.h:639
size_t get_num_subqueries() const noexcept
Get the number of subqueries of the top level query.
Definition: query.cc:283
op
Query operators.
Definition: query.h:78
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
Definition: query.h:166
@ OP_WILDCARD
Wildcard expansion.
Definition: query.h:255
@ OP_XOR
Match documents which an odd number of subqueries match.
Definition: query.h:107
@ OP_AND_MAYBE
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
@ LEAF_MATCH_ALL
Value returned by get_type() for MatchAll or equivalent.
Definition: query.h:290
@ OP_NEAR
Match only documents where all subqueries match near each other.
Definition: query.h:140
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ LEAF_MATCH_NOTHING
Value returned by get_type() for MatchNothing or equivalent.
Definition: query.h:296
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Definition: query.h:152
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
Definition: query.h:99
@ OP_EDIT_DISTANCE
Edit distance expansion.
Definition: query.h:269
@ LEAF_TERM
Value returned by get_type() for a term.
Definition: query.h:280
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
Definition: query.h:223
Xapian::termpos get_leaf_pos() const
Get the pos parameter of a leaf node.
Definition: query.cc:301
static const Xapian::Query MatchNothing
A query matching no documents.
Definition: query.h:64
@ WILDCARD_PATTERN_MULTI
Support * which matches 0 or more characters.
Definition: query.h:330
@ WILDCARD_LIMIT_ERROR
Throw an error if OP_WILDCARD exceeds its expansion limit.
Definition: query.h:305
@ WILDCARD_PATTERN_GLOB
Enable all supported glob-like features.
Definition: query.h:342
@ WILDCARD_LIMIT_FIRST
Stop expanding when OP_WILDCARD reaches its expansion limit.
Definition: query.h:311
@ WILDCARD_LIMIT_MOST_FREQUENT
Limit OP_WILDCARD expansion to the most frequent terms.
Definition: query.h:321
@ WILDCARD_PATTERN_SINGLE
Support ? which matches a single character.
Definition: query.h:336
static const Xapian::Query MatchAll
A query matching all documents.
Definition: query.h:75
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
A posting source which reads weights from a value slot.
WildcardError indicates an error expanding a wildcarded query.
Definition: error.h:1001
This class provides read/write access to a database.
Definition: database.h:964
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Xapian::termcount max_expansion
Definition: api_query.cc:803
unsigned edit_distance
Definition: api_query.cc:802
const char * target
Definition: api_query.cc:801
Xapian::docid result
Definition: api_query.cc:964
const char * pattern
Definition: api_query.cc:486
Xapian::termcount max_expansion
Definition: api_query.cc:487
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:35
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
a generic test suite engine
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:65
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Definition: testsuite.h:285
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:293
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:224
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Definition: testutils.cc:45
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:112
Public interfaces for the Xapian library.