xapian-core  2.0.0
api_opvalue.cc
Go to the documentation of this file.
1 
4 /* Copyright 2007,2008,2009,2010,2010,2011,2017,2019 Olly Betts
5  * Copyright 2008 Lemur Consulting Ltd
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, see
20  * <https://www.gnu.org/licenses/>.
21  */
22 
23 #include <config.h>
24 
25 #include "api_opvalue.h"
26 
27 #include <xapian.h>
28 
29 #include "apitest.h"
30 #include "testsuite.h"
31 #include "testutils.h"
32 
33 #include <string>
34 
35 using namespace std;
36 
37 // Feature test for Query::OP_VALUE_RANGE.
38 DEFINE_TESTCASE(valuerange1, backend) {
39  Xapian::Database db(get_database("apitest_phrase"));
40  Xapian::Enquire enq(db);
41  static const char * const vals[] = {
42  "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z"
43  };
44  for (auto start : vals) {
45  for (auto end : vals) {
47  enq.set_query(query);
48  Xapian::MSet mset = enq.get_mset(0, 20);
49  // Check that documents in the MSet match the value range filter.
50  set<Xapian::docid> matched;
52  for (i = mset.begin(); i != mset.end(); ++i) {
53  matched.insert(*i);
54  string value = db.get_document(*i).get_value(1);
55  TEST_REL(value,>=,start);
56  TEST_REL(value,<=,end);
57  }
58  // Check that documents not in the MSet don't match the value range filter.
59  for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
60  if (matched.find(j) == matched.end()) {
61  string value = db.get_document(j).get_value(1);
62  tout << value << " < '" << start << "' or > '" << end << "'\n";
63  TEST(value < start || value > end);
64  }
65  }
66  }
67  }
68 }
69 
70 // Regression test for Query::OP_VALUE_LE - used to return document IDs for
71 // non-existent documents.
72 DEFINE_TESTCASE(valuerange2, backend) {
73  Xapian::Database db = get_database("valuerange2",
75  const string&) {
76  Xapian::Document doc;
77  doc.set_data("5");
78  doc.add_value(0, "5");
79  wdb.replace_document(5, doc);
80  });
81  Xapian::Enquire enq(db);
82 
84  enq.set_query(query);
85  Xapian::MSet mset = enq.get_mset(0, 20);
86 
87  TEST_EQUAL(mset.size(), 1);
88  TEST_EQUAL(*(mset[0]), 5);
89 }
90 
91 static void
93 {
94  Xapian::Document doc;
95  doc.add_value(0, "BOOK");
96  db.add_document(doc);
97  doc.add_value(0, "VOLUME");
98  db.add_document(doc);
99 }
100 
101 // Check that lower and upper bounds are used.
102 DEFINE_TESTCASE(valuerange5, backend) {
103  Xapian::Database db = get_database("valuerange5", make_valuerange5);
104 
105  // If the lower bound is empty, either the specified value slot is
106  // never used in the database, or the backend doesn't track value bounds.
107  // Neither should be true here.
108  TEST(!db.get_value_lower_bound(0).empty());
109 
110  Xapian::Enquire enq(db);
111 
112  Xapian::Query query(Xapian::Query::OP_VALUE_RANGE, 0, "APPLE", "BANANA");
113  enq.set_query(query);
114  Xapian::MSet mset = enq.get_mset(0, 0);
116 
117  Xapian::Query query2(Xapian::Query::OP_VALUE_RANGE, 0, "WALRUS", "ZEBRA");
118  enq.set_query(query2);
119  mset = enq.get_mset(0, 0);
121 }
122 
123 static void
125 {
126  Xapian::Document doc;
127  db.add_document(doc);
128  doc.add_value(0, "SINGULAR");
129  db.add_document(doc);
130  db.add_document(doc);
131 }
132 
133 // Check handling of bounds when bounds are equal.
134 DEFINE_TESTCASE(valuerange6, backend) {
135  const auto OP_VALUE_RANGE = Xapian::Query::OP_VALUE_RANGE;
136  Xapian::Database db = get_database("singularvalue", make_singularvalue_db);
137 
138  Xapian::Enquire enq(db);
139 
141  query = Xapian::Query(OP_VALUE_RANGE, 0, "SATSUMA", "SLOE");
142  enq.set_query(query);
143  Xapian::MSet mset = enq.get_mset(0, 0);
147 
148  query = Xapian::Query(OP_VALUE_RANGE, 0, "PEACH", "PLUM");
149  enq.set_query(query);
150  mset = enq.get_mset(0, 0);
154 
155  query = Xapian::Query(OP_VALUE_RANGE, 0, "PEACH", "PEACH");
156  enq.set_query(query);
157  mset = enq.get_mset(0, 0);
161 
162  query = Xapian::Query(OP_VALUE_RANGE, 0, "PEACH", "PEACHERINE");
163  enq.set_query(query);
164  mset = enq.get_mset(0, 0);
168 
169  query = Xapian::Query(OP_VALUE_RANGE, 0, "SING", "SINGULARITY");
170  enq.set_query(query);
171  mset = enq.get_mset(0, 0);
175 
176  query = Xapian::Query(OP_VALUE_RANGE, 0, "SING", "SINGULAR");
177  enq.set_query(query);
178  mset = enq.get_mset(0, 0);
182 
183  query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULAR", "SINGULARITY");
184  enq.set_query(query);
185  mset = enq.get_mset(0, 0);
189 
190  query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULAR", "SINGULAR");
191  enq.set_query(query);
192  mset = enq.get_mset(0, 0);
196 
197  query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "SINGULARITY");
198  enq.set_query(query);
199  mset = enq.get_mset(0, 0);
203 
204  query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "SINGULARITIES");
205  enq.set_query(query);
206  mset = enq.get_mset(0, 0);
210 
211  query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "SINNER");
212  enq.set_query(query);
213  mset = enq.get_mset(0, 0);
217 
218  query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGULARITY", "ZEBRA");
219  enq.set_query(query);
220  mset = enq.get_mset(0, 0);
224 
225  query = Xapian::Query(OP_VALUE_RANGE, 0, "SINGE", "SINGER");
226  enq.set_query(query);
227  mset = enq.get_mset(0, 0);
231 
232  // Check that no assertions fail when the slot is empty. Regression test
233  // for a bug introduced and fixed between 1.4.5 and 1.4.6.
234  query = Xapian::Query(OP_VALUE_RANGE, 1, "MONK", "MONKEY");
235  enq.set_query(query);
236  mset = enq.get_mset(0, 0);
240 }
241 
242 static void
244 {
245  Xapian::Document doc;
246  db.add_document(doc);
247  doc.add_value(0, "ZERO");
248  db.add_document(doc);
249  doc.add_value(0, string("ZERO\0", 5));
250  db.add_document(doc);
251 }
252 
253 // Check handling of bounds when low is a prefix of high.
254 DEFINE_TESTCASE(valuerange7, backend) {
255  const auto OP_VALUE_RANGE = Xapian::Query::OP_VALUE_RANGE;
256  Xapian::Database db = get_database("valprefixbounds", make_valprefixbounds_db);
257 
258  Xapian::Enquire enq(db);
259 
261  query = Xapian::Query(OP_VALUE_RANGE, 0, "ZAP", "ZOO");
262  enq.set_query(query);
263  Xapian::MSet mset = enq.get_mset(0, 0);
267 
268  query = Xapian::Query(OP_VALUE_RANGE, 0, "ZAP", "ZERO");
269  enq.set_query(query);
270  mset = enq.get_mset(0, 0);
272  if (db.size() > 1) {
273  // The second shard will just have one document with "ZERO" in the slot
274  // so we can tell there's exactly one match there, and the first shard
275  // has one "ZERO\0" and one empty entry, so we can tell that can't
276  // match.
279  } else {
282  }
283 }
284 
285 // Feature test for Query::OP_VALUE_GE.
286 DEFINE_TESTCASE(valuege1, backend) {
287  Xapian::Database db(get_database("apitest_phrase"));
288  Xapian::Enquire enq(db);
289  static const char * const vals[] = {
290  "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z"
291  };
292  for (auto start : vals) {
294  enq.set_query(query);
295  Xapian::MSet mset = enq.get_mset(0, 20);
296  // Check that documents in the MSet match the value range filter.
297  set<Xapian::docid> matched;
299  for (i = mset.begin(); i != mset.end(); ++i) {
300  matched.insert(*i);
301  string value = db.get_document(*i).get_value(1);
302  tout << "'" << start << "' <= '" << value << "'\n";
303  TEST_REL(value,>=,start);
304  }
305  // Check that documents not in the MSet don't match the value range
306  // filter.
307  for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
308  if (matched.find(j) == matched.end()) {
309  string value = db.get_document(j).get_value(1);
310  tout << value << " < '" << start << "'\n";
311  TEST_REL(value,<,start);
312  }
313  }
314  }
315 }
316 
317 // Regression test for Query::OP_VALUE_GE - used to segfault if check() got
318 // called.
319 DEFINE_TESTCASE(valuege2, backend) {
320  Xapian::Database db(get_database("apitest_phrase"));
321  Xapian::Enquire enq(db);
323  Xapian::Query("what"),
325  enq.set_query(query);
326  Xapian::MSet mset = enq.get_mset(0, 20);
327 }
328 
329 // Feature test for Query::OP_VALUE_LE.
330 DEFINE_TESTCASE(valuele1, backend) {
331  Xapian::Database db(get_database("apitest_phrase"));
332  Xapian::Enquire enq(db);
333  static const char * const vals[] = {
334  "", " ", "a", "aa", "abcd", "e", "g", "h", "hzz", "i", "l", "z"
335  };
336  for (auto end : vals) {
338  enq.set_query(query);
339  Xapian::MSet mset = enq.get_mset(0, 20);
340  // Check that documents in the MSet match the value range filter.
341  set<Xapian::docid> matched;
343  for (i = mset.begin(); i != mset.end(); ++i) {
344  matched.insert(*i);
345  string value = db.get_document(*i).get_value(1);
346  TEST_REL(value,<=,end);
347  }
348  // Check that documents not in the MSet don't match the value range
349  // filter.
350  for (Xapian::docid j = db.get_lastdocid(); j != 0; --j) {
351  if (matched.find(j) == matched.end()) {
352  string value = db.get_document(j).get_value(1);
353  TEST_REL(value,>,end);
354  }
355  }
356  }
357 }
358 
359 // Check that Query(OP_VALUE_GE, 0, "") -> Query::MatchAll.
360 DEFINE_TESTCASE(valuege3, !backend) {
363 }
364 
365 // Test Query::OP_VALUE_GE in a query which causes its skip_to() to be used.
366 DEFINE_TESTCASE(valuege4, backend) {
367  Xapian::Database db(get_database("apitest_phrase"));
368  Xapian::Enquire enq(db);
369 
370  // This query should put the ValueGePostList on the LHS of the AND because
371  // it has a lower estimated termfreq than the term "fridg". As a result,
372  // the skip_to() method is used to advance the ValueGePostList.
374  Xapian::Query("fridg"),
376  enq.set_query(query);
377  Xapian::MSet mset = enq.get_mset(0, 20);
378 }
379 
380 // Test Query::OP_VALUE_RANGE in a query which causes its check() to be used.
381 DEFINE_TESTCASE(valuerange3, backend) {
382  Xapian::Database db(get_database("apitest_phrase"));
383  Xapian::Enquire enq(db);
385  Xapian::Query("what"),
387  "aa", "z"));
388  enq.set_query(query);
389  Xapian::MSet mset = enq.get_mset(0, 20);
390 }
391 
392 // Test Query::OP_VALUE_RANGE in a query which causes its skip_to() to be used.
393 DEFINE_TESTCASE(valuerange4, backend) {
394  Xapian::Database db(get_database("apitest_phrase"));
395  Xapian::Enquire enq(db);
397  Xapian::Query("fridg"),
399  "aa", "z"));
400  enq.set_query(query);
401  Xapian::MSet mset = enq.get_mset(0, 20);
402 }
403 
405 DEFINE_TESTCASE(valuerangematchesub1, backend) {
406  Xapian::Database db(get_database("etext"));
407  Xapian::Enquire enq(db);
408  // Values present in slot 10 range from 'e' to 'w'.
410  "h", "i"));
411  enq.set_query(query);
412  Xapian::MSet mset = enq.get_mset(0, 0);
413  // The upper bound used to be the document count (db.get_doccount()).
416  // The estimate used to be half the document count (db.get_doccount() / 2);
417  // now it's calculated in proportion to the possible range.
418  TEST_REL(mset.get_matches_estimated(), <=, db.get_doccount() / 3);
419 }
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
static void make_singularvalue_db(Xapian::WritableDatabase &db, const string &)
Definition: api_opvalue.cc:124
static void make_valuerange5(Xapian::WritableDatabase &db, const string &)
Definition: api_opvalue.cc:92
static void make_valprefixbounds_db(Xapian::WritableDatabase &db, const string &)
Definition: api_opvalue.cc:243
DEFINE_TESTCASE(valuerange1, backend)
Definition: api_opvalue.cc:38
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
An indexed database of documents.
Definition: database.h:75
size_t size() const
Return number of shards in this Database object.
Definition: database.cc:105
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
Definition: database.cc:290
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: database.cc:233
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: database.cc:239
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
Definition: database.cc:284
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
Definition: database.cc:368
Class representing a document.
Definition: document.h:64
void set_data(std::string_view data)
Set the document data.
Definition: document.cc:81
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
Definition: document.cc:185
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Definition: document.cc:191
Querying session.
Definition: enquire.h:57
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
Iterator over a Xapian::MSet.
Definition: mset.h:535
Class representing a list of search results.
Definition: mset.h:46
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: mset.cc:334
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:786
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: mset.cc:318
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:791
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: mset.cc:324
Class representing a query.
Definition: query.h:45
std::string get_description() const
Return a string describing this object.
Definition: query.cc:307
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
Definition: query.h:158
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
Definition: query.h:231
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
Definition: query.h:223
static const Xapian::Query MatchAll
A query matching all documents.
Definition: query.h:75
This class provides read/write access to a database.
Definition: database.h:964
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a document in the database.
Definition: database.cc:582
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:35
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
a generic test suite engine
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Definition: testsuite.h:285
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
Xapian-specific test helper functions and macros.
Public interfaces for the Xapian library.