xapian-core  1.4.21
api_matchspy.cc
Go to the documentation of this file.
1 
4 /* Copyright 2007,2009 Lemur Consulting Ltd
5  * Copyright 2009,2011,2012,2015,2019 Olly Betts
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "api_matchspy.h"
27 
28 #include <xapian.h>
29 
30 #include <cmath>
31 #include <map>
32 #include <vector>
33 
34 #include "backendmanager.h"
35 #include "str.h"
36 #include "testsuite.h"
37 #include "testutils.h"
38 #include "apitest.h"
39 
40 using namespace std;
41 
42 // #######################################################################
43 // # Tests start here
44 
46  public:
47  // Vector which will be filled with all the document contents seen.
48  std::vector<std::string> seen;
49 
50  void operator()(const Xapian::Document &doc, double) {
51  // Note that this is not recommended usage of get_data() - you
52  // generally shouldn't call get_data() from inside a MatchSpy, because
53  // it is (likely to be) a slow operation resulting in considerable IO.
54  seen.push_back(doc.get_data());
55  }
56 };
57 
58 // Basic test of a matchspy.
59 DEFINE_TESTCASE(matchspy1, backend && !remote) {
60  Xapian::Database db(get_database("apitest_simpledata"));
61  Xapian::Enquire enquire(db);
62  enquire.set_query(Xapian::Query("this"));
63 
64  SimpleMatchSpy myspy;
65 
66  Xapian::MSet nospymset = enquire.get_mset(0, 100);
67  enquire.add_matchspy(&myspy);
68  Xapian::MSet spymset = enquire.get_mset(0, 100);
69 
70  // Check that the match estimates aren't affected by the matchspy.
71  TEST_EQUAL(nospymset, spymset);
72 
73  vector<bool> docid_checked(db.get_lastdocid());
74 
75  // Check that we get the expected number of matches, and that the stored
76  // document contents are right.
77  Xapian::MSetIterator i = spymset.begin();
78  TEST(i != spymset.end());
79  TEST_EQUAL(spymset.size(), 6);
80  TEST_EQUAL(myspy.seen.size(), spymset.size());
81 
82  std::sort(myspy.seen.begin(), myspy.seen.end());
83 
84  std::vector<std::string> seen2;
85  for ( ; i != spymset.end(); ++i) {
86  const Xapian::Document doc(i.get_document());
87  seen2.push_back(doc.get_data());
88  }
89  std::sort(seen2.begin(), seen2.end());
90 
91  TEST_EQUAL(myspy.seen.size(), seen2.size());
92  std::vector<std::string>::const_iterator j = myspy.seen.begin();
93  std::vector<std::string>::const_iterator j2 = seen2.begin();
94  for (; j != myspy.seen.end(); ++j, ++j2) {
95  TEST_EQUAL(*j, *j2);
96  }
97 }
98 
99 static string values_to_repr(const Xapian::ValueCountMatchSpy & spy) {
100  string resultrepr("|");
101  for (Xapian::TermIterator i = spy.values_begin();
102  i != spy.values_end();
103  ++i) {
104  resultrepr += *i;
105  resultrepr += ':';
106  resultrepr += str(i.get_termfreq());
107  resultrepr += '|';
108  }
109  return resultrepr;
110 }
111 
112 static void
114 {
115  for (int c = 1; c <= 25; ++c) {
116  Xapian::Document doc;
117  doc.set_data("Document " + str(c));
118  int factors = 0;
119  for (int factor = 1; factor <= c; ++factor) {
120  doc.add_term("all");
121  if (c % factor == 0) {
122  doc.add_term("XFACT" + str(factor));
123  ++factors;
124  }
125  }
126 
127  // Number of factors.
128  doc.add_value(0, str(factors));
129  // Units digits.
130  doc.add_value(1, str(c % 10));
131  // Constant.
132  doc.add_value(2, "fish");
133  // Number of digits.
134  doc.add_value(3, str(str(c).size()));
135 
136  db.add_document(doc);
137  }
138 }
139 
140 DEFINE_TESTCASE(matchspy2, generated)
141 {
143 
147 
148  Xapian::Enquire enq(db);
149 
150  enq.set_query(Xapian::Query("all"));
151  if (startswith(get_dbtype(), "multi")) {
152  // Without this, we short-cut on the second shard because we don't get
153  // the documents in ascending weight order.
155  }
156 
157  enq.add_matchspy(&spy0);
158  enq.add_matchspy(&spy1);
159  enq.add_matchspy(&spy3);
160  Xapian::MSet mset = enq.get_mset(0, 10);
161 
162  TEST_EQUAL(spy0.get_total(), 25);
163  TEST_EQUAL(spy1.get_total(), 25);
164  TEST_EQUAL(spy3.get_total(), 25);
165 
166  static const char * const results[] = {
167  "|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
168  "|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
169  "|1:9|2:16|",
170  };
171  TEST_STRINGS_EQUAL(values_to_repr(spy0), results[0]);
172  TEST_STRINGS_EQUAL(values_to_repr(spy1), results[1]);
173  TEST_STRINGS_EQUAL(values_to_repr(spy3), results[2]);
174 }
175 
176 DEFINE_TESTCASE(matchspy4, generated)
177 {
178  XFAIL_FOR_BACKEND("multi_remote",
179  "Matchspy counts hits on remote and locally");
180  XFAIL_FOR_BACKEND("multi_glass_remote",
181  "Matchspy counts hits on remote and locally");
182 
184 
185  // We're going to run the match twice - once sorted by relevance, and once
186  // sorted by a value. This is a regression test - the matcher used to fail
187  // to show some documents to the spy when sorting by non-pure-relevance.
194 
195  Xapian::Enquire enqa(db);
196  Xapian::Enquire enqb(db);
197 
198  enqa.set_query(Xapian::Query("all"));
199  if (startswith(get_dbtype(), "multi")) {
200  // Without this, we short-cut on the second shard because we don't get
201  // the documents in ascending weight order.
203  }
204  enqb.set_query(Xapian::Query("all"));
205 
206  enqa.add_matchspy(&spya0);
207  enqa.add_matchspy(&spya1);
208  enqa.add_matchspy(&spya3);
209  enqb.add_matchspy(&spyb0);
210  enqb.add_matchspy(&spyb1);
211  enqb.add_matchspy(&spyb3);
212 
213  Xapian::MSet mseta = enqa.get_mset(0, 10);
214  enqb.set_sort_by_value(0, false);
215  Xapian::MSet msetb = enqb.get_mset(0, 10, 100);
216 
217  TEST_EQUAL(spya0.get_total(), 25);
218  TEST_EQUAL(spya1.get_total(), 25);
219  TEST_EQUAL(spya3.get_total(), 25);
220  TEST_EQUAL(spyb0.get_total(), 25);
221  TEST_EQUAL(spyb1.get_total(), 25);
222  TEST_EQUAL(spyb3.get_total(), 25);
223 
224  static const char * const results[] = {
225  "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
226  "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
227  "|",
228  "|2:16|1:9|",
229  "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
230  "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
231  "|",
232  "|2:16|1:9|",
233  NULL
234  };
235  std::vector<Xapian::ValueCountMatchSpy *> spies;
236  spies.push_back(&spya0);
237  spies.push_back(&spya1);
238  spies.push_back(NULL);
239  spies.push_back(&spya3);
240  spies.push_back(&spyb0);
241  spies.push_back(&spyb1);
242  spies.push_back(NULL);
243  spies.push_back(&spyb3);
244  for (Xapian::valueno v = 0; results[v]; ++v) {
245  tout << "value " << v << endl;
246  Xapian::ValueCountMatchSpy * spy = spies[v];
247  string allvals_str("|");
248  if (spy != NULL) {
249  size_t allvals_size = 0;
250  for (Xapian::TermIterator i = spy->top_values_begin(100);
251  i != spy->top_values_end(100);
252  ++i, ++allvals_size) {
253  allvals_str += *i;
254  allvals_str += ':';
255  allvals_str += str(i.get_termfreq());
256  allvals_str += '|';
257  }
258  tout << allvals_str << endl;
259  TEST_STRINGS_EQUAL(allvals_str, results[v]);
260 
261  for (size_t count = 0; count < allvals_size; ++count) {
262  tout << "count " << count << endl;
263  for (Xapian::TermIterator i = spy->top_values_begin(100),
264  j = spy->top_values_begin(count);
265  i != spy->top_values_end(100) &&
266  j != spy->top_values_end(count);
267  ++i, ++j) {
268  tout << "j " << j << endl;
269  TEST_EQUAL(*i, *j);
270  TEST_EQUAL(i.get_termfreq(), j.get_termfreq());
271  }
272  }
273  }
274  }
275 }
276 
277 // Test builtin match spies
278 DEFINE_TESTCASE(matchspy5, backend)
279 {
280  Xapian::Database db(get_database("apitest_simpledata"));
281  Xapian::Enquire enquire(db);
282  enquire.set_query(Xapian::Query("this"));
283 
284  Xapian::ValueCountMatchSpy myspy1(1);
285  Xapian::ValueCountMatchSpy myspy2(1);
286 
287  enquire.add_matchspy(&myspy1);
288  enquire.add_matchspy(&myspy2);
289  Xapian::MSet mymset = enquire.get_mset(0, 100);
290  TEST_EQUAL(mymset.size(), 6);
291 
292  Xapian::TermIterator i = myspy1.values_begin();
293  TEST(i != myspy1.values_end());
294  TEST(*i == "h");
295  TEST_EQUAL(i.get_termfreq(), 5);
296  ++i;
297  TEST(i != myspy1.values_end());
298  TEST(*i == "n");
299  TEST_EQUAL(i.get_termfreq(), 1);
300  ++i;
301  TEST(i == myspy1.values_end());
302 
303  i = myspy2.values_begin();
304  TEST(i != myspy2.values_end());
305  TEST(*i == "h");
306  TEST_EQUAL(i.get_termfreq(), 5);
307  ++i;
308  TEST(i != myspy2.values_end());
309  TEST(*i == "n");
310  TEST_EQUAL(i.get_termfreq(), 1);
311  ++i;
312  TEST(i == myspy2.values_end());
313 }
314 
315 class MySpy : public Xapian::MatchSpy {
316  void operator()(const Xapian::Document &, double) {
317  }
318 };
319 
320 // Test exceptions from matchspy base class, and get_description method.
321 DEFINE_TESTCASE(matchspy6, !backend)
322 {
323  MySpy spy;
324 
329  spy.unserialise(std::string(), Xapian::Registry()));
332  spy.merge_results(std::string()));
333  TEST_EQUAL(spy.get_description(), "Xapian::MatchSpy()");
334 }
335 
337 DEFINE_TESTCASE(matchspy7, !backend)
338 {
340  string s = myspy.serialise_results();
341  s += 'x';
342  // This merge_results() call used to enter an infinite loop.
344 }
virtual std::string get_description() const
Return a string describing this object.
Definition: matchspy.cc:84
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
virtual std::string serialise_results() const
Serialise the results of this match spy.
Definition: matchspy.cc:74
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
virtual void merge_results(const std::string &serialised)
Unserialise some results, and merge them into this matchspy.
Definition: matchspy.cc:79
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:869
TermIterator values_end() const
End iterator corresponding to values_begin()
Definition: matchspy.h:255
Abstract base class for match spies.
Definition: matchspy.h:49
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
size_t get_total() const
Return the total number of documents tallied.
Definition: matchspy.h:241
void operator()(const Xapian::Document &doc, double)
Register a document with the match spy.
Definition: api_matchspy.cc:50
a generic test suite engine
virtual std::string serialise_results() const
Serialise the results of this match spy.
Definition: matchspy.cc:360
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
Convert types to std::string.
static string values_to_repr(const Xapian::ValueCountMatchSpy &spy)
Definition: api_matchspy.cc:99
DEFINE_TESTCASE(matchspy1, backend &&!remote)
Definition: api_matchspy.cc:59
test functionality of the Xapian API
Class for iterating over a list of terms.
Definition: termiterator.h:41
Xapian::Weight subclass implementing Coordinate Matching.
Definition: weight.h:1509
TermIterator top_values_begin(size_t maxvalues) const
Get an iterator over the most frequent values seen in the slot.
Definition: matchspy.cc:316
Base class for backend handling in test harness.
This class provides read/write access to a database.
Definition: database.h:785
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:351
Registry for user subclasses.
Definition: registry.h:47
Public interfaces for the Xapian library.
virtual std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: matchspy.cc:64
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
std::string get_dbtype()
Definition: apitest.cc:42
string str(int value)
Convert int to std::string.
Definition: str.cc:90
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:607
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:612
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:46
void operator()(const Xapian::Document &, double)
Register a document with the match spy.
TermIterator top_values_end(size_t) const
End iterator corresponding to top_values_begin()
Definition: matchspy.h:272
TermIterator values_begin() const
Get an iterator over the values seen in the slot.
Definition: matchspy.cc:309
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
static void make_matchspy2_db(Xapian::WritableDatabase &db, const string &)
Class for counting the frequencies of values in the matching documents.
Definition: matchspy.h:205
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
void add_matchspy(MatchSpy *spy)
Add a matchspy.
Definition: omenquire.cc:807
void XFAIL_FOR_BACKEND(const std::string &backend_prefix, const char *msg)
Definition: apitest.cc:147
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
Indicates a problem communicating with a remote database.
Definition: error.h:803
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Definition: testsuite.h:287
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
virtual MatchSpy * unserialise(const std::string &serialised, const Registry &context) const
Unserialise parameters.
Definition: matchspy.cc:69
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
virtual void merge_results(const std::string &serialised)
Unserialise some results, and merge them into this matchspy.
Definition: matchspy.cc:376
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
std::vector< std::string > seen
Definition: api_matchspy.cc:48
A handle representing a document in a Xapian database.
Definition: document.h:61
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
virtual MatchSpy * clone() const
Clone the match spy.
Definition: matchspy.cc:54
virtual std::string name() const
Return the name of this match spy.
Definition: matchspy.cc:59