xapian-core  1.4.22
api_matchspy.cc
Go to the documentation of this file.
1 
4 /* Copyright 2007,2009 Lemur Consulting Ltd
5  * Copyright 2009,2011,2012,2015,2019 Olly Betts
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "api_matchspy.h"
27 
28 #include <xapian.h>
29 
30 #include <vector>
31 
32 #include "backendmanager.h"
33 #include "str.h"
34 #include "testsuite.h"
35 #include "testutils.h"
36 #include "apitest.h"
37 
38 using namespace std;
39 
40 // #######################################################################
41 // # Tests start here
42 
44  public:
45  // Vector which will be filled with all the document contents seen.
46  std::vector<std::string> seen;
47 
48  void operator()(const Xapian::Document &doc, double) {
49  // Note that this is not recommended usage of get_data() - you
50  // generally shouldn't call get_data() from inside a MatchSpy, because
51  // it is (likely to be) a slow operation resulting in considerable IO.
52  seen.push_back(doc.get_data());
53  }
54 };
55 
56 // Basic test of a matchspy.
57 DEFINE_TESTCASE(matchspy1, backend && !remote) {
58  Xapian::Database db(get_database("apitest_simpledata"));
59  Xapian::Enquire enquire(db);
60  enquire.set_query(Xapian::Query("this"));
61 
62  SimpleMatchSpy myspy;
63 
64  Xapian::MSet nospymset = enquire.get_mset(0, 100);
65  enquire.add_matchspy(&myspy);
66  Xapian::MSet spymset = enquire.get_mset(0, 100);
67 
68  // Check that the match estimates aren't affected by the matchspy.
69  TEST_EQUAL(nospymset, spymset);
70 
71  vector<bool> docid_checked(db.get_lastdocid());
72 
73  // Check that we get the expected number of matches, and that the stored
74  // document contents are right.
75  Xapian::MSetIterator i = spymset.begin();
76  TEST(i != spymset.end());
77  TEST_EQUAL(spymset.size(), 6);
78  TEST_EQUAL(myspy.seen.size(), spymset.size());
79 
80  std::sort(myspy.seen.begin(), myspy.seen.end());
81 
82  std::vector<std::string> seen2;
83  for ( ; i != spymset.end(); ++i) {
84  const Xapian::Document doc(i.get_document());
85  seen2.push_back(doc.get_data());
86  }
87  std::sort(seen2.begin(), seen2.end());
88 
89  TEST_EQUAL(myspy.seen.size(), seen2.size());
90  std::vector<std::string>::const_iterator j = myspy.seen.begin();
91  std::vector<std::string>::const_iterator j2 = seen2.begin();
92  for (; j != myspy.seen.end(); ++j, ++j2) {
93  TEST_EQUAL(*j, *j2);
94  }
95 }
96 
97 static string values_to_repr(const Xapian::ValueCountMatchSpy & spy) {
98  string resultrepr("|");
99  for (Xapian::TermIterator i = spy.values_begin();
100  i != spy.values_end();
101  ++i) {
102  resultrepr += *i;
103  resultrepr += ':';
104  resultrepr += str(i.get_termfreq());
105  resultrepr += '|';
106  }
107  return resultrepr;
108 }
109 
110 static void
112 {
113  for (int c = 1; c <= 25; ++c) {
114  Xapian::Document doc;
115  doc.set_data("Document " + str(c));
116  int factors = 0;
117  for (int factor = 1; factor <= c; ++factor) {
118  doc.add_term("all");
119  if (c % factor == 0) {
120  doc.add_term("XFACT" + str(factor));
121  ++factors;
122  }
123  }
124 
125  // Number of factors.
126  doc.add_value(0, str(factors));
127  // Units digits.
128  doc.add_value(1, str(c % 10));
129  // Constant.
130  doc.add_value(2, "fish");
131  // Number of digits.
132  doc.add_value(3, str(str(c).size()));
133 
134  db.add_document(doc);
135  }
136 }
137 
138 DEFINE_TESTCASE(matchspy2, generated)
139 {
141 
145 
146  Xapian::Enquire enq(db);
147 
148  enq.set_query(Xapian::Query("all"));
149  if (startswith(get_dbtype(), "multi")) {
150  // Without this, we short-cut on the second shard because we don't get
151  // the documents in ascending weight order.
153  }
154 
155  enq.add_matchspy(&spy0);
156  enq.add_matchspy(&spy1);
157  enq.add_matchspy(&spy3);
158  Xapian::MSet mset = enq.get_mset(0, 10);
159 
160  TEST_EQUAL(spy0.get_total(), 25);
161  TEST_EQUAL(spy1.get_total(), 25);
162  TEST_EQUAL(spy3.get_total(), 25);
163 
164  static const char * const results[] = {
165  "|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
166  "|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
167  "|1:9|2:16|",
168  };
169  TEST_STRINGS_EQUAL(values_to_repr(spy0), results[0]);
170  TEST_STRINGS_EQUAL(values_to_repr(spy1), results[1]);
171  TEST_STRINGS_EQUAL(values_to_repr(spy3), results[2]);
172 }
173 
174 DEFINE_TESTCASE(matchspy4, generated)
175 {
176  XFAIL_FOR_BACKEND("multi_remote",
177  "Matchspy counts hits on remote and locally");
178  XFAIL_FOR_BACKEND("multi_glass_remote",
179  "Matchspy counts hits on remote and locally");
180 
182 
183  // We're going to run the match twice - once sorted by relevance, and once
184  // sorted by a value. This is a regression test - the matcher used to fail
185  // to show some documents to the spy when sorting by non-pure-relevance.
192 
193  Xapian::Enquire enqa(db);
194  Xapian::Enquire enqb(db);
195 
196  enqa.set_query(Xapian::Query("all"));
197  if (startswith(get_dbtype(), "multi")) {
198  // Without this, we short-cut on the second shard because we don't get
199  // the documents in ascending weight order.
201  }
202  enqb.set_query(Xapian::Query("all"));
203 
204  enqa.add_matchspy(&spya0);
205  enqa.add_matchspy(&spya1);
206  enqa.add_matchspy(&spya3);
207  enqb.add_matchspy(&spyb0);
208  enqb.add_matchspy(&spyb1);
209  enqb.add_matchspy(&spyb3);
210 
211  Xapian::MSet mseta = enqa.get_mset(0, 10);
212  enqb.set_sort_by_value(0, false);
213  Xapian::MSet msetb = enqb.get_mset(0, 10, 100);
214 
215  TEST_EQUAL(spya0.get_total(), 25);
216  TEST_EQUAL(spya1.get_total(), 25);
217  TEST_EQUAL(spya3.get_total(), 25);
218  TEST_EQUAL(spyb0.get_total(), 25);
219  TEST_EQUAL(spyb1.get_total(), 25);
220  TEST_EQUAL(spyb3.get_total(), 25);
221 
222  static const char * const results[] = {
223  "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
224  "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
225  "|",
226  "|2:16|1:9|",
227  "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
228  "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
229  "|",
230  "|2:16|1:9|",
231  NULL
232  };
233  std::vector<Xapian::ValueCountMatchSpy *> spies;
234  spies.push_back(&spya0);
235  spies.push_back(&spya1);
236  spies.push_back(NULL);
237  spies.push_back(&spya3);
238  spies.push_back(&spyb0);
239  spies.push_back(&spyb1);
240  spies.push_back(NULL);
241  spies.push_back(&spyb3);
242  for (Xapian::valueno v = 0; results[v]; ++v) {
243  tout << "value " << v << endl;
244  Xapian::ValueCountMatchSpy * spy = spies[v];
245  string allvals_str("|");
246  if (spy != NULL) {
247  size_t allvals_size = 0;
248  for (Xapian::TermIterator i = spy->top_values_begin(100);
249  i != spy->top_values_end(100);
250  ++i, ++allvals_size) {
251  allvals_str += *i;
252  allvals_str += ':';
253  allvals_str += str(i.get_termfreq());
254  allvals_str += '|';
255  }
256  tout << allvals_str << endl;
257  TEST_STRINGS_EQUAL(allvals_str, results[v]);
258 
259  for (size_t count = 0; count < allvals_size; ++count) {
260  tout << "count " << count << endl;
261  for (Xapian::TermIterator i = spy->top_values_begin(100),
262  j = spy->top_values_begin(count);
263  i != spy->top_values_end(100) &&
264  j != spy->top_values_end(count);
265  ++i, ++j) {
266  tout << "j " << j << endl;
267  TEST_EQUAL(*i, *j);
268  TEST_EQUAL(i.get_termfreq(), j.get_termfreq());
269  }
270  }
271  }
272  }
273 }
274 
275 // Test builtin match spies
276 DEFINE_TESTCASE(matchspy5, backend)
277 {
278  Xapian::Database db(get_database("apitest_simpledata"));
279  Xapian::Enquire enquire(db);
280  enquire.set_query(Xapian::Query("this"));
281 
282  Xapian::ValueCountMatchSpy myspy1(1);
283  Xapian::ValueCountMatchSpy myspy2(1);
284 
285  enquire.add_matchspy(&myspy1);
286  enquire.add_matchspy(&myspy2);
287  Xapian::MSet mymset = enquire.get_mset(0, 100);
288  TEST_EQUAL(mymset.size(), 6);
289 
290  Xapian::TermIterator i = myspy1.values_begin();
291  TEST(i != myspy1.values_end());
292  TEST(*i == "h");
293  TEST_EQUAL(i.get_termfreq(), 5);
294  ++i;
295  TEST(i != myspy1.values_end());
296  TEST(*i == "n");
297  TEST_EQUAL(i.get_termfreq(), 1);
298  ++i;
299  TEST(i == myspy1.values_end());
300 
301  i = myspy2.values_begin();
302  TEST(i != myspy2.values_end());
303  TEST(*i == "h");
304  TEST_EQUAL(i.get_termfreq(), 5);
305  ++i;
306  TEST(i != myspy2.values_end());
307  TEST(*i == "n");
308  TEST_EQUAL(i.get_termfreq(), 1);
309  ++i;
310  TEST(i == myspy2.values_end());
311 }
312 
313 class MySpy : public Xapian::MatchSpy {
314  void operator()(const Xapian::Document &, double) {
315  }
316 };
317 
318 // Test exceptions from matchspy base class, and get_description method.
319 DEFINE_TESTCASE(matchspy6, !backend)
320 {
321  MySpy spy;
322 
327  spy.unserialise(std::string(), Xapian::Registry()));
330  spy.merge_results(std::string()));
331  TEST_EQUAL(spy.get_description(), "Xapian::MatchSpy()");
332 }
333 
335 DEFINE_TESTCASE(matchspy7, !backend)
336 {
338  string s = myspy.serialise_results();
339  s += 'x';
340  // This merge_results() call used to enter an infinite loop.
342 }
virtual std::string get_description() const
Return a string describing this object.
Definition: matchspy.cc:81
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
virtual std::string serialise_results() const
Serialise the results of this match spy.
Definition: matchspy.cc:71
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
virtual void merge_results(const std::string &serialised)
Unserialise some results, and merge them into this matchspy.
Definition: matchspy.cc:76
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:869
TermIterator values_end() const
End iterator corresponding to values_begin()
Definition: matchspy.h:255
Abstract base class for match spies.
Definition: matchspy.h:49
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
size_t get_total() const
Return the total number of documents tallied.
Definition: matchspy.h:241
void operator()(const Xapian::Document &doc, double)
Register a document with the match spy.
Definition: api_matchspy.cc:48
a generic test suite engine
virtual std::string serialise_results() const
Serialise the results of this match spy.
Definition: matchspy.cc:357
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
Convert types to std::string.
static string values_to_repr(const Xapian::ValueCountMatchSpy &spy)
Definition: api_matchspy.cc:97
DEFINE_TESTCASE(matchspy1, backend &&!remote)
Definition: api_matchspy.cc:57
test functionality of the Xapian API
Class for iterating over a list of terms.
Definition: termiterator.h:41
Xapian::Weight subclass implementing Coordinate Matching.
Definition: weight.h:1509
TermIterator top_values_begin(size_t maxvalues) const
Get an iterator over the most frequent values seen in the slot.
Definition: matchspy.cc:313
Base class for backend handling in test harness.
This class provides read/write access to a database.
Definition: database.h:785
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:351
Registry for user subclasses.
Definition: registry.h:47
Public interfaces for the Xapian library.
virtual std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: matchspy.cc:61
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
std::string get_dbtype()
Definition: apitest.cc:42
string str(int value)
Convert int to std::string.
Definition: str.cc:90
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:607
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:612
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:46
void operator()(const Xapian::Document &, double)
Register a document with the match spy.
TermIterator top_values_end(size_t) const
End iterator corresponding to top_values_begin()
Definition: matchspy.h:272
TermIterator values_begin() const
Get an iterator over the values seen in the slot.
Definition: matchspy.cc:306
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
static void make_matchspy2_db(Xapian::WritableDatabase &db, const string &)
Class for counting the frequencies of values in the matching documents.
Definition: matchspy.h:205
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
void add_matchspy(MatchSpy *spy)
Add a matchspy.
Definition: omenquire.cc:807
void XFAIL_FOR_BACKEND(const std::string &backend_prefix, const char *msg)
Definition: apitest.cc:147
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
Indicates a problem communicating with a remote database.
Definition: error.h:803
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian-specific test helper functions and macros.
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Definition: testsuite.h:287
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
virtual MatchSpy * unserialise(const std::string &serialised, const Registry &context) const
Unserialise parameters.
Definition: matchspy.cc:66
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
virtual void merge_results(const std::string &serialised)
Unserialise some results, and merge them into this matchspy.
Definition: matchspy.cc:373
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
std::vector< std::string > seen
Definition: api_matchspy.cc:46
A handle representing a document in a Xapian database.
Definition: document.h:61
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
virtual MatchSpy * clone() const
Clone the match spy.
Definition: matchspy.cc:51
virtual std::string name() const
Return the name of this match spy.
Definition: matchspy.cc:56