xapian-core  1.4.26
api_serialise.cc
Go to the documentation of this file.
1 
4 /* Copyright 2009 Lemur Consulting Ltd
5  * Copyright 2009,2011,2012,2013 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "api_serialise.h"
25 
26 #include <xapian.h>
27 
28 #include <exception>
29 #include <stdexcept>
30 
31 #include "apitest.h"
32 #include "testutils.h"
33 
34 using namespace std;
35 
36 // Test for serialising a document
// Round-trip test for Xapian::Document serialisation: an empty document
// first, then one holding a term, a posting, a value and some data.
// NOTE(review): this listing has gaps (the embedded line numbers skip 41, 54,
// 56 and 58-60), so the statements that introduce doc1, doc2, i, j and k are
// not visible here. Presumably doc1 and doc2 come from
// Xapian::Document::unserialise(doc.serialise()), as in serialise_document2 -
// confirm against the original file.
37 DEFINE_TESTCASE(serialise_document1, !backend) {
38  Xapian::Document doc;
39 
40  // Test serialising and unserialising an empty document.
42  TEST_EQUAL(doc1.termlist_count(), 0);
43  TEST_EQUAL(doc1.termlist_begin(), doc1.termlist_end());
44  TEST_EQUAL(doc1.values_count(), 0);
45  TEST_EQUAL(doc1.values_begin(), doc1.values_end());
46  TEST_EQUAL(doc1.get_data(), "");
47 
48  // Test serialising a document with things in.
49  doc.add_term("foo", 2);
50  doc.add_posting("foo", 10);
51  doc.add_value(1, "bar");
52  doc.set_data("baz");
53 
55 
57  TEST_EQUAL(doc.termlist_count(), 1);
61 
    // First pass: check the contents of the source document itself.
62  i = doc.termlist_begin();
63  TEST_NOT_EQUAL(i, doc.termlist_end());
    // wdf is 3: add_term() added 2 and add_posting() adds its default
    // wdfinc of 1 on top.
64  TEST_EQUAL(i.get_wdf(), 3);
65  TEST_EQUAL(*i, "foo");
66  TEST_EQUAL(i.positionlist_count(), 1);
67  j = i.positionlist_begin();
68  TEST_NOT_EQUAL(j, i.positionlist_end());
69  TEST_EQUAL(*j, 10);
    // Exactly one position and exactly one term are expected, so one
    // increment must reach the end of each list.
70  ++j;
71  TEST_EQUAL(j, i.positionlist_end());
72  ++i;
73  TEST_EQUAL(i, doc.termlist_end());
74 
75  TEST_EQUAL(doc.values_count(), 1);
76  k = doc.values_begin();
77  TEST_NOT_EQUAL(k, doc.values_end());
78  TEST_EQUAL(k.get_valueno(), 1);
79  TEST_EQUAL(*k, "bar");
80  ++k;
81  TEST_EQUAL(k, doc.values_end());
82 
83  TEST_EQUAL(doc.get_data(), "baz");
84 
    // Second pass: the same checks, now against doc2.
85  i = doc2.termlist_begin();
86  TEST_NOT_EQUAL(i, doc2.termlist_end());
87  TEST_EQUAL(i.get_wdf(), 3);
88  TEST_EQUAL(*i, "foo");
89  TEST_EQUAL(i.positionlist_count(), 1);
90  j = i.positionlist_begin();
91  TEST_NOT_EQUAL(j, i.positionlist_end());
92  TEST_EQUAL(*j, 10);
93  ++j;
94  TEST_EQUAL(j, i.positionlist_end());
95  ++i;
96  TEST_EQUAL(i, doc2.termlist_end());
97 
98  TEST_EQUAL(doc2.values_count(), 1);
99  k = doc2.values_begin();
100  TEST_NOT_EQUAL(k, doc2.values_end());
101  TEST_EQUAL(k.get_valueno(), 1);
102  TEST_EQUAL(*k, "bar");
103  ++k;
104  TEST_EQUAL(k, doc2.values_end());
105 
106  TEST_EQUAL(doc2.get_data(), "baz");
107 }
108 
109 // Test for serialising a document obtained from a database.
// Round-trip test for a document read back from a database: the generator
// lambda adds one document, the test fetches document 1, serialises and
// unserialises it, then checks both the fetched document and the copy.
// NOTE(review): the embedded line numbers skip 128-130, so the declarations
// of the iterators i, j and k are not visible in this listing.
110 DEFINE_TESTCASE(serialise_document2, backend) {
111  Xapian::Database db = get_database("serialise_document2",
112  [](Xapian::WritableDatabase& wdb,
113  const string&) {
114  Xapian::Document doc;
115  doc.add_term("foo", 2);
116  doc.add_posting("foo", 10);
117  doc.add_value(1, "bar");
118  doc.set_data("baz");
119  wdb.add_document(doc);
120  });
121 
122  Xapian::Document doc = db.get_document(1);
123 
124  Xapian::Document doc2 = Xapian::Document::unserialise(doc.serialise());
125 
126  TEST_EQUAL(doc.termlist_count(), doc2.termlist_count());
127  TEST_EQUAL(doc.termlist_count(), 1);
131 
    // First pass: check the document as fetched from the database.
132  i = doc.termlist_begin();
133  TEST_NOT_EQUAL(i, doc.termlist_end());
    // wdf is 3: add_term() added 2 and add_posting() adds its default
    // wdfinc of 1 on top.
134  TEST_EQUAL(i.get_wdf(), 3);
135  TEST_EQUAL(*i, "foo");
136  TEST_EQUAL(i.positionlist_count(), 1);
137  j = i.positionlist_begin();
138  TEST_NOT_EQUAL(j, i.positionlist_end());
139  TEST_EQUAL(*j, 10);
140  ++j;
141  TEST_EQUAL(j, i.positionlist_end());
142  ++i;
143  TEST_EQUAL(i, doc.termlist_end());
144 
145  TEST_EQUAL(doc.values_count(), 1);
146  k = doc.values_begin();
147  TEST_NOT_EQUAL(k, doc.values_end());
148  TEST_EQUAL(k.get_valueno(), 1);
149  TEST_EQUAL(*k, "bar");
150  ++k;
151  TEST_EQUAL(k, doc.values_end());
152 
153  TEST_EQUAL(doc.get_data(), "baz");
154 
    // Second pass: the same checks against the unserialised copy.
155  i = doc2.termlist_begin();
156  TEST_NOT_EQUAL(i, doc2.termlist_end());
157  TEST_EQUAL(i.get_wdf(), 3);
158  TEST_EQUAL(*i, "foo");
159  TEST_EQUAL(i.positionlist_count(), 1);
160  j = i.positionlist_begin();
161  TEST_NOT_EQUAL(j, i.positionlist_end());
162  TEST_EQUAL(*j, 10);
163  ++j;
164  TEST_EQUAL(j, i.positionlist_end());
165  ++i;
166  TEST_EQUAL(i, doc2.termlist_end());
167 
168  TEST_EQUAL(doc2.values_count(), 1);
169  k = doc2.values_begin();
170  TEST_NOT_EQUAL(k, doc2.values_end());
171  TEST_EQUAL(k.get_valueno(), 1);
172  TEST_EQUAL(*k, "bar");
173  ++k;
174  TEST_EQUAL(k, doc2.values_end());
175 
176  TEST_EQUAL(doc2.get_data(), "baz");
177 }
178 
179 // Test for serialising a query
// Round-trip test for Xapian::Query: each query is serialised, unserialised
// into q2, and the two get_description() strings are compared; most steps
// also pin the exact expected description.
// NOTE(review): the embedded line numbers skip 182-183, so the declaration
// of q2 (and presumably the first round trip of the empty query) is not
// visible in this listing - confirm against the original file.
180 DEFINE_TESTCASE(serialise_query1, !backend) {
181  Xapian::Query q;
184  TEST_EQUAL(q.get_description(), "Query()");
185 
186  q = Xapian::Query("hello");
187  q2 = Xapian::Query::unserialise(q.serialise());
188  TEST_EQUAL(q.get_description(), q2.get_description());
189  TEST_EQUAL(q.get_description(), "Query(hello)");
190 
191  q = Xapian::Query("hello", 1, 1);
192  q2 = Xapian::Query::unserialise(q.serialise());
193  // Regression test for fix in Xapian 1.0.0.
194  TEST_EQUAL(q.get_description(), q2.get_description());
195  TEST_EQUAL(q.get_description(), "Query(hello@1)");
196 
197  q = Xapian::Query(q.OP_OR, Xapian::Query("hello"), Xapian::Query("world"));
198  q2 = Xapian::Query::unserialise(q.serialise());
199  TEST_EQUAL(q.get_description(), q2.get_description());
200  TEST_EQUAL(q.get_description(), "Query((hello OR world))");
201 
202  q = Xapian::Query(q.OP_OR,
203  Xapian::Query("hello", 1, 1),
204  Xapian::Query("world", 1, 1));
205  q2 = Xapian::Query::unserialise(q.serialise());
206  TEST_EQUAL(q.get_description(), q2.get_description());
207  TEST_EQUAL(q.get_description(), "Query((hello@1 OR world@1))");
208 
    // Composite case: a three-term phrase, OR-ed with "007", then wrapped
    // in OP_SCALE_WEIGHT. Only the round trip is checked here; no literal
    // description is pinned for this query.
209  static const char * const phrase[] = { "shaken", "not", "stirred" };
210  q = Xapian::Query(q.OP_PHRASE, phrase, phrase + 3);
211  q = Xapian::Query(q.OP_OR, Xapian::Query("007"), q);
212  q = Xapian::Query(q.OP_SCALE_WEIGHT, q, 3.14);
213  q2 = Xapian::Query::unserialise(q.serialise());
214  TEST_EQUAL(q.get_description(), q2.get_description());
215 }
216 
217 // Test for serialising a query which contains a PostingSource.
// Builds queries around three posting sources and checks each query's
// description string.
// NOTE(review): most of this test is missing from the listing (the embedded
// line numbers skip 219, 221-222, 225, 228-229, 232 and 234-235). The
// declarations of s1, s2 and s3 and any serialise/unserialise statements are
// not visible. Judging only by the expected descriptions, s1, s2 and s3 are
// presumably a ValueWeightPostingSource (slot 10), a ValueMapPostingSource
// (slot 11) and a FixedWeightPostingSource (weight 5.5) - confirm against
// the original file.
218 DEFINE_TESTCASE(serialise_query2, !backend) {
220  Xapian::Query q(&s1);
223  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueWeightPostingSource(slot=10)))");
224 
226  s2.set_default_weight(5.0);
227  q = Xapian::Query(&s2);
230  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueMapPostingSource(slot=11)))");
231 
233  q = Xapian::Query(&s3);
236  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::FixedWeightPostingSource(wt=5.5)))");
237 }
238 
239 // Test for unserialising a query using the default registry.
// Like the posting-source query test, but unserialises through an explicitly
// passed, default-constructed Xapian::Registry (reg).
// NOTE(review): the embedded line numbers skip 241, 244-245, 248, 252, 255
// and 258, so the declarations of s1, s2, s3 and q2, the first unserialise
// call, and presumably the q/q2 description comparisons are not visible in
// this listing - confirm against the original file.
240 DEFINE_TESTCASE(serialise_query3, !backend) {
242  Xapian::Query q(&s1);
243  Xapian::Registry reg;
246  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueWeightPostingSource(slot=10)))");
247 
249  s2.set_default_weight(5.0);
250  q = Xapian::Query(&s2);
251  q2 = Xapian::Query::unserialise(q.serialise(), reg);
253  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueMapPostingSource(slot=11)))");
254 
256  q = Xapian::Query(&s3);
257  q2 = Xapian::Query::unserialise(q.serialise(), reg);
259  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::FixedWeightPostingSource(wt=5.5)))");
260 }
261 
// Body of MyPostingSource2, a custom posting source used by the tests below.
// Its only state is a description string, which doubles as its serialised
// form.
// NOTE(review): the class header (embedded line 262) is not visible in this
// listing; the constructor's initialiser list shows Xapian::ValuePostingSource
// as a base, constructed with 0.
263  std::string desc;
264  public:
265  MyPostingSource2(const std::string & desc_)
266  : Xapian::ValuePostingSource(0), desc(desc_)
267  {
268  }
269 
    // Returns a newly allocated copy carrying the same description.
    // NOTE(review): ownership of the returned pointer presumably passes to
    // the caller - confirm against the PostingSource API documentation.
270  MyPostingSource2* clone() const override {
271  return new MyPostingSource2(desc);
272  }
273 
    // Fixed class name; serialise_query4 and double_register_leak register
    // instances of this class with a Xapian::Registry.
274  std::string name() const override {
275  return "MyPostingSource2";
276  }
277 
    // The serialised form is simply the description string.
278  std::string serialise() const override {
279  return desc;
280  }
281 
    // Inverse of serialise(): the whole input string becomes the new
    // object's description.
282  MyPostingSource2* unserialise(const std::string& s) const override {
283  return new MyPostingSource2(s);
284  }
285 
    // Every document gets the same constant weight.
286  double get_weight() const override { return 1.0; }
287 
288  std::string get_description() const override {
289  return "MyPostingSource2(" + desc + ")";
290  }
291 };
292 
293 // Test for unserialising a query which contains a custom PostingSource.
// Serialises a query wrapping a MyPostingSource2, then unserialises it with
// a registry in which that posting source class has been registered.
// NOTE(review): the embedded line numbers skip 300, 302 and 306, so some
// statements are not visible in this listing. Presumably they check that
// unserialising fails before the source is registered and compare q2 with q
// afterwards - confirm against the original file.
294 DEFINE_TESTCASE(serialise_query4, !backend) {
295  MyPostingSource2 s1("foo");
296  Xapian::Query q(&s1);
297  TEST_EQUAL(q.get_description(), "Query(PostingSource(MyPostingSource2(foo)))");
298  std::string serialised = q.serialise();
299 
301  Xapian::Registry reg;
303 
    // Register the custom class before unserialising with this registry.
304  reg.register_posting_source(s1);
305  Xapian::Query q2 = Xapian::Query::unserialise(serialised, reg);
307 }
308 
// Registers the same posting source with one registry three times in a row.
// NOTE(review): the embedded line numbers skip 312 and 319-321, so any
// statements or assertions on those lines are not visible in this listing.
// Going by the test name, this presumably guards against a leak when a
// registration replaces an existing entry - confirm against the original
// file.
310 DEFINE_TESTCASE(double_register_leak, !backend) {
311  MyPostingSource2 s1("foo");
313 
314  Xapian::Registry reg;
315  reg.register_posting_source(s1);
316  reg.register_posting_source(s1);
317  reg.register_posting_source(s1);
318 
322 }
323 
// Body of ExceptionalPostingSource, a stub posting source whose clone() can
// be told to throw, used by registry1.
// NOTE(review): the class header (embedded line 324) is not visible in this
// listing, so the base class is not shown here; the PostingSource* return
// type of clone() suggests Xapian::PostingSource - confirm.
325  public:
    // NONE: clone() succeeds. CLONE: clone() throws bad_alloc.
326  typedef enum { NONE, CLONE } failmode;
327 
328  failmode fail;
329 
330  ExceptionalPostingSource(failmode fail_) : fail(fail_) { }
331 
332  string name() const override {
333  return "ExceptionalPostingSource";
334  }
335 
    // Throws bad_alloc in CLONE mode; otherwise returns a new object with
    // the same fail mode.
336  PostingSource* clone() const override {
337  if (fail == CLONE)
338  throw bad_alloc();
339  return new ExceptionalPostingSource(fail);
340  }
341 
    // The remaining overrides are stubs: empty bodies or fixed return
    // values, with at_end() always reporting true.
342  void init(const Xapian::Database&) override { }
343 
344  Xapian::doccount get_termfreq_min() const override { return 0; }
345  Xapian::doccount get_termfreq_est() const override { return 1; }
346  Xapian::doccount get_termfreq_max() const override { return 2; }
347 
348  void next(double) override { }
349 
350  void skip_to(Xapian::docid, double) override { }
351 
352  bool at_end() const override { return true; }
353  Xapian::docid get_docid() const override { return 0; }
354 };
355 
// Checks that Xapian::Registry copes when re-registering a posting source
// under an already registered name throws bad_alloc.
// NOTE(review): the embedded line numbers skip 362-364 and 367, so the
// declarations of eps and eps_clone are not visible in this listing.
// Presumably eps is built with NONE and eps_clone with CLONE - confirm
// against the original file.
357 DEFINE_TESTCASE(registry1, !backend) {
358  // Test that a replacement object throwing bad_alloc is handled.
359  {
360  Xapian::Registry reg;
361 
365  reg.register_posting_source(eps);
366  try {
    // Registering eps_clone is expected to throw bad_alloc; reaching
    // FAIL_TEST means no exception was raised.
368  reg.register_posting_source(eps_clone);
369  FAIL_TEST("Expected bad_alloc exception to be thrown");
370  } catch (const bad_alloc &) {
371  }
372 
373  // Either the old entry should be removed, or it should work.
374  const Xapian::PostingSource * p;
375  p = reg.get_posting_source("ExceptionalPostingSource");
    // A null result is accepted; a non-null result must report the
    // expected name.
376  if (p) {
377  TEST_EQUAL(p->name(), "ExceptionalPostingSource");
378  }
379  }
380 }
381 
// Body of ExceptionalWeight, a stub weighting scheme whose clone() can be
// told to throw, used by registry2.
// NOTE(review): the class header (embedded line 382) and the opening lines
// of two member function signatures (embedded lines 402-403 and 409) are not
// visible in this listing. The base class is presumably Xapian::Weight, and
// the two partial signatures are presumably get_sumpart() and get_sumextra()
// - confirm against the original file.
383  public:
    // NONE: clone() succeeds. CLONE: clone() throws bad_alloc.
384  typedef enum { NONE, CLONE } failmode;
385 
386  failmode fail;
387 
388  ExceptionalWeight(failmode fail_) : fail(fail_) { }
389 
390  string name() const override {
391  return "ExceptionalWeight";
392  }
393 
    // Throws bad_alloc in CLONE mode; otherwise returns a new object with
    // the same fail mode.
394  Weight* clone() const override {
395  if (fail == CLONE)
396  throw bad_alloc();
397  return new ExceptionalWeight(fail);
398  }
399 
400  void init(double) override { }
401 
    // Tail of a signature whose first lines are missing from this listing.
404  Xapian::termcount) const override {
405  return 0;
406  }
407  double get_maxpart() const override { return 0; }
408 
    // Tail of a signature whose first line is missing from this listing.
410  Xapian::termcount) const override {
411  return 0;
412  }
413  double get_maxextra() const override { return 0; }
414 };
415 
// Checks that Xapian::Registry copes when re-registering a weighting scheme
// under an already registered name throws bad_alloc.
// NOTE(review): the embedded line numbers skip 422 and 425, so the
// declarations of ewt and ewt_clone are not visible in this listing.
// Presumably ewt is built with NONE and ewt_clone with CLONE - confirm
// against the original file.
417 DEFINE_TESTCASE(registry2, !backend) {
418  // Test that a replacement object throwing bad_alloc is handled.
419  {
420  Xapian::Registry reg;
421 
423  reg.register_weighting_scheme(ewt);
424  try {
    // Registering ewt_clone is expected to throw bad_alloc; reaching
    // FAIL_TEST means no exception was raised.
426  reg.register_weighting_scheme(ewt_clone);
427  FAIL_TEST("Expected bad_alloc exception to be thrown");
428  } catch (const bad_alloc &) {
429  }
430 
431  // Either the old entry should be removed, or it should work.
432  const Xapian::Weight * p;
433  p = reg.get_weighting_scheme("ExceptionalWeight");
    // A null result is accepted; a non-null result must report the
    // expected name.
434  if (p) {
435  TEST_EQUAL(p->name(), "ExceptionalWeight");
436  }
437  }
438 }
439 
// Body of ExceptionalMatchSpy, a stub match spy whose clone() can be told to
// throw, used by registry3.
// NOTE(review): the class header (embedded line 440) is not visible in this
// listing; the base class is presumably Xapian::MatchSpy - confirm.
441  public:
    // NONE: clone() succeeds. CLONE: clone() throws bad_alloc.
442  typedef enum { NONE, CLONE } failmode;
443 
444  failmode fail;
445 
446  ExceptionalMatchSpy(failmode fail_) : fail(fail_) { }
447 
448  string name() const override {
449  return "ExceptionalMatchSpy";
450  }
451 
    // Throws bad_alloc in CLONE mode; otherwise returns a new object with
    // the same fail mode.
452  MatchSpy* clone() const override {
453  if (fail == CLONE)
454  throw bad_alloc();
455  return new ExceptionalMatchSpy(fail);
456  }
457 
    // Deliberately does nothing with the document or weight it is given.
458  void operator()(const Xapian::Document&, double) override {
459  }
460 };
461 
// Checks that Xapian::Registry copes when re-registering a match spy under
// an already registered name throws bad_alloc.
// NOTE(review): the embedded line numbers skip 468 and 471, so the
// declarations of ems and ems_clone are not visible in this listing.
// Presumably ems is built with NONE and ems_clone with CLONE - confirm
// against the original file.
463 DEFINE_TESTCASE(registry3, !backend) {
464  // Test that a replacement object throwing bad_alloc is handled.
465  {
466  Xapian::Registry reg;
467 
469  reg.register_match_spy(ems);
470  try {
    // Registering ems_clone is expected to throw bad_alloc; reaching
    // FAIL_TEST means no exception was raised.
472  reg.register_match_spy(ems_clone);
473  FAIL_TEST("Expected bad_alloc exception to be thrown");
474  } catch (const bad_alloc &) {
475  }
476 
477  // Either the old entry should be removed, or it should work.
478  const Xapian::MatchSpy * p;
479  p = reg.get_match_spy("ExceptionalMatchSpy");
    // A null result is accepted; a non-null result must report the
    // expected name.
480  if (p) {
481  TEST_EQUAL(p->name(), "ExceptionalMatchSpy");
482  }
483  }
484 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void register_weighting_scheme(const Xapian::Weight &wt)
Register a weighting scheme.
Definition: registry.cc:265
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
static const Query unserialise(const std::string &serialised, const Registry &reg=Registry())
Unserialise a string and return a Query object.
Definition: query.cc:202
void init(double) override
Allow the subclass to perform any initialisation it needs to.
double get_weight() const override
Return the weight contribution for the current document.
MatchSpy * clone() const override
Clone the match spy.
std::string name() const override
Name of the posting source class.
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::termcount termlist_count() const
The length of the termlist - i.e. the number of different terms which index this document.
Definition: omdocument.cc:191
virtual std::string name() const
Return the name of this weighting scheme.
Definition: weight.cc:135
Abstract base class for match spies.
Definition: matchspy.h:49
A posting source which looks up weights in a map using values as the key.
Xapian::docid get_docid() const override
Return the current docid.
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
virtual PostingSource * unserialise(const std::string &serialised) const
Create object given string serialisation returned by serialise().
Class for iterating over document values.
Definition: valueiterator.h:40
STL namespace.
const Xapian::PostingSource * get_posting_source(const std::string &name) const
Get a posting source given a name.
Definition: registry.cc:286
ExceptionalWeight(failmode fail_)
MyPostingSource2(const std::string &desc_)
string name() const override
Return the name of this weighting scheme.
ExceptionalPostingSource(failmode fail_)
TermIterator termlist_end() const
Equivalent end iterator for termlist_begin().
Definition: document.h:270
std::string serialise() const
Serialise document into a string.
Definition: omdocument.cc:227
Xapian::doccount get_termfreq_est() const override
An estimate of the number of documents this object can return.
test functionality of the Xapian API
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::doccount get_termfreq_max() const override
An upper bound on the number of documents this object can return.
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
const Xapian::MatchSpy * get_match_spy(const std::string &name) const
Get a match spy given a name.
Definition: registry.cc:300
std::string desc
DEFINE_TESTCASE(serialise_document1, !backend)
This class provides read/write access to a database.
Definition: database.h:789
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
void register_match_spy(const Xapian::MatchSpy &spy)
Register a user-defined match spy class.
Definition: registry.cc:293
void skip_to(Xapian::docid, double) override
Advance to the specified docid.
Registry for user subclasses.
Definition: registry.h:47
string name() const override
Name of the posting source class.
Public interfaces for the Xapian library.
MyPostingSource2 * clone() const override
Clone the posting source.
double get_sumextra(Xapian::termcount, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
A posting source which returns a fixed weight for all documents.
const Xapian::Weight * get_weighting_scheme(const std::string &name) const
Get the weighting scheme given a name.
Definition: registry.cc:272
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
Xapian::termcount values_count() const
Count the values in this document.
Definition: omdocument.cc:204
std::string serialise() const
Serialise this object into a string.
Definition: query.cc:193
Class for iterating over term positions.
void next(double) override
Advance the current position to the next matching document.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
bool at_end() const override
Return true if the current position is past the last entry in this list.
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:281
std::string serialise() const override
Serialise object parameters into a string.
A posting source which generates weights from a value slot.
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const override
Calculate the weight contribution for this object's term to a document.
std::string get_description() const override
Return a string describing this object.
void add_posting(const std::string &tname, Xapian::termpos tpos, Xapian::termcount wdfinc=1)
Add an occurrence of a term at a particular position.
Definition: omdocument.cc:128
A posting source which reads weights from a value slot.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
static Document unserialise(const std::string &serialised)
Unserialise a document from a string produced by serialise().
Definition: omdocument.cc:234
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
void register_posting_source(const Xapian::PostingSource &source)
Register a user-defined posting source class.
Definition: registry.cc:279
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
void operator()(const Xapian::Document &, double) override
Register a document with the match spy.
Xapian::doccount get_termfreq_min() const override
A lower bound on the number of documents this object can return.
MyPostingSource2 * unserialise(const std::string &s) const override
Create object given string serialisation returned by serialise().
Weight * clone() const override
Clone this object.
Xapian-specific test helper functions and macros.
virtual std::string serialise() const
Serialise object parameters into a string.
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
void init(const Xapian::Database &) override
Set this PostingSource to the start of the list of postings.
PostingSource * clone() const override
Clone the posting source.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
ExceptionalMatchSpy(failmode fail_)
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
TermIterator termlist_begin() const
Start iterating the terms in this document.
Definition: omdocument.cc:197
string name() const override
Return the name of this match spy.
virtual std::string name() const
Name of the posting source class.
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:546
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
Abstract base class for weighting schemes.
Definition: weight.h:35
virtual std::string name() const
Return the name of this match spy.
Definition: matchspy.cc:56