xapian-core  1.4.25
api_serialise.cc
Go to the documentation of this file.
1 
4 /* Copyright 2009 Lemur Consulting Ltd
5  * Copyright 2009,2011,2012,2013 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "api_serialise.h"
25 
26 #include <xapian.h>
27 
28 #include <exception>
29 #include <stdexcept>
30 
31 #include "apitest.h"
32 #include "testutils.h"
33 
34 using namespace std;
35 
36 // Test for serialising a document
37 DEFINE_TESTCASE(serialise_document1, !backend) {
38  Xapian::Document doc;
39 
40  // Test serialising and unserialising an empty document.
42  TEST_EQUAL(doc1.termlist_count(), 0);
43  TEST_EQUAL(doc1.termlist_begin(), doc1.termlist_end());
44  TEST_EQUAL(doc1.values_count(), 0);
45  TEST_EQUAL(doc1.values_begin(), doc1.values_end());
46  TEST_EQUAL(doc1.get_data(), "");
47 
48  // Test serialising a document with things in.
49  doc.add_term("foo", 2);
50  doc.add_posting("foo", 10);
51  doc.add_value(1, "bar");
52  doc.set_data("baz");
53 
55 
57  TEST_EQUAL(doc.termlist_count(), 1);
61 
62  i = doc.termlist_begin();
63  TEST_NOT_EQUAL(i, doc.termlist_end());
64  TEST_EQUAL(i.get_wdf(), 3);
65  TEST_EQUAL(*i, "foo");
66  TEST_EQUAL(i.positionlist_count(), 1);
67  j = i.positionlist_begin();
68  TEST_NOT_EQUAL(j, i.positionlist_end());
69  TEST_EQUAL(*j, 10);
70  ++j;
71  TEST_EQUAL(j, i.positionlist_end());
72  ++i;
73  TEST_EQUAL(i, doc.termlist_end());
74 
75  TEST_EQUAL(doc.values_count(), 1);
76  k = doc.values_begin();
77  TEST_NOT_EQUAL(k, doc.values_end());
78  TEST_EQUAL(k.get_valueno(), 1);
79  TEST_EQUAL(*k, "bar");
80  ++k;
81  TEST_EQUAL(k, doc.values_end());
82 
83  TEST_EQUAL(doc.get_data(), "baz");
84 
85  i = doc2.termlist_begin();
86  TEST_NOT_EQUAL(i, doc2.termlist_end());
87  TEST_EQUAL(i.get_wdf(), 3);
88  TEST_EQUAL(*i, "foo");
89  TEST_EQUAL(i.positionlist_count(), 1);
90  j = i.positionlist_begin();
91  TEST_NOT_EQUAL(j, i.positionlist_end());
92  TEST_EQUAL(*j, 10);
93  ++j;
94  TEST_EQUAL(j, i.positionlist_end());
95  ++i;
96  TEST_EQUAL(i, doc2.termlist_end());
97 
98  TEST_EQUAL(doc2.values_count(), 1);
99  k = doc2.values_begin();
100  TEST_NOT_EQUAL(k, doc2.values_end());
101  TEST_EQUAL(k.get_valueno(), 1);
102  TEST_EQUAL(*k, "bar");
103  ++k;
104  TEST_EQUAL(k, doc2.values_end());
105 
106  TEST_EQUAL(doc2.get_data(), "baz");
107 }
108 
109 // Test for serialising a document obtained from a database.
110 DEFINE_TESTCASE(serialise_document2, backend) {
111  Xapian::Database db = get_database("serialise_document2",
112  [](Xapian::WritableDatabase& wdb,
113  const string&) {
114  Xapian::Document doc;
115  doc.add_term("foo", 2);
116  doc.add_posting("foo", 10);
117  doc.add_value(1, "bar");
118  doc.set_data("baz");
119  wdb.add_document(doc);
120  });
121 
122  Xapian::Document doc = db.get_document(1);
123 
124  Xapian::Document doc2 = Xapian::Document::unserialise(doc.serialise());
125 
126  TEST_EQUAL(doc.termlist_count(), doc2.termlist_count());
127  TEST_EQUAL(doc.termlist_count(), 1);
131 
132  i = doc.termlist_begin();
133  TEST_NOT_EQUAL(i, doc.termlist_end());
134  TEST_EQUAL(i.get_wdf(), 3);
135  TEST_EQUAL(*i, "foo");
136  TEST_EQUAL(i.positionlist_count(), 1);
137  j = i.positionlist_begin();
138  TEST_NOT_EQUAL(j, i.positionlist_end());
139  TEST_EQUAL(*j, 10);
140  ++j;
141  TEST_EQUAL(j, i.positionlist_end());
142  ++i;
143  TEST_EQUAL(i, doc.termlist_end());
144 
145  TEST_EQUAL(doc.values_count(), 1);
146  k = doc.values_begin();
147  TEST_NOT_EQUAL(k, doc.values_end());
148  TEST_EQUAL(k.get_valueno(), 1);
149  TEST_EQUAL(*k, "bar");
150  ++k;
151  TEST_EQUAL(k, doc.values_end());
152 
153  TEST_EQUAL(doc.get_data(), "baz");
154 
155  i = doc2.termlist_begin();
156  TEST_NOT_EQUAL(i, doc2.termlist_end());
157  TEST_EQUAL(i.get_wdf(), 3);
158  TEST_EQUAL(*i, "foo");
159  TEST_EQUAL(i.positionlist_count(), 1);
160  j = i.positionlist_begin();
161  TEST_NOT_EQUAL(j, i.positionlist_end());
162  TEST_EQUAL(*j, 10);
163  ++j;
164  TEST_EQUAL(j, i.positionlist_end());
165  ++i;
166  TEST_EQUAL(i, doc2.termlist_end());
167 
168  TEST_EQUAL(doc2.values_count(), 1);
169  k = doc2.values_begin();
170  TEST_NOT_EQUAL(k, doc2.values_end());
171  TEST_EQUAL(k.get_valueno(), 1);
172  TEST_EQUAL(*k, "bar");
173  ++k;
174  TEST_EQUAL(k, doc2.values_end());
175 
176  TEST_EQUAL(doc2.get_data(), "baz");
177 }
178 
179 // Test for serialising a query: each query is round-tripped through serialise()/unserialise() and the descriptions compared.
180 DEFINE_TESTCASE(serialise_query1, !backend) {
181  Xapian::Query q;
182  Xapian::Query q2 = Xapian::Query::unserialise(q.serialise());
183  TEST_EQUAL(q.get_description(), q2.get_description());
184  TEST_EQUAL(q.get_description(), "Query()");
185 
186  q = Xapian::Query("hello");
187  q2 = Xapian::Query::unserialise(q.serialise());
188  TEST_EQUAL(q.get_description(), q2.get_description());
189  TEST_EQUAL(q.get_description(), "Query(hello)");
190 
191  q = Xapian::Query("hello", 1, 1);
192  q2 = Xapian::Query::unserialise(q.serialise());
193  // Regression test for fix in Xapian 1.0.0.
194  TEST_EQUAL(q.get_description(), q2.get_description());
195  TEST_EQUAL(q.get_description(), "Query(hello@1)");
196 
197  q = Xapian::Query(q.OP_OR, Xapian::Query("hello"), Xapian::Query("world"));
198  q2 = Xapian::Query::unserialise(q.serialise());
199  TEST_EQUAL(q.get_description(), q2.get_description());
200  TEST_EQUAL(q.get_description(), "Query((hello OR world))");
201 
202  q = Xapian::Query(q.OP_OR,
203  Xapian::Query("hello", 1, 1),
204  Xapian::Query("world", 1, 1));
205  q2 = Xapian::Query::unserialise(q.serialise());
206  TEST_EQUAL(q.get_description(), q2.get_description());
207  TEST_EQUAL(q.get_description(), "Query((hello@1 OR world@1))");
208 
209  static const char * const phrase[] = { "shaken", "not", "stirred" };
210  q = Xapian::Query(q.OP_PHRASE, phrase, phrase + 3);
211  q = Xapian::Query(q.OP_OR, Xapian::Query("007"), q);
212  q = Xapian::Query(q.OP_SCALE_WEIGHT, q, 3.14);
213  q2 = Xapian::Query::unserialise(q.serialise()); // Nested phrase/OR/scale-weight query: only the round trip is checked, not an exact description.
214  TEST_EQUAL(q.get_description(), q2.get_description());
215 }
216 
217 // Test for serialising a query which contains a PostingSource.
218 DEFINE_TESTCASE(serialise_query2, !backend) {
219  Xapian::ValueWeightPostingSource s1(10);
220  Xapian::Query q(&s1);
221  Xapian::Query q2 = Xapian::Query::unserialise(q.serialise());
222  TEST_EQUAL(q.get_description(), q2.get_description());
223  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueWeightPostingSource(slot=10)))");
224 
225  Xapian::ValueMapPostingSource s2(11);
226  s2.set_default_weight(5.0);
227  q = Xapian::Query(&s2);
228  q2 = Xapian::Query::unserialise(q.serialise());
229  TEST_EQUAL(q.get_description(), q2.get_description());
230  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueMapPostingSource(slot=11)))");
231 
232  Xapian::FixedWeightPostingSource s3(5.5);
233  q = Xapian::Query(&s3);
234  q2 = Xapian::Query::unserialise(q.serialise());
235  TEST_EQUAL(q.get_description(), q2.get_description());
236  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::FixedWeightPostingSource(wt=5.5)))");
237 }
238 
239 // Test for unserialising a query using the default registry.
240 DEFINE_TESTCASE(serialise_query3, !backend) {
241  Xapian::ValueWeightPostingSource s1(10);
242  Xapian::Query q(&s1);
243  Xapian::Registry reg; // Default-constructed registry, passed explicitly to unserialise() below.
244  Xapian::Query q2 = Xapian::Query::unserialise(q.serialise(), reg);
245  TEST_EQUAL(q.get_description(), q2.get_description());
246  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueWeightPostingSource(slot=10)))");
247 
248  Xapian::ValueMapPostingSource s2(11);
249  s2.set_default_weight(5.0);
250  q = Xapian::Query(&s2);
251  q2 = Xapian::Query::unserialise(q.serialise(), reg);
252  TEST_EQUAL(q.get_description(), q2.get_description());
253  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueMapPostingSource(slot=11)))");
254 
255  Xapian::FixedWeightPostingSource s3(5.5);
256  q = Xapian::Query(&s3);
257  q2 = Xapian::Query::unserialise(q.serialise(), reg);
258  TEST_EQUAL(q.get_description(), q2.get_description());
259  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::FixedWeightPostingSource(wt=5.5)))");
260 }
261 
262 class MyPostingSource2 : public Xapian::ValuePostingSource { // Custom posting source whose serialised form is simply its description string.
263  std::string desc;
264  public:
265  MyPostingSource2(const std::string & desc_)
266  : Xapian::ValuePostingSource(0), desc(desc_)
267  {
268  }
269 
270  MyPostingSource2 * clone() const
271  {
272  return new MyPostingSource2(desc);
273  }
274 
275  std::string name() const {
276  return "MyPostingSource2";
277  }
278 
279  std::string serialise() const {
280  return desc;
281  }
282 
283  MyPostingSource2 * unserialise(const std::string & s) const {
284  return new MyPostingSource2(s); // Inverse of serialise(): the string is the description.
285  }
286 
287  double get_weight() const { return 1.0; }
288 
289  std::string get_description() const {
290  return "MyPostingSource2(" + desc + ")";
291  }
292 };
293 
294 // Test for unserialising a query which contains a custom PostingSource.
295 DEFINE_TESTCASE(serialise_query4, !backend) {
296  MyPostingSource2 s1("foo");
297  Xapian::Query q(&s1);
298  TEST_EQUAL(q.get_description(), "Query(PostingSource(MyPostingSource2(foo)))");
299  std::string serialised = q.serialise();
300 
301  TEST_EXCEPTION(Xapian::SerialisationError, Xapian::Query::unserialise(serialised)); // Custom source is unknown to the default registry.
302  Xapian::Registry reg;
303  TEST_EXCEPTION(Xapian::SerialisationError, Xapian::Query::unserialise(serialised, reg)); // Still unknown until registered below.
304 
305  reg.register_posting_source(s1);
306  Xapian::Query q2 = Xapian::Query::unserialise(serialised, reg);
307  TEST_EQUAL(q.get_description(), q2.get_description());
308 }
309 
310 /// Test that registering the same object repeatedly in a Registry works (intended for running under a leak checker).
311 DEFINE_TESTCASE(double_register_leak, !backend) {
312  MyPostingSource2 s1("foo");
313  Xapian::BM25Weight w1;
314 
315  Xapian::Registry reg;
316  reg.register_posting_source(s1);
317  reg.register_posting_source(s1);
318  reg.register_posting_source(s1);
319 
320  reg.register_weighting_scheme(w1);
321  reg.register_weighting_scheme(w1);
322  reg.register_weighting_scheme(w1);
323 }
324 
325 class ExceptionalPostingSource : public Xapian::PostingSource { // Posting source whose clone() can be made to throw bad_alloc.
326  public:
327  typedef enum { NONE, CLONE } failmode;
328 
329  failmode fail;
330 
331  ExceptionalPostingSource(failmode fail_) : fail(fail_) { }
332 
333  string name() const {
334  return "ExceptionalPostingSource";
335  }
336 
337  PostingSource * clone() const {
338  if (fail == CLONE) // Simulate an allocation failure while cloning.
339  throw bad_alloc();
340  return new ExceptionalPostingSource(fail);
341  }
342 
343  void init(const Xapian::Database &) { }
344 
345  Xapian::doccount get_termfreq_min() const { return 0; }
346  Xapian::doccount get_termfreq_est() const { return 1; }
347  Xapian::doccount get_termfreq_max() const { return 2; }
348 
349  void next(double) { }
350 
351  void skip_to(Xapian::docid, double) { }
352 
353  bool at_end() const { return true; }
354  Xapian::docid get_docid() const { return 0; }
355 };
356 
357 /// Check that an exception thrown while registering a posting source is handled cleanly.
358 DEFINE_TESTCASE(registry1, !backend) {
359  // Test that a replacement object throwing bad_alloc is handled.
360  {
361  Xapian::Registry reg;
362 
363  ExceptionalPostingSource eps(ExceptionalPostingSource::NONE);
364  TEST_EXCEPTION(Xapian::UnimplementedError, eps.serialise());
365  TEST_EXCEPTION(Xapian::UnimplementedError, eps.unserialise(string()));
366  reg.register_posting_source(eps);
367  try {
368  ExceptionalPostingSource eps_clone(ExceptionalPostingSource::CLONE);
369  reg.register_posting_source(eps_clone);
370  FAIL_TEST("Expected bad_alloc exception to be thrown");
371  } catch (const bad_alloc &) {
372  }
373 
374  // Either the old entry should be removed, or it should work.
375  const Xapian::PostingSource * p;
376  p = reg.get_posting_source("ExceptionalPostingSource");
377  if (p) {
378  TEST_EQUAL(p->name(), "ExceptionalPostingSource");
379  }
380  }
381 }
382 
383 class ExceptionalWeight : public Xapian::Weight { // Weighting scheme whose clone() can be made to throw bad_alloc.
384  public:
385  typedef enum { NONE, CLONE } failmode;
386 
387  failmode fail;
388 
389  ExceptionalWeight(failmode fail_) : fail(fail_) { }
390 
391  string name() const {
392  return "ExceptionalWeight";
393  }
394 
395  Weight * clone() const {
396  if (fail == CLONE) // Simulate an allocation failure while cloning.
397  throw bad_alloc();
398  return new ExceptionalWeight(fail);
399  }
400 
401  void init(double) { }
402 
403  double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const {
404  return 0;
405  }
406  double get_maxpart() const { return 0; }
407 
408  double get_sumextra(Xapian::termcount, Xapian::termcount) const { return 0; }
409  double get_maxextra() const { return 0; }
410 };
411 
412 /// Check that an exception thrown while registering a weighting scheme is handled cleanly.
413 DEFINE_TESTCASE(registry2, !backend) {
414  // Test that a replacement object throwing bad_alloc is handled.
415  {
416  Xapian::Registry reg;
417 
418  ExceptionalWeight ewt(ExceptionalWeight::NONE);
419  reg.register_weighting_scheme(ewt);
420  try {
421  ExceptionalWeight ewt_clone(ExceptionalWeight::CLONE);
422  reg.register_weighting_scheme(ewt_clone);
423  FAIL_TEST("Expected bad_alloc exception to be thrown");
424  } catch (const bad_alloc &) {
425  }
426 
427  // Either the old entry should be removed, or it should work.
428  const Xapian::Weight * p;
429  p = reg.get_weighting_scheme("ExceptionalWeight");
430  if (p) {
431  TEST_EQUAL(p->name(), "ExceptionalWeight");
432  }
433  }
434 }
435 
436 class ExceptionalMatchSpy : public Xapian::MatchSpy { // Match spy whose clone() can be made to throw bad_alloc.
437  public:
438  typedef enum { NONE, CLONE } failmode;
439 
440  failmode fail;
441 
442  ExceptionalMatchSpy(failmode fail_) : fail(fail_) { }
443 
444  string name() const {
445  return "ExceptionalMatchSpy";
446  }
447 
448  MatchSpy * clone() const {
449  if (fail == CLONE) // Simulate an allocation failure while cloning.
450  throw bad_alloc();
451  return new ExceptionalMatchSpy(fail);
452  }
453 
454  void operator()(const Xapian::Document &, double) {
455  }
456 };
457 
458 /// Check that an exception thrown while registering a match spy is handled cleanly.
459 DEFINE_TESTCASE(registry3, !backend) {
460  // Test that a replacement object throwing bad_alloc is handled.
461  {
462  Xapian::Registry reg;
463 
464  ExceptionalMatchSpy ems(ExceptionalMatchSpy::NONE);
465  reg.register_match_spy(ems);
466  try {
467  ExceptionalMatchSpy ems_clone(ExceptionalMatchSpy::CLONE);
468  reg.register_match_spy(ems_clone);
469  FAIL_TEST("Expected bad_alloc exception to be thrown");
470  } catch (const bad_alloc &) {
471  }
472 
473  // Either the old entry should be removed, or it should work.
474  const Xapian::MatchSpy * p;
475  p = reg.get_match_spy("ExceptionalMatchSpy");
476  if (p) {
477  TEST_EQUAL(p->name(), "ExceptionalMatchSpy");
478  }
479  }
480 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
double get_sumextra(Xapian::termcount, Xapian::termcount) const
Calculate the term-independent weight component for a document.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void register_weighting_scheme(const Xapian::Weight &wt)
Register a weighting scheme.
Definition: registry.cc:265
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
static const Query unserialise(const std::string &serialised, const Registry &reg=Registry())
Unserialise a string and return a Query object.
Definition: query.cc:202
MyPostingSource2 * unserialise(const std::string &s) const
Create object given string serialisation returned by serialise().
string name() const
Name of the posting source class.
void next(double)
Advance the current position to the next matching document.
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::termcount termlist_count() const
The length of the termlist - i.e. the number of different terms which index this document.
Definition: omdocument.cc:191
virtual std::string name() const
Return the name of this weighting scheme.
Definition: weight.cc:135
Abstract base class for match spies.
Definition: matchspy.h:49
bool at_end() const
Return true if the current position is past the last entry in this list.
A posting source which looks up weights in a map using values as the key.
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
virtual PostingSource * unserialise(const std::string &serialised) const
Create object given string serialisation returned by serialise().
std::string name() const
Name of the posting source class.
Class for iterating over document values.
Definition: valueiterator.h:40
STL namespace.
void operator()(const Xapian::Document &, double)
Register a document with the match spy.
const Xapian::PostingSource * get_posting_source(const std::string &name) const
Get a posting source given a name.
Definition: registry.cc:286
ExceptionalWeight(failmode fail_)
MyPostingSource2(const std::string &desc_)
ExceptionalPostingSource(failmode fail_)
double get_weight() const
Return the weight contribution for the current document.
TermIterator termlist_end() const
Equivalent end iterator for termlist_begin().
Definition: document.h:270
std::string serialise() const
Serialise document into a string.
Definition: omdocument.cc:227
void skip_to(Xapian::docid, double)
Advance to the specified docid.
PostingSource * clone() const
Clone the posting source.
MyPostingSource2 * clone() const
Clone the posting source.
test functionality of the Xapian API
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
const Xapian::MatchSpy * get_match_spy(const std::string &name) const
Get a match spy given a name.
Definition: registry.cc:300
std::string desc
Weight * clone() const
Clone this object.
DEFINE_TESTCASE(serialise_document1, !backend)
MatchSpy * clone() const
Clone the match spy.
This class provides read/write access to a database.
Definition: database.h:789
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
void register_match_spy(const Xapian::MatchSpy &spy)
Register a user-defined match spy class.
Definition: registry.cc:293
Registry for user subclasses.
Definition: registry.h:47
Public interfaces for the Xapian library.
A posting source which returns a fixed weight for all documents.
const Xapian::Weight * get_weighting_scheme(const std::string &name) const
Get the weighting scheme given a name.
Definition: registry.cc:272
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
Xapian::termcount values_count() const
Count the values in this document.
Definition: omdocument.cc:204
std::string serialise() const
Serialise this object into a string.
Definition: query.cc:193
Class for iterating over term positions.
string name() const
Return the name of this match spy.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
string name() const
Return the name of this weighting scheme.
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:281
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const
Calculate the weight contribution for this object's term to a document.
A posting source which generates weights from a value slot.
void init(double)
Allow the subclass to perform any initialisation it needs to.
void add_posting(const std::string &tname, Xapian::termpos tpos, Xapian::termcount wdfinc=1)
Add an occurrence of a term at a particular position.
Definition: omdocument.cc:128
std::string serialise() const
Serialise object parameters into a string.
std::string get_description() const
Return a string describing this object.
double get_maxpart() const
Return an upper bound on what get_sumpart() can return for any document.
A posting source which reads weights from a value slot.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
static Document unserialise(const std::string &serialised)
Unserialise a document from a string produced by serialise().
Definition: omdocument.cc:234
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
void register_posting_source(const Xapian::PostingSource &source)
Register a user-defined posting source class.
Definition: registry.cc:279
double get_maxextra() const
Return an upper bound on what get_sumextra() can return for any document.
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian-specific test helper functions and macros.
virtual std::string serialise() const
Serialise object parameters into a string.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
ExceptionalMatchSpy(failmode fail_)
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
TermIterator termlist_begin() const
Start iterating the terms in this document.
Definition: omdocument.cc:197
virtual std::string name() const
Name of the posting source class.
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:535
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::docid get_docid() const
Return the current docid.
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
Abstract base class for weighting schemes.
Definition: weight.h:35
virtual std::string name() const
Return the name of this match spy.
Definition: matchspy.cc:56