xapian-core  1.4.19
api_serialise.cc
Go to the documentation of this file.
1 
4 /* Copyright 2009 Lemur Consulting Ltd
5  * Copyright 2009,2011,2012,2013 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "api_serialise.h"
25 
26 #include <xapian.h>
27 
28 #include <exception>
29 #include <stdexcept>
30 
31 #include "apitest.h"
32 #include "testutils.h"
33 
34 using namespace std;
35 
36 // Test for serialising a document
37 DEFINE_TESTCASE(serialise_document1, !backend) {
38  Xapian::Document doc;
39 
40  // Test serialising and unserialising an empty document.
42  TEST_EQUAL(doc1.termlist_count(), 0);
43  TEST_EQUAL(doc1.termlist_begin(), doc1.termlist_end());
44  TEST_EQUAL(doc1.values_count(), 0);
45  TEST_EQUAL(doc1.values_begin(), doc1.values_end());
46  TEST_EQUAL(doc1.get_data(), "");
47 
48  // Test serialising a document with things in.
49  doc.add_term("foo", 2);
50  doc.add_posting("foo", 10);
51  doc.add_value(1, "bar");
52  doc.set_data("baz");
53 
55 
57  TEST_EQUAL(doc.termlist_count(), 1);
61 
62  i = doc.termlist_begin();
63  TEST_NOT_EQUAL(i, doc.termlist_end());
64  TEST_EQUAL(i.get_wdf(), 3);
65  TEST_EQUAL(*i, "foo");
66  TEST_EQUAL(i.positionlist_count(), 1);
67  j = i.positionlist_begin();
68  TEST_NOT_EQUAL(j, i.positionlist_end());
69  TEST_EQUAL(*j, 10);
70  ++j;
71  TEST_EQUAL(j, i.positionlist_end());
72  ++i;
73  TEST_EQUAL(i, doc.termlist_end());
74 
75  TEST_EQUAL(doc.values_count(), 1);
76  k = doc.values_begin();
77  TEST_NOT_EQUAL(k, doc.values_end());
78  TEST_EQUAL(k.get_valueno(), 1);
79  TEST_EQUAL(*k, "bar");
80  ++k;
81  TEST_EQUAL(k, doc.values_end());
82 
83  TEST_EQUAL(doc.get_data(), "baz");
84 
85  i = doc2.termlist_begin();
86  TEST_NOT_EQUAL(i, doc2.termlist_end());
87  TEST_EQUAL(i.get_wdf(), 3);
88  TEST_EQUAL(*i, "foo");
89  TEST_EQUAL(i.positionlist_count(), 1);
90  j = i.positionlist_begin();
91  TEST_NOT_EQUAL(j, i.positionlist_end());
92  TEST_EQUAL(*j, 10);
93  ++j;
94  TEST_EQUAL(j, i.positionlist_end());
95  ++i;
96  TEST_EQUAL(i, doc2.termlist_end());
97 
98  TEST_EQUAL(doc2.values_count(), 1);
99  k = doc2.values_begin();
100  TEST_NOT_EQUAL(k, doc2.values_end());
101  TEST_EQUAL(k.get_valueno(), 1);
102  TEST_EQUAL(*k, "bar");
103  ++k;
104  TEST_EQUAL(k, doc2.values_end());
105 
106  TEST_EQUAL(doc2.get_data(), "baz");
107 }
108 
109 // Test for serialising a document obtained from a database.
110 DEFINE_TESTCASE(serialise_document2, writable) {
111  Xapian::Document origdoc;
112  origdoc.add_term("foo", 2);
113  origdoc.add_posting("foo", 10);
114  origdoc.add_value(1, "bar");
115  origdoc.set_data("baz");
117  db.add_document(origdoc);
118 
119  Xapian::Document doc = db.get_document(1);
120 
122 
124  TEST_EQUAL(doc.termlist_count(), 1);
128 
129  i = doc.termlist_begin();
130  TEST_NOT_EQUAL(i, doc.termlist_end());
131  TEST_EQUAL(i.get_wdf(), 3);
132  TEST_EQUAL(*i, "foo");
133  TEST_EQUAL(i.positionlist_count(), 1);
134  j = i.positionlist_begin();
135  TEST_NOT_EQUAL(j, i.positionlist_end());
136  TEST_EQUAL(*j, 10);
137  ++j;
138  TEST_EQUAL(j, i.positionlist_end());
139  ++i;
140  TEST_EQUAL(i, doc.termlist_end());
141 
142  TEST_EQUAL(doc.values_count(), 1);
143  k = doc.values_begin();
144  TEST_NOT_EQUAL(k, doc.values_end());
145  TEST_EQUAL(k.get_valueno(), 1);
146  TEST_EQUAL(*k, "bar");
147  ++k;
148  TEST_EQUAL(k, doc.values_end());
149 
150  TEST_EQUAL(doc.get_data(), "baz");
151 
152  i = doc2.termlist_begin();
153  TEST_NOT_EQUAL(i, doc2.termlist_end());
154  TEST_EQUAL(i.get_wdf(), 3);
155  TEST_EQUAL(*i, "foo");
156  TEST_EQUAL(i.positionlist_count(), 1);
157  j = i.positionlist_begin();
158  TEST_NOT_EQUAL(j, i.positionlist_end());
159  TEST_EQUAL(*j, 10);
160  ++j;
161  TEST_EQUAL(j, i.positionlist_end());
162  ++i;
163  TEST_EQUAL(i, doc2.termlist_end());
164 
165  TEST_EQUAL(doc2.values_count(), 1);
166  k = doc2.values_begin();
167  TEST_NOT_EQUAL(k, doc2.values_end());
168  TEST_EQUAL(k.get_valueno(), 1);
169  TEST_EQUAL(*k, "bar");
170  ++k;
171  TEST_EQUAL(k, doc2.values_end());
172 
173  TEST_EQUAL(doc2.get_data(), "baz");
174 }
175 
176 // Test for serialising a query
177 DEFINE_TESTCASE(serialise_query1, !backend) {
178  Xapian::Query q;
181  TEST_EQUAL(q.get_description(), "Query()");
182 
183  q = Xapian::Query("hello");
184  q2 = Xapian::Query::unserialise(q.serialise());
185  TEST_EQUAL(q.get_description(), q2.get_description());
186  TEST_EQUAL(q.get_description(), "Query(hello)");
187 
188  q = Xapian::Query("hello", 1, 1);
189  q2 = Xapian::Query::unserialise(q.serialise());
190  // Regression test for fix in Xapian 1.0.0.
191  TEST_EQUAL(q.get_description(), q2.get_description());
192  TEST_EQUAL(q.get_description(), "Query(hello@1)");
193 
194  q = Xapian::Query(q.OP_OR, Xapian::Query("hello"), Xapian::Query("world"));
195  q2 = Xapian::Query::unserialise(q.serialise());
196  TEST_EQUAL(q.get_description(), q2.get_description());
197  TEST_EQUAL(q.get_description(), "Query((hello OR world))");
198 
199  q = Xapian::Query(q.OP_OR,
200  Xapian::Query("hello", 1, 1),
201  Xapian::Query("world", 1, 1));
202  q2 = Xapian::Query::unserialise(q.serialise());
203  TEST_EQUAL(q.get_description(), q2.get_description());
204  TEST_EQUAL(q.get_description(), "Query((hello@1 OR world@1))");
205 
206  static const char * const phrase[] = { "shaken", "not", "stirred" };
207  q = Xapian::Query(q.OP_PHRASE, phrase, phrase + 3);
208  q = Xapian::Query(q.OP_OR, Xapian::Query("007"), q);
209  q = Xapian::Query(q.OP_SCALE_WEIGHT, q, 3.14);
210  q2 = Xapian::Query::unserialise(q.serialise());
211  TEST_EQUAL(q.get_description(), q2.get_description());
212 }
213 
214 // Test for serialising a query which contains a PostingSource.
215 DEFINE_TESTCASE(serialise_query2, !backend) {
217  Xapian::Query q(&s1);
220  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueWeightPostingSource(slot=10)))");
221 
223  s2.set_default_weight(5.0);
224  q = Xapian::Query(&s2);
227  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueMapPostingSource(slot=11)))");
228 
230  q = Xapian::Query(&s3);
233  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::FixedWeightPostingSource(wt=5.5)))");
234 }
235 
236 // Test for unserialising a query using the default registry.
237 DEFINE_TESTCASE(serialise_query3, !backend) {
239  Xapian::Query q(&s1);
240  Xapian::Registry reg;
243  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueWeightPostingSource(slot=10)))");
244 
246  s2.set_default_weight(5.0);
247  q = Xapian::Query(&s2);
248  q2 = Xapian::Query::unserialise(q.serialise(), reg);
250  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::ValueMapPostingSource(slot=11)))");
251 
253  q = Xapian::Query(&s3);
254  q2 = Xapian::Query::unserialise(q.serialise(), reg);
256  TEST_EQUAL(q.get_description(), "Query(PostingSource(Xapian::FixedWeightPostingSource(wt=5.5)))");
257 }
258 
260  std::string desc;
261  public:
262  MyPostingSource2(const std::string & desc_)
263  : Xapian::ValuePostingSource(0), desc(desc_)
264  {
265  }
266 
268  {
269  return new MyPostingSource2(desc);
270  }
271 
272  std::string name() const {
273  return "MyPostingSource2";
274  }
275 
276  std::string serialise() const {
277  return desc;
278  }
279 
280  MyPostingSource2 * unserialise(const std::string & s) const {
281  return new MyPostingSource2(s);
282  }
283 
284  double get_weight() const { return 1.0; }
285 
286  std::string get_description() const {
287  return "MyPostingSource2(" + desc + ")";
288  }
289 };
290 
291 // Test for unserialising a query which contains a custom PostingSource.
292 DEFINE_TESTCASE(serialise_query4, !backend) {
293  MyPostingSource2 s1("foo");
294  Xapian::Query q(&s1);
295  TEST_EQUAL(q.get_description(), "Query(PostingSource(MyPostingSource2(foo)))");
296  std::string serialised = q.serialise();
297 
299  Xapian::Registry reg;
301 
302  reg.register_posting_source(s1);
303  Xapian::Query q2 = Xapian::Query::unserialise(serialised, reg);
305 }
306 
308 DEFINE_TESTCASE(double_register_leak, !backend) {
309  MyPostingSource2 s1("foo");
311 
312  Xapian::Registry reg;
313  reg.register_posting_source(s1);
314  reg.register_posting_source(s1);
315  reg.register_posting_source(s1);
316 
320 }
321 
323  public:
324  typedef enum { NONE, CLONE } failmode;
325 
326  failmode fail;
327 
328  ExceptionalPostingSource(failmode fail_) : fail(fail_) { }
329 
330  string name() const {
331  return "ExceptionalPostingSource";
332  }
333 
334  PostingSource * clone() const {
335  if (fail == CLONE)
336  throw bad_alloc();
337  return new ExceptionalPostingSource(fail);
338  }
339 
340  void init(const Xapian::Database &) { }
341 
342  Xapian::doccount get_termfreq_min() const { return 0; }
343  Xapian::doccount get_termfreq_est() const { return 1; }
344  Xapian::doccount get_termfreq_max() const { return 2; }
345 
346  void next(double) { }
347 
348  void skip_to(Xapian::docid, double) { }
349 
350  bool at_end() const { return true; }
351  Xapian::docid get_docid() const { return 0; }
352 };
353 
355 DEFINE_TESTCASE(registry1, !backend) {
356  // Test that a replacement object throwing bad_alloc is handled.
357  {
358  Xapian::Registry reg;
359 
363  reg.register_posting_source(eps);
364  try {
366  reg.register_posting_source(eps_clone);
367  FAIL_TEST("Expected bad_alloc exception to be thrown");
368  } catch (const bad_alloc &) {
369  }
370 
371  // Either the old entry should be removed, or it should work.
372  const Xapian::PostingSource * p;
373  p = reg.get_posting_source("ExceptionalPostingSource");
374  if (p) {
375  TEST_EQUAL(p->name(), "ExceptionalPostingSource");
376  }
377  }
378 }
379 
381  public:
382  typedef enum { NONE, CLONE } failmode;
383 
384  failmode fail;
385 
386  ExceptionalWeight(failmode fail_) : fail(fail_) { }
387 
388  string name() const {
389  return "ExceptionalWeight";
390  }
391 
392  Weight * clone() const {
393  if (fail == CLONE)
394  throw bad_alloc();
395  return new ExceptionalWeight(fail);
396  }
397 
398  void init(double) { }
399 
401  return 0;
402  }
403  double get_maxpart() const { return 0; }
404 
405  double get_sumextra(Xapian::termcount, Xapian::termcount) const { return 0; }
406  double get_maxextra() const { return 0; }
407 };
408 
410 DEFINE_TESTCASE(registry2, !backend) {
411  // Test that a replacement object throwing bad_alloc is handled.
412  {
413  Xapian::Registry reg;
414 
416  reg.register_weighting_scheme(ewt);
417  try {
419  reg.register_weighting_scheme(ewt_clone);
420  FAIL_TEST("Expected bad_alloc exception to be thrown");
421  } catch (const bad_alloc &) {
422  }
423 
424  // Either the old entry should be removed, or it should work.
425  const Xapian::Weight * p;
426  p = reg.get_weighting_scheme("ExceptionalWeight");
427  if (p) {
428  TEST_EQUAL(p->name(), "ExceptionalWeight");
429  }
430  }
431 }
432 
434  public:
435  typedef enum { NONE, CLONE } failmode;
436 
437  failmode fail;
438 
439  ExceptionalMatchSpy(failmode fail_) : fail(fail_) { }
440 
441  string name() const {
442  return "ExceptionalMatchSpy";
443  }
444 
445  MatchSpy * clone() const {
446  if (fail == CLONE)
447  throw bad_alloc();
448  return new ExceptionalMatchSpy(fail);
449  }
450 
451  void operator()(const Xapian::Document &, double) {
452  }
453 };
454 
456 DEFINE_TESTCASE(registry3, !backend) {
457  // Test that a replacement object throwing bad_alloc is handled.
458  {
459  Xapian::Registry reg;
460 
462  reg.register_match_spy(ems);
463  try {
465  reg.register_match_spy(ems_clone);
466  FAIL_TEST("Expected bad_alloc exception to be thrown");
467  } catch (const bad_alloc &) {
468  }
469 
470  // Either the old entry should be removed, or it should work.
471  const Xapian::MatchSpy * p;
472  p = reg.get_match_spy("ExceptionalMatchSpy");
473  if (p) {
474  TEST_EQUAL(p->name(), "ExceptionalMatchSpy");
475  }
476  }
477 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
Definition: omdatabase.cc:490
double get_sumextra(Xapian::termcount, Xapian::termcount) const
Calculate the term-independent weight component for a document.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
void register_weighting_scheme(const Xapian::Weight &wt)
Register a weighting scheme.
Definition: registry.cc:265
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
static const Query unserialise(const std::string &serialised, const Registry &reg=Registry())
Unserialise a string and return a Query object.
Definition: query.cc:202
MyPostingSource2 * unserialise(const std::string &s) const
Create object given string serialisation returned by serialise().
string name() const
Name of the posting source class.
void next(double)
Advance the current position to the next matching document.
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::termcount termlist_count() const
The length of the termlist - i.e. the number of different terms which index this document.
Definition: omdocument.cc:191
virtual std::string name() const
Return the name of this weighting scheme.
Definition: weight.cc:135
Abstract base class for match spies.
Definition: matchspy.h:49
bool at_end() const
Return true if the current position is past the last entry in this list.
A posting source which looks up weights in a map using values as the key.
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
Xapian::WritableDatabase get_writable_database(const string &dbname)
Definition: apitest.cc:87
virtual PostingSource * unserialise(const std::string &serialised) const
Create object given string serialisation returned by serialise().
std::string name() const
Name of the posting source class.
Class for iterating over document values.
Definition: valueiterator.h:40
STL namespace.
void operator()(const Xapian::Document &, double)
Register a document with the match spy.
const Xapian::PostingSource * get_posting_source(const std::string &name) const
Get a posting source given a name.
Definition: registry.cc:286
ExceptionalWeight(failmode fail_)
MyPostingSource2(const std::string &desc_)
ExceptionalPostingSource(failmode fail_)
double get_weight() const
Return the weight contribution for the current document.
TermIterator termlist_end() const
Equivalent end iterator for termlist_begin().
Definition: document.h:260
std::string serialise() const
Serialise document into a string.
Definition: omdocument.cc:227
void skip_to(Xapian::docid, double)
Advance to the specified docid.
PostingSource * clone() const
Clone the posting source.
MyPostingSource2 * clone() const
Clone the posting source.
test functionality of the Xapian API
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
const Xapian::MatchSpy * get_match_spy(const std::string &name) const
Get a match spy given a name.
Definition: registry.cc:300
std::string desc
Weight * clone() const
Clone this object.
DEFINE_TESTCASE(serialise_document1, !backend)
MatchSpy * clone() const
Clone the match spy.
This class provides read/write access to a database.
Definition: database.h:785
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
void init(const Xapian::Database &)
Set this PostingSource to the start of the list of postings.
void register_match_spy(const Xapian::MatchSpy &spy)
Register a user-defined match spy class.
Definition: registry.cc:293
Registry for user subclasses.
Definition: registry.h:47
Public interfaces for the Xapian library.
A posting source which returns a fixed weight for all documents.
const Xapian::Weight * get_weighting_scheme(const std::string &name) const
Get the weighting scheme given a name.
Definition: registry.cc:272
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
Xapian::termcount values_count() const
Count the values in this document.
Definition: omdocument.cc:204
std::string serialise() const
Serialise this object into a string.
Definition: query.cc:193
Class for iterating over term positions.
string name() const
Return the name of this match spy.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
string name() const
Return the name of this weighting scheme.
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:271
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const
Calculate the weight contribution for this object's term to a document.
A posting source which generates weights from a value slot.
void init(double)
Allow the subclass to perform any initialisation it needs to.
void add_posting(const std::string &tname, Xapian::termpos tpos, Xapian::termcount wdfinc=1)
Add an occurrence of a term at a particular position.
Definition: omdocument.cc:128
std::string serialise() const
Serialise object parameters into a string.
std::string get_description() const
Return a string describing this object.
double get_maxpart() const
Return an upper bound on what get_sumpart() can return for any document.
A posting source which reads weights from a value slot.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
static Document unserialise(const std::string &serialised)
Unserialise a document from a string produced by serialise().
Definition: omdocument.cc:234
void register_posting_source(const Xapian::PostingSource &source)
Register a user-defined posting source class.
Definition: registry.cc:279
double get_maxextra() const
Return an upper bound on what get_sumextra() can return for any document.
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian-specific test helper functions and macros.
virtual std::string serialise() const
Serialise object parameters into a string.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
ExceptionalMatchSpy(failmode fail_)
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
TermIterator termlist_begin() const
Iterator for the terms in this document.
Definition: omdocument.cc:197
virtual std::string name() const
Name of the posting source class.
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:535
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::docid get_docid() const
Return the current docid.
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
Abstract base class for weighting schemes.
Definition: weight.h:35
virtual std::string name() const
Return the name of this match spy.
Definition: matchspy.cc:59