xapian-core  1.4.20
api_snippets.cc
Go to the documentation of this file.
1 
4 /* Copyright 2012 Mihai Bivol
5  * Copyright 2015,2016,2017,2019,2020 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "api_snippets.h"
26 
27 #include <fstream>
28 #include <string>
29 
30 #include <xapian.h>
31 
32 #include "apitest.h"
33 #include "testsuite.h"
34 #include "testutils.h"
35 
36 using namespace std;
37 
/// One snippet test: the text to summarise, the requested snippet length,
/// and the snippet MSet::snippet() is expected to return for it.
struct snippet_testcase {
    /// Text passed to MSet::snippet().
    const char * input;
    /// Requested snippet length passed to MSet::snippet().
    size_t len;
    /// Expected return value of MSet::snippet().
    const char * expect;
};
43 
45 DEFINE_TESTCASE(snippet1, backend) {
46  Xapian::Enquire enquire(get_database("apitest_simpledata"));
48  Xapian::Query("rubbish"),
49  Xapian::Query("mention")));
50  Xapian::MSet mset = enquire.get_mset(0, 0);
51 
52  static const snippet_testcase testcases[] = {
53  // Test highlighting in full sample.
54  { "Rubbish and junk", 20, "<b>Rubbish</b> and junk" },
55  { "Project R.U.B.B.I.S.H. greenlit", 31, "Project <b>R.U.B.B.I.S.H.</b> greenlit" },
56  { "What a load of rubbish", 100, "What a load of <b>rubbish</b>" },
57  { "Mention rubbish", 100, "<b>Mention</b> <b>rubbish</b>" },
58  { "A mention of rubbish", 100, "A <b>mention</b> of <b>rubbish</b>" },
59  { "Rubbish mention of rubbish", 100, "<b>Rubbish</b> <b>mention</b> of <b>rubbish</b>" },
60 
61  // Test selection of snippet.
62  { "Rubbish and junk", 12, "<b>Rubbish</b> and..." },
63  { "Project R.U.B.B.I.S.H. greenlit", 14, "...<b>R.U.B.B.I.S.H.</b>..." },
64  { "What a load of rubbish", 12, "...of <b>rubbish</b>" },
65  { "What a load of rubbish", 8, "...<b>rubbish</b>" },
66  { "Rubbish mention where the start is better than the rubbish ending", 18, "<b>Rubbish</b> <b>mention</b>..." },
67 
68  // Should prefer "interesting" words for context.
69  { "And of the rubbish document to this", 18, "...<b>rubbish</b> document..." },
70  { "And if they document rubbish to be this", 18, "...document <b>rubbish</b>..." },
71  };
72 
73  for (auto i : testcases) {
74  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len), i.expect);
75  }
76 }
77 
79 DEFINE_TESTCASE(snippetstem1, backend) {
80  Xapian::Enquire enquire(get_database("apitest_simpledata"));
82  Xapian::Query("rubbish"),
83  Xapian::Query("Zexampl")));
84  Xapian::MSet mset = enquire.get_mset(0, 0);
85 
86  // Term Zexampl isn't in the database, but the highlighter should still
87  // handle it.
88  static const snippet_testcase testcases[] = {
89  // "rubbish" isn't stemmed, example is.
90  { "You rubbished my ideas", 24, "You rubbished my ideas" },
91  { "Rubbished all my examples", 20, "...all my <b>examples</b>" },
92  { "Examples of text", 20, "<b>Examples</b> of text" },
93  };
94 
95  Xapian::Stem stem("en");
96  for (auto i : testcases) {
97  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
98  }
99 }
100 
102 DEFINE_TESTCASE(snippetphrase1, backend) {
103  Xapian::Enquire enquire(get_database("apitest_simpledata"));
105  Xapian::Query("rubbish"),
106  Xapian::Query("mention"));
107  // Regression test - a phrase with a following sibling query would crash in
108  // the highlighting code.
109  enquire.set_query(q &~ Xapian::Query("banana"));
110  Xapian::MSet mset = enquire.get_mset(0, 0);
111 
112  static const snippet_testcase testcases[] = {
113  { "A mention of rubbish", 18, "...mention of rubbish" },
114  { "This is a rubbish mention", 20, "...is a <b>rubbish mention</b>" },
115  { "Mention of a rubbish mention of rubbish", 45, "Mention of a <b>rubbish mention</b> of rubbish" },
116  { "Mention of a rubbish mention of rubbish", 18, "...<b>rubbish mention</b> of..." },
117  { "rubbish rubbish mention mention", 45, "rubbish <b>rubbish mention</b> mention" },
118  { "rubbish mention rubbish mention", 45, "<b>rubbish mention</b> <b>rubbish mention</b>" },
119  };
120 
121  Xapian::Stem stem("en");
122  for (auto i : testcases) {
123  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
124  }
125 }
126 
128 static void
129 make_tg_db(Xapian::WritableDatabase &db, const string & source)
130 {
131  string file = test_driver::get_srcdir();
132  file += "/testdata/";
133  file += source;
134  file += ".txt";
135  ifstream input;
136  input.open(file.c_str());
137  if (!input.is_open()) {
138  FAIL_TEST("Couldn't open input: " << file);
139  }
140 
142  tg.set_stemmer(Xapian::Stem("en"));
143  while (!input.eof()) {
144  Xapian::Document doc;
145  tg.set_document(doc);
146  string line, data;
147  while (true) {
148  getline(input, line);
149  if (find_if(line.begin(), line.end(), C_isnotspace) == line.end())
150  break;
151  tg.index_text(line);
152  if (!data.empty()) data += ' ';
153  data += line;
154  }
155  doc.set_data(data);
156  db.add_document(doc);
157  }
158 }
159 
161 DEFINE_TESTCASE(snippetmisc1, generated) {
162  Xapian::Database db = get_database("snippet", make_tg_db, "snippet");
163  Xapian::Enquire enquire(db);
165  Xapian::Stem stem("en");
166 
167  static const char * const words[] = { "do", "we", "have" };
168  Xapian::Query q(Xapian::Query::OP_PHRASE, words, words + 3);
169  enquire.set_query(q);
170  Xapian::MSet mset = enquire.get_mset(0, 6);
171  TEST_EQUAL(mset.size(), 3);
172  TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 40, stem),
173  "How much o'brien <b>do we have</b>? Miles...");
174  TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 40, stem),
175  "...Unicode: How much o’brien <b>do we have</b>?");
176  TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 32, stem),
177  "We do have we <b>do we have</b> do we.");
178 
179  enquire.set_query(Xapian::Query("Zwelcom") | Xapian::Query("Zmike"));
180  mset = enquire.get_mset(0, 6);
181  TEST_EQUAL(mset.size(), 3);
182  TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 25, stem),
183  "\"<b>Welcome</b> to <b>Mike's</b>...");
184  TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 5, stem),
185  "<b>Mike</b>...");
186  TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 10, stem),
187  "...<b>Mike</b> can...");
188 
189  enquire.set_query(Xapian::Query(q.OP_WILDCARD, "m"));
190  mset = enquire.get_mset(0, 6);
191  TEST_EQUAL(mset.size(), 5);
192  TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 18, stem),
193  "...<b>Mike's</b> <b>Mechanical</b>...");
194  TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 80, stem),
195  "<b>Mike</b> <b>McDonald</b> is a <b>mechanic</b> who enjoys repairing things of a <b>mechanical</b> sort.");
196  TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 102, stem),
197  "From autos to zip-lines, from tea-lights to x-rays, from sea ships to u-boats - <b>Mike</b> can fix them all.");
198  TEST_STRINGS_EQUAL(mset.snippet(mset[3].get_document().get_data(), 64, stem),
199  "How <b>much</b> o'brien do we have? <b>Miles</b> O'Brien, that's how <b>much</b>.");
200  // The requested length is in bytes, so the "fancy" apostrophe results in
201  // fewer Unicode characters in this sample than the previous one.
202  TEST_STRINGS_EQUAL(mset.snippet(mset[4].get_document().get_data(), 64, stem),
203  "...<b>much</b> o’brien do we have? <b>Miles</b> O’Brien, that’s how <b>much</b>.");
204 }
205 
207 DEFINE_TESTCASE(snippet_termcover1, backend) {
208  static const snippet_testcase testcases[] = {
209  // "Zexample" isn't in the database, so should get termweight 0. Once
210  // max_tw is added on, "rubbish" should have just under twice the
211  // relevance of "example" so clearly should win in a straight fight.
212  { "A rubbish, but a good example", 14, "...<b>rubbish</b>, but a..."},
213  // But a second occurrence of "rubbish" has half the relevance, so
214  // "example" should add slightly more relevance.
215  { "Rubbish and rubbish, and rubbish examples", 22, "...and <b>rubbish</b> <b>examples</b>"},
216  // And again.
217  { "rubbish rubbish example rubbish rubbish", 16, "...<b>example</b> <b>rubbish</b>..." },
218  };
219 
220  Xapian::Stem stem("en");
221  // Disable SNIPPET_BACKGROUND_MODEL so we can test the relevance decay
222  // for repeated terms.
223  unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE;
224  for (auto i : testcases) {
225  Xapian::Enquire enquire(get_database("apitest_simpledata"));
227  Xapian::Query("rubbish"),
228  Xapian::Query("Zexampl")));
229 
230  Xapian::MSet mset = enquire.get_mset(0, 0);
231  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem, flags), i.expect);
232  }
233 }
234 
236 DEFINE_TESTCASE(snippet_termcover2, backend) {
237  // With BoolWeight, all terms have 0 termweight, and so relevance 1.0
238  // (since max_tw is set to 1.0 if it is zero).
239  static const snippet_testcase testcases[] = {
240  // Diversity should pick two different terms in preference.
241  { "rubbish rubbish example rubbish rubbish", 16, "...<b>example</b> <b>rubbish</b>..." },
242  // And again.
243  { "Rubbish and rubbish, and rubbish examples", 22, "...and <b>rubbish</b> <b>examples</b>"},
244  // The last of two equal snippet should win.
245  { "A rubbish, but a good example", 14, "...a good <b>example</b>"},
246  };
247 
248  Xapian::Stem stem("en");
249  // Disable SNIPPET_BACKGROUND_MODEL so we can test the relevance decay
250  // for repeated terms.
251  unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE;
252  for (auto i : testcases) {
253  Xapian::Enquire enquire(get_database("apitest_simpledata"));
255  Xapian::Query("rubbish"),
256  Xapian::Query("Zexampl")));
258 
259  Xapian::MSet mset = enquire.get_mset(0, 0);
260  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem, flags), i.expect);
261  }
262 }
263 
265 DEFINE_TESTCASE(snippet_empty, backend) {
266  Xapian::Stem stem("en");
267 
268  Xapian::Enquire enquire(get_database("apitest_simpledata"));
270  Xapian::Query("rubbish"),
271  Xapian::Query("Zexampl")));
272 
273  Xapian::MSet mset = enquire.get_mset(0, 0);
274 
275  // A non-matching text
276  const char *input = "A string without a match.";
277  size_t len = strlen(input);
278 
279  // By default, snippet() returns len bytes of input without markup
280  unsigned flags = 0;
281  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, 0), input);
282 
283  // force snippet() to return the empty string if no term got matched
285  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags), "");
286 
287  // A text with a match
288  input = "A rubbish example text";
289  len = strlen(input);
290 
291  flags = 0;
292  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags),
293  "A <b>rubbish</b> <b>example</b> text");
294 
296  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags),
297  "A <b>rubbish</b> <b>example</b> text");
298 }
299 
301 DEFINE_TESTCASE(snippet_start_nonspace, backend) {
302  Xapian::Enquire enquire(get_database("apitest_simpledata"));
303  enquire.set_query(Xapian::Query("foo") | Xapian::Query("10"));
304 
305  Xapian::MSet mset = enquire.get_mset(0, 0);
306 
307  Xapian::Stem stem;
308 
309  const char *input = "[xapian-devel] Re: foo";
310  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
311  "[xapian-devel] Re: <b>foo</b>");
312 
313  input = "bar [xapian-devel] Re: foo";
314  TEST_STRINGS_EQUAL(mset.snippet(input, 24, stem),
315  "...[xapian-devel] Re: <b>foo</b>");
316 
317  input = "there is a $1000 prize for foo";
318  TEST_STRINGS_EQUAL(mset.snippet(input, 20, stem),
319  "...$1000 prize for <b>foo</b>");
320 
321  input = "-1 is less than foo";
322  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
323  "-1 is less than <b>foo</b>");
324 
325  input = "+1 is less than foo";
326  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
327  "+1 is less than <b>foo</b>");
328 
329  input = "/bin/sh is a foo";
330  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
331  "/bin/sh is a <b>foo</b>");
332 
333  input = "'tis pity foo is a bar";
334  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
335  "'tis pity <b>foo</b> is a bar");
336 
337  input = "\"foo bar\" he whispered";
338  TEST_STRINGS_EQUAL(mset.snippet(input, 11, stem),
339  "\"<b>foo</b> bar\" he...");
340 
341  input = "\\\\server\\share\\foo is a UNC path";
342  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
343  "\\\\server\\share\\<b>foo</b> is a UNC path");
344 
345  input = "«foo» is a placeholder";
346  TEST_STRINGS_EQUAL(mset.snippet(input, 9, stem),
347  "«<b>foo</b>» is...");
348 
349  input = "#include <foo.h> to use libfoo";
350  TEST_STRINGS_EQUAL(mset.snippet(input, 12, stem),
351  "...&lt;<b>foo</b>.h&gt; to...");
352 
353  input = "¡foo!";
354  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
355  "¡<b>foo</b>!");
356 
357  input = "¿foo?";
358  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
359  "¿<b>foo</b>?");
360 
361  input = "(foo) test";
362  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
363  "(<b>foo</b>) test");
364 
365  input = "{foo} test";
366  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
367  "{<b>foo</b>} test");
368 
369  input = "`foo` test";
370  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
371  "`<b>foo</b>` test");
372 
373  input = "@foo@ is replaced";
374  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
375  "@<b>foo</b>@ is replaced");
376 
377  input = "%foo is a perl hash";
378  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
379  "%<b>foo</b> is a perl hash");
380 
381  input = "&foo takes the address of foo";
382  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
383  "&amp;<b>foo</b> takes the address of <b>foo</b>");
384 
385  input = "§3.1.4 foo";
386  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
387  "§3.1.4 <b>foo</b>");
388 
389  input = "#foo";
390  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
391  "#<b>foo</b>");
392 
393  input = "~foo~ test";
394  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
395  "~<b>foo</b>~ test");
396 
397  input = "( foo )";
398  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
399  "<b>foo</b>...");
400 
401  input = "(=foo=)";
402  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
403  "<b>foo</b>...");
404 
405  // Check that excessive non-word characters aren't included.
406  input = "((((((foo";
407  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
408  "<b>foo</b>");
409 
410  // Check we don't include characters that aren't useful.
411  input = "bar,foo!";
412  TEST_STRINGS_EQUAL(mset.snippet(input, 5, stem),
413  "...<b>foo</b>!");
414 
415  // Check trailing characters are included when useful.
416  input = "/opt/foo/bin/";
417  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
418  "/opt/<b>foo</b>/bin/");
419 
420  input = "\"foo bar\"";
421  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
422  "\"<b>foo</b> bar\"");
423 
424  input = "\\\\server\\share\\foo\\";
425  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
426  "\\\\server\\share\\<b>foo</b>\\");
427 
428  input = "«foo»";
429  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
430  "«<b>foo</b>»");
431 
432  input = "#include <foo>";
433  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
434  "#include &lt;<b>foo</b>&gt;");
435 
436  input = "(foo)";
437  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
438  "(<b>foo</b>)");
439 
440  input = "{foo}";
441  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
442  "{<b>foo</b>}");
443 
444  input = "[foo]";
445  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
446  "[<b>foo</b>]");
447 
448  input = "`foo`";
449  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
450  "`<b>foo</b>`");
451 
452  input = "@foo@";
453  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
454  "@<b>foo</b>@");
455 
456  input = "foo for 10¢";
457  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
458  "<b>foo</b> for <b>10</b>¢");
459 }
460 
462 DEFINE_TESTCASE(snippet_small_zerolength, backend) {
463  Xapian::Enquire enquire(get_database("apitest_simpledata"));
465  Xapian::Query("rubbish"),
466  Xapian::Query("mention")));
467  Xapian::MSet mset = enquire.get_mset(0, 0);
468 
469  static const snippet_testcase testcases[] = {
470  // Test with small length
471  { "mention junk rubbish", 3, "" },
472  { "Project R.U.B.B.I.S.H. greenlit", 5, "" },
473  { "What load rubbish", 3, "" },
474  { "Mention rubbish", 4, "" },
475 
476  // Test with zero length.
477  { "Rubbish and junk", 0, "" },
478  { "Project R.U.B.B.I.S.H. greenlit", 0, "" },
479  { "What a load of rubbish", 0, "" },
480  { "rubbish mention rubbish mention", 0, "" },
481  };
482 
483  for (auto i : testcases) {
484  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len), i.expect);
485  }
486 }
487 
489 DEFINE_TESTCASE(snippet_cjkngrams, generated) {
490  Xapian::Database db = get_database("snippet_cjkngrams",
491  [](Xapian::WritableDatabase& wdb,
492  const string&)
493  {
494  Xapian::Document doc;
497  tg.set_document(doc);
498  tg.index_text("明末時已經有香港地方的概念");
499  wdb.add_document(doc);
500  });
501  Xapian::Enquire enquire(db);
503  auto q = qp.parse_query("已經完成", qp.FLAG_DEFAULT | qp.FLAG_CJK_NGRAM);
504  enquire.set_query(q);
505 
506  Xapian::MSet mset = enquire.get_mset(0, 0);
507 
508  Xapian::Stem stem;
509  const char *input = "明末時已經有香港地方的概念";
510  size_t len = strlen(input);
511 
512  unsigned flags = Xapian::MSet::SNIPPET_CJK_NGRAM;
513  string s;
514  s = mset.snippet(input, len, stem, flags, "<b>", "</b>", "...");
515  TEST_STRINGS_EQUAL(s, "明末時<b>已</b><b>經</b>有香港地方的概念");
516 
517  s = mset.snippet(input, len / 2, stem, flags, "<b>", "</b>", "...");
518  TEST_STRINGS_EQUAL(s, "...<b>已</b><b>經</b>有香港地...");
519 }
520 
521 DEFINE_TESTCASE(snippet_empty_mset, backend) {
522  Xapian::Enquire enquire(get_database("apitest_simpledata"));
523  enquire.set_query(Xapian::Query());
524  Xapian::MSet mset = enquire.get_mset(0, 0);
525  TEST_STRINGS_EQUAL(mset.snippet("foo", 3), "foo");
526 }
527 
528 DEFINE_TESTCASE(snippet_empty_mset2, !backend) {
529  Xapian::MSet mset;
530  TEST_STRINGS_EQUAL(mset.snippet("foo", 3), "foo");
531 }
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
Wildcard expansion.
Definition: query.h:255
Exhaustively evaluate candidate snippets in MSet::snippet().
Definition: mset.h:179
static void make_tg_db(Xapian::WritableDatabase &db, const string &source)
Index file to a DB with TermGenerator.
This class is used to access a database, or a group of databases.
Definition: database.h:68
Class representing a stemming algorithm.
Definition: stem.h:62
Enable generation of n-grams from CJK text.
void set_document(const Xapian::Document &doc)
Set the current document.
Parses a piece of text and generates terms.
Definition: termgenerator.h:48
Definition: header.h:63
std::string snippet(const std::string &text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, const std::string &hi_start="<b>", const std::string &hi_end="</b>", const std::string &omit="...") const
Generate a snippet.
Definition: omenquire.cc:304
const char * expect
Definition: api_snippets.cc:41
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
a generic test suite engine
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
static std::string get_srcdir()
Read srcdir from environment and if not present, make a valiant attempt to guess a value...
Definition: testsuite.cc:128
void index_text(const Xapian::Utf8Iterator &itor, Xapian::termcount wdf_inc=1, const std::string &prefix=std::string())
Index some text.
test functionality of the Xapian API
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
Enable generation of n-grams from CJK text.
Definition: mset.h:203
This class provides read/write access to a database.
Definition: database.h:785
Match only documents where all subqueries match near and in order.
Definition: query.h:152
Public interfaces for the Xapian library.
void set_stemmer(const Xapian::Stem &stemmer)
Set the Xapian::Stem object to be used for generating stemmed terms.
DEFINE_TESTCASE(snippet1, backend)
Test snippets without stemming.
Definition: api_snippets.cc:45
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
Definition: queryparser.cc:161
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
const char * input
Definition: api_snippets.cc:39
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
flags set_flags(flags toggle, flags mask=flags(0))
Set flags.
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
Match documents which at least one subquery matches.
Definition: query.h:92
Xapian-specific test helper functions and macros.
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Definition: testsuite.h:287
Enable generation of n-grams from CJK text.
Definition: queryparser.h:886
Return the empty string if no term got matched.
Definition: mset.h:186
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
Class representing a query.
Definition: query.h:46
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
bool C_isnotspace(char ch)
Definition: stringutils.h:176
A handle representing a document in a Xapian database.
Definition: document.h:61
static const testcase testcases[]
Definition: api_unicode.cc:39