xapian-core  2.0.0
api_snippets.cc
Go to the documentation of this file.
1 
4 /* Copyright 2012 Mihai Bivol
5  * Copyright 2015,2016,2017,2019,2020,2026 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "api_snippets.h"
25 
26 #include <fstream>
27 #include <string>
28 
29 #include <xapian.h>
30 
31 #include "apitest.h"
32 #include "testsuite.h"
33 #include "testutils.h"
34 
35 using namespace std;
36 
// Fields of one snippet test vector, as used by the table-driven tests in
// this file: snippet() is called on `input` with `len` and the result is
// compared against `expect`.
// NOTE(review): the struct's opening line (listing line 37) is missing from
// this extract; the type is referred to as `snippet_testcase` further down.
  // Text handed to MSet::snippet().
38  const char * input;
  // Length value passed as snippet()'s second argument.
39  size_t len;
  // Exact string the snippet() result must equal.
40  const char * expect;
41 };
42 
// Test snippets without stemming: every entry in the table is run through
// mset.snippet(input, len) with no stemmer argument and compared with the
// expected highlighted string.
44 DEFINE_TESTCASE(snippet1, backend) {
45  Xapian::Enquire enquire(get_database("apitest_simpledata"));
  // NOTE(review): listing line 46 is missing from this extract; the closing
  // ")))" below suggests it opens an enquire.set_query(Xapian::Query(...))
  // call combining the two terms - confirm against upstream.
47  Xapian::Query("rubbish"),
48  Xapian::Query("mention")));
  // maxitems is 0: only the MSet object is needed so snippet() can be called.
49  Xapian::MSet mset = enquire.get_mset(0, 0);
50 
51  static const snippet_testcase testcases[] = {
52  // Test highlighting in full sample.
53  { "Rubbish and junk", 20, "<b>Rubbish</b> and junk" },
54  { "Project R.U.B.B.I.S.H. greenlit", 31, "Project <b>R.U.B.B.I.S.H.</b> greenlit" },
55  { "What a load of rubbish", 100, "What a load of <b>rubbish</b>" },
56  { "Mention rubbish", 100, "<b>Mention</b> <b>rubbish</b>" },
57  { "A mention of rubbish", 100, "A <b>mention</b> of <b>rubbish</b>" },
58  { "Rubbish mention of rubbish", 100, "<b>Rubbish</b> <b>mention</b> of <b>rubbish</b>" },
59 
60  // Test selection of snippet.
61  { "Rubbish and junk", 12, "<b>Rubbish</b> and..." },
62  { "Project R.U.B.B.I.S.H. greenlit", 14, "...<b>R.U.B.B.I.S.H.</b>..." },
63  { "What a load of rubbish", 12, "...of <b>rubbish</b>" },
64  { "What a load of rubbish", 8, "...<b>rubbish</b>" },
65  { "Rubbish mention where the start is better than the rubbish ending", 18, "<b>Rubbish</b> <b>mention</b>..." },
66 
67  // Should prefer "interesting" words for context.
68  { "And of the rubbish document to this", 18, "...<b>rubbish</b> document..." },
69  { "And if they document rubbish to be this", 18, "...document <b>rubbish</b>..." },
70 
71  // Test handling of soft hyphen (added in Xapian 2.0.0).
  // SHY is the UTF-8 encoding of U+00AD.
72 #define SHY "\xc2\xad"
73  { "rub" SHY "bish ment" SHY "ion", 20,
74  "<b>rub" SHY "bish</b> <b>ment" SHY "ion</b>" },
75 
76  // Test handling of zero-width space (changed in Xapian 2.0.0).
  // ZWSP is the UTF-8 encoding of U+200B.
77 #define ZWSP "\xe2\x80\x8b"
78  { "mention" ZWSP "rubbish" ZWSP "dolor", 30,
79  "<b>mention</b>" ZWSP "<b>rubbish</b>" ZWSP "dolor" },
80  };
81 
  // Run every table entry through snippet() using its default stemmer,
  // flags and markup.
82  for (auto i : testcases) {
83  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len), i.expect);
84  }
85 }
86 
// Test snippets with stemming: the query mixes the unstemmed term "rubbish"
// with the stemmed term "Zexampl", and snippet() is given an English stemmer.
88 DEFINE_TESTCASE(snippetstem1, backend) {
89  Xapian::Enquire enquire(get_database("apitest_simpledata"));
  // NOTE(review): listing line 90 is missing from this extract; presumably
  // it opens the enquire.set_query(Xapian::Query(...)) call closed below -
  // confirm against upstream.
91  Xapian::Query("rubbish"),
92  Xapian::Query("Zexampl")));
93  Xapian::MSet mset = enquire.get_mset(0, 0);
94 
95  // Term Zexampl isn't in the database, but the highlighter should still
96  // handle it.
97  static const snippet_testcase testcases[] = {
98  // "rubbish" isn't stemmed, example is.
99  { "You rubbished my ideas", 24, "You rubbished my ideas" },
100  { "Rubbished all my examples", 20, "...all my <b>examples</b>" },
101  { "Examples of text", 20, "<b>Examples</b> of text" },
102  };
103 
104  Xapian::Stem stem("en");
  // Same table-driven check as snippet1, but passing the stemmer.
105  for (auto i : testcases) {
106  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
107  }
108 }
109 
// Test snippets for a phrase query: per the expectations below, only the
// adjacent sequence "rubbish mention" is highlighted, as a single span.
111 DEFINE_TESTCASE(snippetphrase1, backend) {
112  Xapian::Enquire enquire(get_database("apitest_simpledata"));
  // NOTE(review): listing line 113 is missing from this extract; presumably
  // it begins the declaration of the query `q` used below (the page's
  // cross-reference index lists OP_PHRASE) - confirm against upstream.
114  Xapian::Query("rubbish"),
115  Xapian::Query("mention"));
116  // Regression test - a phrase with a following sibling query would crash in
117  // the highlighting code.
118  enquire.set_query(q &~ Xapian::Query("banana"));
119  Xapian::MSet mset = enquire.get_mset(0, 0);
120 
121  static const snippet_testcase testcases[] = {
  // The terms in reverse order are not highlighted.
122  { "A mention of rubbish", 18, "...mention of rubbish" },
123  { "This is a rubbish mention", 20, "...is a <b>rubbish mention</b>" },
124  { "Mention of a rubbish mention of rubbish", 45, "Mention of a <b>rubbish mention</b> of rubbish" },
125  { "Mention of a rubbish mention of rubbish", 18, "...<b>rubbish mention</b> of..." },
126  { "rubbish rubbish mention mention", 45, "rubbish <b>rubbish mention</b> mention" },
127  { "rubbish mention rubbish mention", 45, "<b>rubbish mention</b> <b>rubbish mention</b>" },
128  };
129 
130  Xapian::Stem stem("en");
131  for (auto i : testcases) {
132  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
133  }
134 }
135 
// Index a text file into a database with TermGenerator.
//
// Reads <srcdir>/testdata/<source>.txt and adds one document to `db` per
// group of consecutive lines; a group ends at a line that has no non-space
// character.  Each line is indexed via tg.index_text(), and the document
// data is the group's lines joined with single spaces.
//
// @param db      Database that receives the generated documents.
// @param source  Base name (without directory or ".txt") of the input file.
//
// Fails the current test via FAIL_TEST if the file cannot be opened.
137 static void
138 make_tg_db(Xapian::WritableDatabase &db, const string & source)
139 {
  // Build the input path: <srcdir>/testdata/<source>.txt
140  string file = test_driver::get_srcdir();
141  file += "/testdata/";
142  file += source;
143  file += ".txt";
144  ifstream input;
145  input.open(file.c_str());
146  if (!input.is_open()) {
147  FAIL_TEST("Couldn't open input: " << file);
148  }
149 
  // NOTE(review): listing line 150 is missing from this extract; presumably
  // it declares the Xapian::TermGenerator `tg` used below - confirm against
  // upstream.
151  tg.set_stemmer(Xapian::Stem("en"));
  // One iteration per document, until the stream reports end-of-file.
152  while (!input.eof()) {
153  Xapian::Document doc;
154  tg.set_document(doc);
155  string line, data;
156  while (true) {
157  getline(input, line);
  // A line with no non-space characters terminates the current document.
158  if (find_if(line.begin(), line.end(), C_isnotspace) == line.end())
159  break;
160  tg.index_text(line);
  // Accumulate the raw text, separating lines with a single space.
161  if (!data.empty()) data += ' ';
162  data += line;
163  }
164  doc.set_data(data);
165  db.add_document(doc);
166  }
167 }
168 
// Test snippets generated from real document data: the "snippet" database is
// built by make_tg_db(), and snippets are taken from each hit's document data
// for a phrase query, an OR of stemmed terms, and a wildcard query.
170 DEFINE_TESTCASE(snippetmisc1, backend) {
171  Xapian::Database db = get_database("snippet", make_tg_db, "snippet");
172  Xapian::Enquire enquire(db);
  // NOTE(review): listing line 173 is missing from this extract; the page's
  // cross-reference index lists set_weighting_scheme() and BoolWeight, so it
  // presumably selects the weighting scheme - confirm against upstream.
174  Xapian::Stem stem("en");
175 
  // Phrase query "do we have".
176  static const char * const words[] = { "do", "we", "have" };
177  Xapian::Query q(Xapian::Query::OP_PHRASE, words, words + 3);
178  enquire.set_query(q);
179  Xapian::MSet mset = enquire.get_mset(0, 6);
180  TEST_EQUAL(mset.size(), 3);
181  TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 40, stem),
182  "How much o'brien <b>do we have</b>? Miles...");
183  TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 40, stem),
184  "...Unicode: How much o’brien <b>do we have</b>?");
185  TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 32, stem),
186  "We do have we <b>do we have</b> do we.");
187 
  // OR of two stemmed terms.
188  enquire.set_query(Xapian::Query("Zwelcom") | Xapian::Query("Zmike"));
189  mset = enquire.get_mset(0, 6);
190  TEST_EQUAL(mset.size(), 3);
191  TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 25, stem),
192  "\"<b>Welcome</b> to <b>Mike's</b>...");
193  TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 5, stem),
194  "<b>Mike</b>...");
195  TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 10, stem),
196  "...<b>Mike</b> can...");
197 
  // Wildcard query with pattern "m".
198  enquire.set_query(Xapian::Query(q.OP_WILDCARD, "m"));
199  mset = enquire.get_mset(0, 6);
200  TEST_EQUAL(mset.size(), 5);
201  TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 18, stem),
202  "...<b>Mike's</b> <b>Mechanical</b>...");
203  TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 80, stem),
204  "<b>Mike</b> <b>McDonald</b> is a <b>mechanic</b> who enjoys repairing things of a <b>mechanical</b> sort.");
205  TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 102, stem),
206  "From autos to zip-lines, from tea-lights to x-rays, from sea ships to u-boats - <b>Mike</b> can fix them all.");
207  TEST_STRINGS_EQUAL(mset.snippet(mset[3].get_document().get_data(), 64, stem),
208  "How <b>much</b> o'brien do we have? <b>Miles</b> O'Brien, that's how <b>much</b>.");
209  // The requested length is in bytes, so the "fancy" apostrophe results in
210  // fewer Unicode characters in this sample than the previous one.
211  TEST_STRINGS_EQUAL(mset.snippet(mset[4].get_document().get_data(), 64, stem),
212  "...<b>much</b> o’brien do we have? <b>Miles</b> O’Brien, that’s how <b>much</b>.");
213 }
214 
// Test snippet selection when query terms differ in weight and repeated
// occurrences of a term count for less.  snippet() is called with only
// SNIPPET_EXHAUSTIVE set in the flags.
216 DEFINE_TESTCASE(snippet_termcover1, backend) {
217  static const snippet_testcase testcases[] = {
218  // "Zexample" isn't in the database, so should get termweight 0. Once
219  // max_tw is added on, "rubbish" should have just under twice the
220  // relevance of "example" so clearly should win in a straight fight.
221  { "A rubbish, but a good example", 14, "...<b>rubbish</b>, but a..."},
222  // But a second occurrence of "rubbish" has half the relevance, so
223  // "example" should add slightly more relevance.
224  { "Rubbish and rubbish, and rubbish examples", 22, "...and <b>rubbish</b> <b>examples</b>"},
225  // And again.
226  { "rubbish rubbish example rubbish rubbish", 16, "...<b>example</b> <b>rubbish</b>..." },
227  };
228 
229  Xapian::Stem stem("en");
230  // Disable SNIPPET_BACKGROUND_MODEL so we can test the relevance decay
231  // for repeated terms.
232  unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE;
  // A fresh Enquire and MSet are created for every test vector.
233  for (auto i : testcases) {
234  Xapian::Enquire enquire(get_database("apitest_simpledata"));
  // NOTE(review): listing line 235 is missing from this extract; presumably
  // it opens the enquire.set_query(Xapian::Query(...)) call closed below -
  // confirm against upstream.
236  Xapian::Query("rubbish"),
237  Xapian::Query("Zexampl")));
238 
239  Xapian::MSet mset = enquire.get_mset(0, 0);
240  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem, flags), i.expect);
241  }
242 }
243 
// Test snippet selection when all query terms are equally relevant (see the
// BoolWeight comment below), so the choice is driven by term diversity.
245 DEFINE_TESTCASE(snippet_termcover2, backend) {
246  // With BoolWeight, all terms have 0 termweight, and so relevance 1.0
247  // (since max_tw is set to 1.0 if it is zero).
248  static const snippet_testcase testcases[] = {
249  // Diversity should pick two different terms in preference.
250  { "rubbish rubbish example rubbish rubbish", 16, "...<b>example</b> <b>rubbish</b>..." },
251  // And again.
252  { "Rubbish and rubbish, and rubbish examples", 22, "...and <b>rubbish</b> <b>examples</b>"},
253  // The last of two equal snippets should win.
254  { "A rubbish, but a good example", 14, "...a good <b>example</b>"},
255  };
256 
257  Xapian::Stem stem("en");
258  // Disable SNIPPET_BACKGROUND_MODEL so we can test the relevance decay
259  // for repeated terms.
260  unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE;
261  for (auto i : testcases) {
262  Xapian::Enquire enquire(get_database("apitest_simpledata"));
  // NOTE(review): listing lines 263 and 266 are missing from this extract;
  // presumably 263 opens the set_query() call closed below and 266 selects
  // BoolWeight via set_weighting_scheme() - confirm against upstream.
264  Xapian::Query("rubbish"),
265  Xapian::Query("Zexampl")));
267 
268  Xapian::MSet mset = enquire.get_mset(0, 0);
269  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem, flags), i.expect);
270  }
271 }
272 
// Test what snippet() returns for text with and without a matching term:
// first with flags == 0, then after the flags are changed (see the notes on
// the missing lines below).
274 DEFINE_TESTCASE(snippet_empty, backend) {
275  Xapian::Stem stem("en");
276 
277  Xapian::Enquire enquire(get_database("apitest_simpledata"));
  // NOTE(review): listing line 278 is missing from this extract; presumably
  // it opens the enquire.set_query(Xapian::Query(...)) call closed below -
  // confirm against upstream.
279  Xapian::Query("rubbish"),
280  Xapian::Query("Zexampl")));
281 
282  Xapian::MSet mset = enquire.get_mset(0, 0);
283 
284  // A non-matching text
285  const char *input = "A string without a match.";
286  size_t len = strlen(input);
287 
288  // By default, snippet() returns len bytes of input without markup
289  unsigned flags = 0;
  // The literal 0 passed here equals the value of `flags` at this point.
290  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, 0), input);
291 
292  // force snippet() to return the empty string if no term got matched
  // NOTE(review): listing line 293 is missing from this extract; presumably
  // it sets MSet::SNIPPET_EMPTY_WITHOUT_MATCH in `flags` - confirm against
  // upstream.
294  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags), "");
295 
296  // A text with a match
297  input = "A rubbish example text";
298  len = strlen(input);
299 
300  flags = 0;
301  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags),
302  "A <b>rubbish</b> <b>example</b> text");
303 
  // NOTE(review): listing line 304 is missing from this extract; presumably
  // it sets SNIPPET_EMPTY_WITHOUT_MATCH again, so this check shows the flag
  // does not change the result when a term matches - confirm against upstream.
305  TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags),
306  "A <b>rubbish</b> <b>example</b> text");
307 }
308 
// Test how snippet() treats non-word characters (brackets, quotes, path
// separators, currency signs, ...) next to highlighted terms and at the
// start and end of the snippet.  The query is "foo" OR "10" and an empty
// (default-constructed) Xapian::Stem is passed.
310 DEFINE_TESTCASE(snippet_start_nonspace, backend) {
311  Xapian::Enquire enquire(get_database("apitest_simpledata"));
312  enquire.set_query(Xapian::Query("foo") | Xapian::Query("10"));
313 
314  Xapian::MSet mset = enquire.get_mset(0, 0);
315 
316  Xapian::Stem stem;
317 
  // Each check below reassigns `input` and compares the snippet against
  // the expected marked-up text.
318  const char *input = "[xapian-devel] Re: foo";
319  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
320  "[xapian-devel] Re: <b>foo</b>");
321 
322  input = "bar [xapian-devel] Re: foo";
323  TEST_STRINGS_EQUAL(mset.snippet(input, 24, stem),
324  "...[xapian-devel] Re: <b>foo</b>");
325 
326  input = "there is a $1000 prize for foo";
327  TEST_STRINGS_EQUAL(mset.snippet(input, 20, stem),
328  "...$1000 prize for <b>foo</b>");
329 
330  input = "-1 is less than foo";
331  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
332  "-1 is less than <b>foo</b>");
333 
334  input = "+1 is less than foo";
335  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
336  "+1 is less than <b>foo</b>");
337 
338  input = "/bin/sh is a foo";
339  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
340  "/bin/sh is a <b>foo</b>");
341 
342  input = "'tis pity foo is a bar";
343  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
344  "'tis pity <b>foo</b> is a bar");
345 
346  input = "\"foo bar\" he whispered";
347  TEST_STRINGS_EQUAL(mset.snippet(input, 11, stem),
348  "\"<b>foo</b> bar\" he...");
349 
350  input = "\\\\server\\share\\foo is a UNC path";
351  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
352  "\\\\server\\share\\<b>foo</b> is a UNC path");
353 
354  input = "«foo» is a placeholder";
355  TEST_STRINGS_EQUAL(mset.snippet(input, 9, stem),
356  "«<b>foo</b>» is...");
357 
358  input = "#include <foo.h> to use libfoo";
359  TEST_STRINGS_EQUAL(mset.snippet(input, 12, stem),
360  "...&lt;<b>foo</b>.h&gt; to...");
361 
362  input = "¡foo!";
363  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
364  "¡<b>foo</b>!");
365 
366  input = "¿foo?";
367  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
368  "¿<b>foo</b>?");
369 
370  input = "(foo) test";
371  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
372  "(<b>foo</b>) test");
373 
374  input = "{foo} test";
375  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
376  "{<b>foo</b>} test");
377 
378  input = "`foo` test";
379  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
380  "`<b>foo</b>` test");
381 
382  input = "@foo@ is replaced";
383  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
384  "@<b>foo</b>@ is replaced");
385 
386  input = "%foo is a perl hash";
387  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
388  "%<b>foo</b> is a perl hash");
389 
390  input = "&foo takes the address of foo";
391  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
392  "&amp;<b>foo</b> takes the address of <b>foo</b>");
393 
394  input = "§3.1.4 foo";
395  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
396  "§3.1.4 <b>foo</b>");
397 
398  input = "#foo";
399  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
400  "#<b>foo</b>");
401 
402  input = "~foo~ test";
403  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
404  "~<b>foo</b>~ test");
405 
406  input = "( foo )";
407  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
408  "<b>foo</b>...");
409 
410  input = "(=foo=)";
411  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
412  "<b>foo</b>...");
413 
414  // Check that excessive non-word characters aren't included.
415  input = "((((((foo";
416  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
417  "<b>foo</b>");
418 
419  // Check we don't include characters that aren't useful.
420  input = "bar,foo!";
421  TEST_STRINGS_EQUAL(mset.snippet(input, 5, stem),
422  "...<b>foo</b>!");
423 
424  // Check trailing characters are included when useful.
425  input = "/opt/foo/bin/";
426  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
427  "/opt/<b>foo</b>/bin/");
428 
429  input = "\"foo bar\"";
430  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
431  "\"<b>foo</b> bar\"");
432 
433  input = "\\\\server\\share\\foo\\";
434  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
435  "\\\\server\\share\\<b>foo</b>\\");
436 
437  input = "«foo»";
438  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
439  "«<b>foo</b>»");
440 
441  input = "#include <foo>";
442  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
443  "#include &lt;<b>foo</b>&gt;");
444 
445  input = "(foo)";
446  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
447  "(<b>foo</b>)");
448 
449  input = "{foo}";
450  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
451  "{<b>foo</b>}");
452 
453  input = "[foo]";
454  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
455  "[<b>foo</b>]");
456 
457  input = "`foo`";
458  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
459  "`<b>foo</b>`");
460 
461  input = "@foo@";
462  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
463  "@<b>foo</b>@");
464 
  // This input also exercises the second query term, "10".
465  input = "foo for 10¢";
466  TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
467  "<b>foo</b> for <b>10</b>¢");
468 }
469 
// Test snippet() with very small and zero lengths: every vector in the
// table expects an empty string.
471 DEFINE_TESTCASE(snippet_small_zerolength, backend) {
472  Xapian::Enquire enquire(get_database("apitest_simpledata"));
  // NOTE(review): listing line 473 is missing from this extract; presumably
  // it opens the enquire.set_query(Xapian::Query(...)) call closed below -
  // confirm against upstream.
474  Xapian::Query("rubbish"),
475  Xapian::Query("mention")));
476  Xapian::MSet mset = enquire.get_mset(0, 0);
477 
478  static const snippet_testcase testcases[] = {
479  // Test with small length
480  { "mention junk rubbish", 3, "" },
481  { "Project R.U.B.B.I.S.H. greenlit", 5, "" },
482  { "What load rubbish", 3, "" },
483  { "Mention rubbish", 4, "" },
484 
485  // Test with zero length.
486  { "Rubbish and junk", 0, "" },
487  { "Project R.U.B.B.I.S.H. greenlit", 0, "" },
488  { "What a load of rubbish", 0, "" },
489  { "rubbish mention rubbish mention", 0, "" },
490  };
491 
492  for (auto i : testcases) {
493  TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len), i.expect);
494  }
495 }
496 
// Test snippet generation with SNIPPET_NGRAMS on text in a script without
// explicit word breaks.  A one-document database is generated by the lambda
// below and the query is parsed with FLAG_NGRAMS.
498 DEFINE_TESTCASE(snippet_ngrams, backend) {
499  Xapian::Database db = get_database("snippet_ngrams",
500  [](Xapian::WritableDatabase& wdb,
501  const string&)
502  {
503  Xapian::Document doc;
  // NOTE(review): listing lines 504-505 are missing from this extract;
  // presumably they declare the TermGenerator `tg` and enable its
  // FLAG_NGRAMS via set_flags() - confirm against upstream.
506  tg.set_document(doc);
507  tg.index_text("明末時已經有香港地方的概念");
508  wdb.add_document(doc);
509  });
510  Xapian::Enquire enquire(db);
  // NOTE(review): listing line 511 is missing from this extract; presumably
  // it declares the Xapian::QueryParser `qp` used below - confirm against
  // upstream.
512  auto q = qp.parse_query("已經完成", qp.FLAG_DEFAULT | qp.FLAG_NGRAMS);
513  enquire.set_query(q);
514 
515  Xapian::MSet mset = enquire.get_mset(0, 0);
516 
517  Xapian::Stem stem;
518  const char *input = "明末時已經有香港地方的概念";
519  size_t len = strlen(input);
520 
521  unsigned flags = Xapian::MSet::SNIPPET_NGRAMS;
522  string s;
  // Full-length snippet: the whole text is returned with highlighting.
523  s = mset.snippet(input, len, stem, flags, "<b>", "</b>", "...");
524  TEST_STRINGS_EQUAL(s, "明末時<b>已</b><b>經</b>有香港地方的概念");
525 
  // Half the byte length: the text is truncated at both ends.
526  s = mset.snippet(input, len / 2, stem, flags, "<b>", "</b>", "...");
527  TEST_STRINGS_EQUAL(s, "...<b>已</b><b>經</b>有香港地...");
528 }
529 
// Test snippet generation with SNIPPET_WORD_BREAKS.  When USE_ICU is
// defined the highlighted snippets are compared directly; otherwise each
// call is expected to throw Xapian::FeatureUnavailableError with a fixed
// message.
531 DEFINE_TESTCASE(snippet_wordbreaks, backend) {
532  Xapian::Enquire enquire(get_database("apitest_simpledata"));
533  enquire.set_query(Xapian::Query("已經"));
534 
535  Xapian::MSet mset = enquire.get_mset(0, 0);
536 
537  Xapian::Stem stem;
538  const char *input = "明末時已經有香港地方的概念";
539  const char *input2 = "明末時已經有香港地方的概念. Hello!";
  // `len` is the byte length of `input`; it is also used (halved) with
  // `input2` below.
540  size_t len = strlen(input);
541 
542  unsigned flags = Xapian::MSet::SNIPPET_WORD_BREAKS;
543 
  // DO_TEST(CODE, RESULT): with ICU, check that CODE evaluates to RESULT;
  // without ICU, check that CODE throws FeatureUnavailableError (RESULT is
  // unused in that case).
544 #ifdef USE_ICU
545 # define DO_TEST(CODE, RESULT) TEST_STRINGS_EQUAL(CODE, RESULT)
546 #else
547 # define DO_TEST(CODE, RESULT) \
548  try { \
549  CODE; \
550  FAIL_TEST("No exception thrown, expected FeatureUnavailableError"); \
551  } catch (const Xapian::FeatureUnavailableError& e) { \
552  TEST_STRINGS_EQUAL( \
553  e.get_msg(), \
554  "SNIPPET_WORD_BREAKS requires building Xapian to use ICU"); \
555  }
556 #endif
557  DO_TEST(mset.snippet(input, len, stem, flags, "<b>", "</b>", "..."),
558  "明末時<b>已經</b>有香港地方的概念");
  // Also exercises custom highlight and omission markers.
559  DO_TEST(mset.snippet(input2, len / 2, stem, flags, "[", "]", "~"),
560  "~時[已經]有香港~");
561 #undef DO_TEST
562 }
563 
// Test snippet() on an MSet obtained from an empty (default-constructed)
// query: the input text is expected back unchanged.
564 DEFINE_TESTCASE(snippet_empty_mset, backend) {
565  Xapian::Enquire enquire(get_database("apitest_simpledata"));
566  enquire.set_query(Xapian::Query());
567  Xapian::MSet mset = enquire.get_mset(0, 0);
568  TEST_STRINGS_EQUAL(mset.snippet("foo", 3), "foo");
569 }
570 
// Test snippet() on a default-constructed MSet (no Enquire or database
// involved): the input text is expected back unchanged.
571 DEFINE_TESTCASE(snippet_empty_mset2, !backend) {
572  Xapian::MSet mset;
573  TEST_STRINGS_EQUAL(mset.snippet("foo", 3), "foo");
574 }
DEFINE_TESTCASE(snippet1, backend)
Test snippets without stemming.
Definition: api_snippets.cc:44
#define DO_TEST(CODE, RESULT)
static void make_tg_db(Xapian::WritableDatabase &db, const string &source)
Index file to a DB with TermGenerator.
#define SHY
#define ZWSP
static const testcase testcases[]
Definition: api_unicode.cc:40
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
Class implementing a "boolean" weighting scheme.
Definition: weight.h:678
An indexed database of documents.
Definition: database.h:75
Class representing a document.
Definition: document.h:64
void set_data(std::string_view data)
Set the document data.
Definition: document.cc:81
Querying session.
Definition: enquire.h:57
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
Definition: enquire.cc:85
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
Class representing a list of search results.
Definition: mset.h:46
@ SNIPPET_EMPTY_WITHOUT_MATCH
Return the empty string if no term got matched.
Definition: mset.h:338
@ SNIPPET_NGRAMS
Generate n-grams for scripts without explicit word breaks.
Definition: mset.h:363
@ SNIPPET_WORD_BREAKS
Find word breaks for text in scripts without explicit word breaks.
Definition: mset.h:387
@ SNIPPET_EXHAUSTIVE
Exhaustively evaluate candidate snippets in MSet::snippet().
Definition: mset.h:331
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
std::string snippet(std::string_view text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, std::string_view hi_start="<b>", std::string_view hi_end="</b>", std::string_view omit="...") const
Generate a snippet.
Definition: mset.cc:380
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:516
Query parse_query(std::string_view query_string, unsigned flags=FLAG_DEFAULT, std::string_view default_prefix={})
Parse a query.
Definition: queryparser.cc:174
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
Definition: queryparser.h:635
@ FLAG_DEFAULT
The default flags.
Definition: queryparser.h:784
Class representing a query.
Definition: query.h:45
@ OP_WILDCARD
Wildcard expansion.
Definition: query.h:255
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Definition: query.h:152
Class representing a stemming algorithm.
Definition: stem.h:74
Parses a piece of text and generates terms.
Definition: termgenerator.h:49
void index_text(const Xapian::Utf8Iterator &itor, Xapian::termcount wdf_inc=1, std::string_view prefix={})
Index some text.
void set_document(const Xapian::Document &doc)
Set the current document.
flags set_flags(flags toggle, flags mask=flags(0))
Set flags.
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
void set_stemmer(const Xapian::Stem &stemmer)
Set the Xapian::Stem object to be used for generating stemmed terms.
This class provides read/write access to a database.
Definition: database.h:964
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
static std::string get_srcdir()
Read srcdir from environment and if not present, make a valiant attempt to guess a value.
Definition: testsuite.cc:135
bool C_isnotspace(char ch)
Definition: stringutils.h:224
Definition: header.h:87
const char * expect
Definition: api_snippets.cc:40
const char * input
Definition: api_snippets.cc:38
a generic test suite engine
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:65
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Definition: testsuite.h:285
Xapian-specific test helper functions and macros.
Public interfaces for the Xapian library.