54 {
"Rubbish and junk", 20,
"<b>Rubbish</b> and junk" },
55 {
"Project R.U.B.B.I.S.H. greenlit", 31,
"Project <b>R.U.B.B.I.S.H.</b> greenlit" },
56 {
"What a load of rubbish", 100,
"What a load of <b>rubbish</b>" },
57 {
"Mention rubbish", 100,
"<b>Mention</b> <b>rubbish</b>" },
58 {
"A mention of rubbish", 100,
"A <b>mention</b> of <b>rubbish</b>" },
59 {
"Rubbish mention of rubbish", 100,
"<b>Rubbish</b> <b>mention</b> of <b>rubbish</b>" },
62 {
"Rubbish and junk", 12,
"<b>Rubbish</b> and..." },
63 {
"Project R.U.B.B.I.S.H. greenlit", 14,
"...<b>R.U.B.B.I.S.H.</b>..." },
64 {
"What a load of rubbish", 12,
"...of <b>rubbish</b>" },
65 {
"What a load of rubbish", 8,
"...<b>rubbish</b>" },
66 {
"Rubbish mention where the start is better than the rubbish ending", 18,
"<b>Rubbish</b> <b>mention</b>..." },
69 {
"And of the rubbish document to this", 18,
"...<b>rubbish</b> document..." },
70 {
"And if they document rubbish to be this", 18,
"...document <b>rubbish</b>..." },
73 for (
auto i : testcases) {
90 {
"You rubbished my ideas", 24,
"You rubbished my ideas" },
91 {
"Rubbished all my examples", 20,
"...all my <b>examples</b>" },
92 {
"Examples of text", 20,
"<b>Examples</b> of text" },
96 for (
auto i : testcases) {
113 {
"A mention of rubbish", 18,
"...mention of rubbish" },
114 {
"This is a rubbish mention", 20,
"...is a <b>rubbish mention</b>" },
115 {
"Mention of a rubbish mention of rubbish", 45,
"Mention of a <b>rubbish mention</b> of rubbish" },
116 {
"Mention of a rubbish mention of rubbish", 18,
"...<b>rubbish mention</b> of..." },
117 {
"rubbish rubbish mention mention", 45,
"rubbish <b>rubbish mention</b> mention" },
118 {
"rubbish mention rubbish mention", 45,
"<b>rubbish mention</b> <b>rubbish mention</b>" },
122 for (
auto i : testcases) {
132 file +=
"/testdata/";
136 input.open(file.c_str());
137 if (!input.is_open()) {
138 FAIL_TEST(
"Couldn't open input: " << file);
143 while (!input.eof()) {
148 getline(input, line);
149 if (find_if(line.begin(), line.end(),
C_isnotspace) == line.end())
152 if (!data.empty()) data +=
' ';
167 static const char *
const words[] = {
"do",
"we",
"have" };
173 "How much o'brien <b>do we have</b>? Miles...");
175 "...Unicode: How much o’brien <b>do we have</b>?");
177 "We do have we <b>do we have</b> do we.");
183 "\"<b>Welcome</b> to <b>Mike's</b>...");
187 "...<b>Mike</b> can...");
193 "...<b>Mike's</b> <b>Mechanical</b>...");
195 "<b>Mike</b> <b>McDonald</b> is a <b>mechanic</b> who enjoys repairing things of a <b>mechanical</b> sort.");
197 "From autos to zip-lines, from tea-lights to x-rays, from sea ships to u-boats - <b>Mike</b> can fix them all.");
199 "How <b>much</b> o'brien do we have? <b>Miles</b> O'Brien, that's how <b>much</b>.");
203 "...<b>much</b> o’brien do we have? <b>Miles</b> O’Brien, that’s how <b>much</b>.");
212 {
"A rubbish, but a good example", 14,
"...<b>rubbish</b>, but a..."},
215 {
"Rubbish and rubbish, and rubbish examples", 22,
"...and <b>rubbish</b> <b>examples</b>"},
217 {
"rubbish rubbish example rubbish rubbish", 16,
"...<b>example</b> <b>rubbish</b>..." },
224 for (
auto i : testcases) {
241 {
"rubbish rubbish example rubbish rubbish", 16,
"...<b>example</b> <b>rubbish</b>..." },
243 {
"Rubbish and rubbish, and rubbish examples", 22,
"...and <b>rubbish</b> <b>examples</b>"},
245 {
"A rubbish, but a good example", 14,
"...a good <b>example</b>"},
252 for (
auto i : testcases) {
276 const char *
input =
"A string without a match.";
277 size_t len = strlen(input);
288 input =
"A rubbish example text";
293 "A <b>rubbish</b> <b>example</b> text");
297 "A <b>rubbish</b> <b>example</b> text");
309 const char *
input =
"[xapian-devel] Re: foo";
311 "[xapian-devel] Re: <b>foo</b>");
313 input =
"bar [xapian-devel] Re: foo";
315 "...[xapian-devel] Re: <b>foo</b>");
317 input =
"there is a $1000 prize for foo";
319 "...$1000 prize for <b>foo</b>");
321 input =
"-1 is less than foo";
323 "-1 is less than <b>foo</b>");
325 input =
"+1 is less than foo";
327 "+1 is less than <b>foo</b>");
329 input =
"/bin/sh is a foo";
331 "/bin/sh is a <b>foo</b>");
333 input =
"'tis pity foo is a bar";
335 "'tis pity <b>foo</b> is a bar");
337 input =
"\"foo bar\" he whispered";
339 "\"<b>foo</b> bar\" he...");
341 input =
"\\\\server\\share\\foo is a UNC path";
343 "\\\\server\\share\\<b>foo</b> is a UNC path");
345 input =
"«foo» is a placeholder";
347 "«<b>foo</b>» is...");
349 input =
"#include <foo.h> to use libfoo";
351 "...<<b>foo</b>.h> to...");
361 input =
"(foo) test";
363 "(<b>foo</b>) test");
365 input =
"{foo} test";
367 "{<b>foo</b>} test");
369 input =
"`foo` test";
371 "`<b>foo</b>` test");
373 input =
"@foo@ is replaced";
375 "@<b>foo</b>@ is replaced");
377 input =
"%foo is a perl hash";
379 "%<b>foo</b> is a perl hash");
381 input =
"&foo takes the address of foo";
383 "&<b>foo</b> takes the address of <b>foo</b>");
385 input =
"§3.1.4 foo";
387 "§3.1.4 <b>foo</b>");
393 input =
"~foo~ test";
395 "~<b>foo</b>~ test");
416 input =
"/opt/foo/bin/";
418 "/opt/<b>foo</b>/bin/");
420 input =
"\"foo bar\"";
422 "\"<b>foo</b> bar\"");
424 input =
"\\\\server\\share\\foo\\";
426 "\\\\server\\share\\<b>foo</b>\\");
432 input =
"#include <foo>";
434 "#include <<b>foo</b>>");
456 input =
"foo for 10¢";
458 "<b>foo</b> for <b>10</b>¢");
471 {
"mention junk rubbish", 3,
"" },
472 {
"Project R.U.B.B.I.S.H. greenlit", 5,
"" },
473 {
"What load rubbish", 3,
"" },
474 {
"Mention rubbish", 4,
"" },
477 {
"Rubbish and junk", 0,
"" },
478 {
"Project R.U.B.B.I.S.H. greenlit", 0,
"" },
479 {
"What a load of rubbish", 0,
"" },
480 {
"rubbish mention rubbish mention", 0,
"" },
483 for (
auto i : testcases) {
504 enquire.set_query(q);
509 const char *
input =
"明末時已經有香港地方的概念";
510 size_t len = strlen(input);
514 s = mset.
snippet(input, len, stem, flags,
"<b>",
"</b>",
"...");
517 s = mset.
snippet(input, len / 2, stem, flags,
"<b>",
"</b>",
"...");
Xapian::doccount size() const
Return number of items in this MSet object.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Generate n-grams for scripts without explicit word breaks.
Exhaustively evaluate candidate snippets in MSet::snippet().
static void make_tg_db(Xapian::WritableDatabase &db, const string &source)
Index file to a DB with TermGenerator.
This class is used to access a database, or a group of databases.
Class representing a stemming algorithm.
void set_document(const Xapian::Document &doc)
Set the current document.
Parses a piece of text and generates terms.
std::string snippet(const std::string &text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, const std::string &hi_start="<b>", const std::string &hi_end="</b>", const std::string &omit="...") const
Generate a snippet.
Build a Xapian::Query object from a user query string.
a generic test suite engine
Class representing a list of search results.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
static std::string get_srcdir()
Read srcdir from environment and if not present, make a valiant attempt to guess a value...
Generate n-grams for scripts without explicit word breaks.
void index_text(const Xapian::Utf8Iterator &itor, Xapian::termcount wdf_inc=1, const std::string &prefix=std::string())
Index some text.
test functionality of the Xapian API
Class implementing a "boolean" weighting scheme.
This class provides read/write access to a database.
Match only documents where all subqueries match near and in order.
Public interfaces for the Xapian library.
void set_stemmer(const Xapian::Stem &stemmer)
Set the Xapian::Stem object to be used for generating stemmed terms.
DEFINE_TESTCASE(snippet1, backend)
Test snippets without stemming.
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Xapian::Database get_database(const string &dbname)
flags set_flags(flags toggle, flags mask=flags(0))
Set flags.
This class provides an interface to the information retrieval system for the purpose of searching...
Match documents which at least one subquery matches.
Xapian-specific test helper functions and macros.
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Return the empty string if no term got matched.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Class representing a query.
#define TEST_EQUAL(a, b)
Test for equality of two things.
void set_data(const std::string &data)
Set data stored in the document.
bool C_isnotspace(char ch)
Generate n-grams for scripts without explicit word breaks.
A handle representing a document in a Xapian database.
static const testcase testcases[]