53 {
"Rubbish and junk", 20,
"<b>Rubbish</b> and junk" },
54 {
"Project R.U.B.B.I.S.H. greenlit", 31,
"Project <b>R.U.B.B.I.S.H.</b> greenlit" },
55 {
"What a load of rubbish", 100,
"What a load of <b>rubbish</b>" },
56 {
"Mention rubbish", 100,
"<b>Mention</b> <b>rubbish</b>" },
57 {
"A mention of rubbish", 100,
"A <b>mention</b> of <b>rubbish</b>" },
58 {
"Rubbish mention of rubbish", 100,
"<b>Rubbish</b> <b>mention</b> of <b>rubbish</b>" },
61 {
"Rubbish and junk", 12,
"<b>Rubbish</b> and..." },
62 {
"Project R.U.B.B.I.S.H. greenlit", 14,
"...<b>R.U.B.B.I.S.H.</b>..." },
63 {
"What a load of rubbish", 12,
"...of <b>rubbish</b>" },
64 {
"What a load of rubbish", 8,
"...<b>rubbish</b>" },
65 {
"Rubbish mention where the start is better than the rubbish ending", 18,
"<b>Rubbish</b> <b>mention</b>..." },
68 {
"And of the rubbish document to this", 18,
"...<b>rubbish</b> document..." },
69 {
"And if they document rubbish to be this", 18,
"...document <b>rubbish</b>..." },
72 #define SHY "\xc2\xad"
73 {
"rub" SHY "bish ment" SHY "ion", 20,
74 "<b>rub" SHY "bish</b> <b>ment" SHY "ion</b>" },
77 #define ZWSP "\xe2\x80\x8b"
78 {
"mention" ZWSP "rubbish" ZWSP "dolor", 30,
79 "<b>mention</b>" ZWSP "<b>rubbish</b>" ZWSP "dolor" },
99 {
"You rubbished my ideas", 24,
"You rubbished my ideas" },
100 {
"Rubbished all my examples", 20,
"...all my <b>examples</b>" },
101 {
"Examples of text", 20,
"<b>Examples</b> of text" },
122 {
"A mention of rubbish", 18,
"...mention of rubbish" },
123 {
"This is a rubbish mention", 20,
"...is a <b>rubbish mention</b>" },
124 {
"Mention of a rubbish mention of rubbish", 45,
"Mention of a <b>rubbish mention</b> of rubbish" },
125 {
"Mention of a rubbish mention of rubbish", 18,
"...<b>rubbish mention</b> of..." },
126 {
"rubbish rubbish mention mention", 45,
"rubbish <b>rubbish mention</b> mention" },
127 {
"rubbish mention rubbish mention", 45,
"<b>rubbish mention</b> <b>rubbish mention</b>" },
141 file +=
"/testdata/";
145 input.open(file.c_str());
146 if (!
input.is_open()) {
147 FAIL_TEST(
"Couldn't open input: " << file);
152 while (!
input.eof()) {
157 getline(
input, line);
158 if (find_if(line.begin(), line.end(),
C_isnotspace) == line.end())
161 if (!data.empty()) data +=
' ';
176 static const char *
const words[] = {
"do",
"we",
"have" };
182 "How much o'brien <b>do we have</b>? Miles...");
184 "...Unicode: How much o’brien <b>do we have</b>?");
186 "We do have we <b>do we have</b> do we.");
192 "\"<b>Welcome</b> to <b>Mike's</b>...");
196 "...<b>Mike</b> can...");
202 "...<b>Mike's</b> <b>Mechanical</b>...");
204 "<b>Mike</b> <b>McDonald</b> is a <b>mechanic</b> who enjoys repairing things of a <b>mechanical</b> sort.");
206 "From autos to zip-lines, from tea-lights to x-rays, from sea ships to u-boats - <b>Mike</b> can fix them all.");
208 "How <b>much</b> o'brien do we have? <b>Miles</b> O'Brien, that's how <b>much</b>.");
212 "...<b>much</b> o’brien do we have? <b>Miles</b> O’Brien, that’s how <b>much</b>.");
221 {
"A rubbish, but a good example", 14,
"...<b>rubbish</b>, but a..."},
224 {
"Rubbish and rubbish, and rubbish examples", 22,
"...and <b>rubbish</b> <b>examples</b>"},
226 {
"rubbish rubbish example rubbish rubbish", 16,
"...<b>example</b> <b>rubbish</b>..." },
250 {
"rubbish rubbish example rubbish rubbish", 16,
"...<b>example</b> <b>rubbish</b>..." },
252 {
"Rubbish and rubbish, and rubbish examples", 22,
"...and <b>rubbish</b> <b>examples</b>"},
254 {
"A rubbish, but a good example", 14,
"...a good <b>example</b>"},
285 const char *
input =
"A string without a match.";
286 size_t len = strlen(
input);
297 input =
"A rubbish example text";
302 "A <b>rubbish</b> <b>example</b> text");
306 "A <b>rubbish</b> <b>example</b> text");
318 const char *
input =
"[xapian-devel] Re: foo";
320 "[xapian-devel] Re: <b>foo</b>");
322 input =
"bar [xapian-devel] Re: foo";
324 "...[xapian-devel] Re: <b>foo</b>");
326 input =
"there is a $1000 prize for foo";
328 "...$1000 prize for <b>foo</b>");
330 input =
"-1 is less than foo";
332 "-1 is less than <b>foo</b>");
334 input =
"+1 is less than foo";
336 "+1 is less than <b>foo</b>");
338 input =
"/bin/sh is a foo";
340 "/bin/sh is a <b>foo</b>");
342 input =
"'tis pity foo is a bar";
344 "'tis pity <b>foo</b> is a bar");
346 input =
"\"foo bar\" he whispered";
348 "\"<b>foo</b> bar\" he...");
350 input =
"\\\\server\\share\\foo is a UNC path";
352 "\\\\server\\share\\<b>foo</b> is a UNC path");
354 input =
"«foo» is a placeholder";
356 "«<b>foo</b>» is...");
358 input =
"#include <foo.h> to use libfoo";
360 "...<<b>foo</b>.h> to...");
370 input =
"(foo) test";
372 "(<b>foo</b>) test");
374 input =
"{foo} test";
376 "{<b>foo</b>} test");
378 input =
"`foo` test";
380 "`<b>foo</b>` test");
382 input =
"@foo@ is replaced";
384 "@<b>foo</b>@ is replaced");
386 input =
"%foo is a perl hash";
388 "%<b>foo</b> is a perl hash");
390 input =
"&foo takes the address of foo";
392 "&<b>foo</b> takes the address of <b>foo</b>");
394 input =
"§3.1.4 foo";
396 "§3.1.4 <b>foo</b>");
402 input =
"~foo~ test";
404 "~<b>foo</b>~ test");
425 input =
"/opt/foo/bin/";
427 "/opt/<b>foo</b>/bin/");
429 input =
"\"foo bar\"";
431 "\"<b>foo</b> bar\"");
433 input =
"\\\\server\\share\\foo\\";
435 "\\\\server\\share\\<b>foo</b>\\");
441 input =
"#include <foo>";
443 "#include <<b>foo</b>>");
465 input =
"foo for 10¢";
467 "<b>foo</b> for <b>10</b>¢");
480 {
"mention junk rubbish", 3,
"" },
481 {
"Project R.U.B.B.I.S.H. greenlit", 5,
"" },
482 {
"What load rubbish", 3,
"" },
483 {
"Mention rubbish", 4,
"" },
486 {
"Rubbish and junk", 0,
"" },
487 {
"Project R.U.B.B.I.S.H. greenlit", 0,
"" },
488 {
"What a load of rubbish", 0,
"" },
489 {
"rubbish mention rubbish mention", 0,
"" },
513 enquire.set_query(q);
518 const char *
input =
"明末時已經有香港地方的概念";
519 size_t len = strlen(
input);
523 s = mset.
snippet(
input, len, stem, flags,
"<b>",
"</b>",
"...");
526 s = mset.
snippet(
input, len / 2, stem, flags,
"<b>",
"</b>",
"...");
538 const char *
input =
"明末時已經有香港地方的概念";
539 const char *input2 =
"明末時已經有香港地方的概念. Hello!";
540 size_t len = strlen(
input);
545 # define DO_TEST(CODE, RESULT) TEST_STRINGS_EQUAL(CODE, RESULT)
547 # define DO_TEST(CODE, RESULT) \
550 FAIL_TEST("No exception thrown, expected FeatureUnavailableError"); \
551 } catch (const Xapian::FeatureUnavailableError& e) { \
552 TEST_STRINGS_EQUAL( \
554 "SNIPPET_WORD_BREAKS requires building Xapian to use ICU"); \
558 "明末時<b>已經</b>有香港地方的概念");
559 DO_TEST(mset.
snippet(input2, len / 2, stem, flags,
"[",
"]",
"~"),
DEFINE_TESTCASE(snippet1, backend)
Test snippets without stemming.
#define DO_TEST(CODE, RESULT)
static void make_tg_db(Xapian::WritableDatabase &db, const string &source)
Index file to a DB with TermGenerator.
static const testcase testcases[]
Xapian::Database get_database(const string &dbname)
test functionality of the Xapian API
Class implementing a "boolean" weighting scheme.
An indexed database of documents.
Class representing a document.
void set_data(std::string_view data)
Set the document data.
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
void set_query(const Query &query, termcount query_length=0)
Set the query.
Class representing a list of search results.
@ SNIPPET_EMPTY_WITHOUT_MATCH
Return the empty string if no term got matched.
@ SNIPPET_NGRAMS
Generate n-grams for scripts without explicit word breaks.
@ SNIPPET_WORD_BREAKS
Find word breaks for text in scripts without explicit word breaks.
@ SNIPPET_EXHAUSTIVE
Exhaustively evaluate candidate snippets in MSet::snippet().
Xapian::doccount size() const
Return number of items in this MSet object.
std::string snippet(std::string_view text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, std::string_view hi_start="<b>", std::string_view hi_end="</b>", std::string_view omit="...") const
Generate a snippet.
Build a Xapian::Query object from a user query string.
Query parse_query(std::string_view query_string, unsigned flags=FLAG_DEFAULT, std::string_view default_prefix={})
Parse a query.
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
@ FLAG_DEFAULT
The default flags.
Class representing a query.
@ OP_WILDCARD
Wildcard expansion.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Class representing a stemming algorithm.
Parses a piece of text and generates terms.
void index_text(const Xapian::Utf8Iterator &itor, Xapian::termcount wdf_inc=1, std::string_view prefix={})
Index some text.
void set_document(const Xapian::Document &doc)
Set the current document.
flags set_flags(flags toggle, flags mask=flags(0))
Set flags.
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
void set_stemmer(const Xapian::Stem &stemmer)
Set the Xapian::Stem object to be used for generating stemmed terms.
This class provides read/write access to a database.
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
static std::string get_srcdir()
Read srcdir from the environment and, if it is not present, make a valiant attempt to guess a value.
bool C_isnotspace(char ch)
a generic test suite engine
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
#define TEST_EQUAL(a, b)
Test for equality of two things.
#define TEST_STRINGS_EQUAL(a, b)
Test for equality of two strings.
Xapian-specific test helper functions and macros.
Public interfaces for the Xapian library.