32 #define XAPIAN_DEPRECATED(X) X
47 for ( ; i != mset.
end(); ++i) {
56 for ( ; i != mset.
end(); ++i) {
63 const string & t1 =
string(),
const string & t2 =
string(),
64 const string & t3 =
string(),
const string & t4 =
string(),
65 const string & t5 =
string(),
const string & t6 =
string(),
66 const string & t7 =
string(),
const string & t8 =
string(),
67 const string & t9 =
string(),
const string & t10 =
string())
71 if (!t1.empty()) v.push_back(
stemmer(t1));
72 if (!t2.empty()) v.push_back(
stemmer(t2));
73 if (!t3.empty()) v.push_back(
stemmer(t3));
74 if (!t4.empty()) v.push_back(
stemmer(t4));
75 if (!t5.empty()) v.push_back(
stemmer(t5));
76 if (!t6.empty()) v.push_back(
stemmer(t6));
77 if (!t7.empty()) v.push_back(
stemmer(t7));
78 if (!t8.empty()) v.push_back(
stemmer(t8));
79 if (!t9.empty()) v.push_back(
stemmer(t9));
80 if (!t10.empty()) v.push_back(
stemmer(t10));
86 const string & t1 =
string(),
const string & t2 =
string(),
87 const string & t3 =
string(),
const string & t4 =
string(),
88 const string & t5 =
string(),
const string & t6 =
string(),
89 const string & t7 =
string(),
const string & t8 =
string(),
90 const string & t9 =
string(),
const string & t10 =
string())
94 if (!t1.empty()) v.push_back(
stemmer(t1));
95 if (!t2.empty()) v.push_back(
stemmer(t2));
96 if (!t3.empty()) v.push_back(
stemmer(t3));
97 if (!t4.empty()) v.push_back(
stemmer(t4));
98 if (!t5.empty()) v.push_back(
stemmer(t5));
99 if (!t6.empty()) v.push_back(
stemmer(t6));
100 if (!t7.empty()) v.push_back(
stemmer(t7));
101 if (!t8.empty()) v.push_back(
stemmer(t8));
102 if (!t9.empty()) v.push_back(
stemmer(t9));
103 if (!t10.empty()) v.push_back(
stemmer(t10));
136 "A query on a database returned a zero docid");
153 vector<Xapian::Query> v;
329 TEST_REL(eset[36].get_weight(), <, 0);
330 TEST_REL(eset[36].get_weight(), >=, -100);
427 tout <<
"mymset.size() = " << mymset.
size() <<
'\n';
470 TEST_EQUAL(mymset1[5].get_document().get_data(),
471 mymset2[2].get_document().get_data());
482 for ( ; i != mymset.
end(); ++i) {
485 "convert_to_%(msetitor) != convert_to_%(wt)");
487 "convert_to_%(msetitor) != convert_to_%(wt)");
489 "percentage out of range: " << pct);
519 "this",
"line",
"paragraph",
"rubbish");
547 unsigned long sum = 0;
548 for (
unsigned ch : tname) {
554 return (sum % 2) == 0;
574 unsigned int neweset_size = 0;
576 for ( ; j != myeset_orig.
end(); ++j) {
577 if (myfunctor(*j)) neweset_size++;
584 tout <<
"orig_eset: ";
585 copy(myeset_orig.
begin(), myeset_orig.
end(),
586 ostream_iterator<Xapian::ESetItem>(
tout,
" "));
589 tout <<
"new_eset: ";
591 ostream_iterator<Xapian::ESetItem>(
tout,
" "));
597 for (; orig != myeset_orig.
end() && filt != myeset.
end(); ++orig, ++filt) {
599 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
605 "Mismatch in items " << *orig <<
" vs. " << *filt
606 <<
" after filtering");
609 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
615 "Extra items in the filtered eset.");
631 unsigned int neweset_size = 0;
636 string prefix(*j, 0, 1);
639 for ( ; j != myeset_orig.
end(); ++j) {
640 if (myfunctor(*j)) neweset_size++;
646 for (; orig != myeset_orig.
end() && filt != myeset.
end(); ++orig, ++filt) {
648 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
654 "Mismatch in items " << *orig <<
" vs. " << *filt
655 <<
" after filtering");
658 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
664 "Extra items in the filtered eset.");
671 "this",
"line",
"paragraph",
"rubbish"));
675 tout <<
"Original mset pcts:";
680 unsigned int num_items = 0;
685 for ( ; i != mymset1.
end(); ++i, ++c) {
687 if (new_pct != my_pct) {
689 if (changes > 3)
break;
697 tout <<
"Cutoff percent: " << my_pct <<
"\n";
704 tout <<
"Percentages after cutoff:";
710 "Match with % cutoff lost too many items");
715 "Match with % cutoff returned too many items");
725 tout <<
"Original mset pcts:";
731 TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
733 int cutoff = mset[0].get_percent() + mset[1].get_percent();
757 tout <<
"Original mset pcts:";
765 if (new_percent != percent) {
767 tout <<
"Testing " << percent <<
"% cutoff\n";
772 percent = new_percent;
781 "this",
"line",
"paragraph",
"rubbish"));
785 tout <<
"Original mset weights:";
790 unsigned int num_items = 0;
795 for ( ; i != mymset1.
end(); ++i, ++c) {
797 if (new_wt != my_wt) {
799 if (changes > 3)
break;
807 tout <<
"Cutoff weight: " << my_wt <<
"\n";
814 tout <<
"Weights after cutoff:";
820 "Match with cutoff lost too many items");
823 (mymset2[num_items].get_weight() == my_wt &&
825 "Match with cutoff returned too many items");
831 string term =
"paragraph";
844 for ( ; j != myeset.
end(); ++j) {
850 for ( ; j != myeset2.
end(); ++j) {
851 if (*j ==
term)
break;
864 for ( ; i != mymset.
end(); ++i) {
878 "Mset was too small to test properly");
891 for ( ; i != mymset1.
end(); ++i, j++) {
906 for ( ; i != mymset1.
end(); ++i) {
924 "Mset was too small to test properly");
937 for ( ; j != mymset2.
end(); ++i, ++j) {
952 for (j = mymset3.
begin(); j != mymset3.
end(); ++j) {
963 list<string> answers_list;
964 answers_list.push_back(
"one");
965 answers_list.push_back(
"two");
966 answers_list.push_back(
"three");
967 answers_list.push_back(
"four");
987 TEST(list == answers_list);
992 list<string> answers_list;
993 answers_list.push_back(
"one");
994 answers_list.push_back(
"two");
995 answers_list.push_back(
"three");
1015 TEST(list == answers_list);
1040 mymset2.
fetch(mymset2[0], mymset2[mymset2.
size() - 1]);
1048 while (it1 != mymset1.
end() && it2 != mymset2.
end()) {
1073 vector<string> terms;
1074 terms.push_back(
"frink");
1111 for (
int i = 0; i < 2; ++i) {
1226 vector<Xapian::Query> qs;
1227 qs.push_back(
query(
"this"));
1230 qs.begin(), qs.end(), 1);
1248 vector<Xapian::Query> qs;
1249 qs.push_back(
query(
"this"));
1252 qs.begin(), qs.end(), 1);
1288 string term1 =
stemmer(
"word");
1289 string term2 =
stemmer(
"rubbish");
1290 string term3 =
stemmer(
"banana");
1292 vector<string> terms;
1293 terms.push_back(term1);
1294 terms.push_back(term2);
1295 terms.push_back(term3);
1333 "word",
"rubbish",
"fibble");
1351 "word",
"rubbish",
"fibble");
1380 for (
int i = 0; i != 3; ++i) {
1381 v.push_back(
"simpl");
1382 v.push_back(
"queri");
1384 v.push_back(
"rubbish");
1385 v.push_back(
"rubbish");
1386 v.push_back(
"rubbish");
1387 v.push_back(
"word");
1388 v.push_back(
"word");
1389 v.push_back(
"word");
1394 v.begin(), v.end(), n);
1420 string theterm =
stemmer(
"another");
1426 for ( ; i != eset1.
end(); ++i) {
1427 if (*i == theterm) {
1435 for ( ; i != eset2.
end(); ++i) {
1436 if (*i == theterm) {
1460 string term1 =
stemmer(
"word");
1461 string term2 =
stemmer(
"inmemory");
1462 string term3 =
stemmer(
"flibble");
1469 myquery = myquery &~ Xapian::Query(
"Boolean");
1473 for (
int i = 1; i <= 2; ++i) {
1518 string term1 =
stemmer(
"paragraph");
1519 string term2 =
stemmer(
"another");
1556 myquery =
query(
"word");
1599 if (db.
size() > 1) {
1722 vector<string> v(t, tend);
1726 vector<string>::const_iterator i;
1727 for (i = v.begin(); i != v.end(); ++i) {
1794 for (m = mymset.
begin(); m != mymset.
end(); ++m) ++count;
1806 for (m = mymset.
begin(); m != mymset.
end(); ++m) ++count;
1812 if (value_no == 0) {
1813 TEST(value.size() > 262);
1814 TEST_EQUAL(
static_cast<unsigned char>(value[262]), 255);
1822 for (m = mymset.
begin(); m != mymset.
end(); ++m) ++count;
1831 vector<string> terms;
1832 terms.push_back(
stemmer(
"this"));
1833 terms.push_back(
stemmer(
"word"));
1834 terms.push_back(
stemmer(
"of"));
1856 vector<string> terms;
1857 terms.push_back(
stemmer(
"this"));
1858 terms.push_back(
stemmer(
"word"));
1859 terms.push_back(
stemmer(
"of"));
1895 vector<Xapian::Query> nullvec;
1928 mymset = enquire.
get_mset(0, 2, 4);
1941 for (
int order = 0; order < 3; ++order) {
1955 bool reverse = (
sort & 1);
1980 mset = enquire.
get_mset(0, 50, 100);
1989 mset = enquire.
get_mset(0, 10, 50);
2160 static const char *
const queries[] = {
2163 "leave milk on fridge",
2164 "ordered milk operator",
2165 "ordered phrase operator",
2166 "leave \"milk on fridge\"",
2168 "leave \"milk notpresent\"",
2170 static const double multipliers[] = {
2171 -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
2175 for (
auto qstr : queries) {
2179 for (
const double *multp = multipliers; multp[0] != multp[1]; ++multp) {
2180 double mult = *multp;
2200 i1 != mset1.
end() && i2 != mset2.
end(); ++i1, ++i2) {
2206 vector<Xapian::docid> ids1;
2207 vector<Xapian::docid> ids2;
2209 i1 != mset1.
end() && i2 != mset2.
end(); ++i1, ++i2) {
2212 ids1.push_back(*i1);
2213 ids2.push_back(*i2);
2215 sort(ids1.begin(), ids1.end());
2242 vector<Xapian::docid> ids1;
2243 set<Xapian::docid> idsin1;
2244 vector<Xapian::docid> ids3;
2251 for (i = mset1.
begin(); i != mset1.
end(); ++i) {
2256 for (i = mset3.
begin(); i != mset3.
end(); ++i) {
2257 if (idsin1.find(*i) != idsin1.end())
2261 sort(ids3.begin(), ids3.end());
2262 ids1.insert(ids1.end(), ids3.begin(), ids3.end());
2265 vector<Xapian::docid> ids5;
2268 for (i = mset5.
begin(); i != mset5.
end(); ++i) {
2294 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
static void print_mset_percentages(const Xapian::MSet &mset)
static Xapian::TermIterator test_termlist3_helper()
DEFINE_TESTCASE(zerodocid1, backend)
static void print_mset_weights(const Xapian::MSet &mset)
static void test_emptyterm1_helper(Xapian::Database &db)
Xapian::WritableDatabase get_writable_database(const string &dbname)
Xapian::Database get_database(const string &dbname)
test functionality of the Xapian API
#define SKIP_TEST_FOR_BACKEND(B)
bool operator()(const string &tname) const override
Do we want this term in the ESet?
Class implementing a "boolean" weighting scheme.
An indexed database of documents.
Xapian::doccount get_termfreq(std::string_view term) const
Get the number of documents indexed by a specified term.
PostingIterator postlist_begin(std::string_view term) const
Start iterating the postings of a term.
TermIterator termlist_begin(Xapian::docid did) const
Start iterating the terms in a document.
PositionIterator positionlist_begin(Xapian::docid did, std::string_view term) const
Start iterating positions for a term in a document.
size_t size() const
Return number of shards in this Database object.
void add_database(const Database &other)
Add shards from another Database.
bool term_exists(std::string_view term) const
Test if a particular term is present in any document.
Xapian::termcount get_collection_freq(std::string_view term) const
Get the total number of occurrences of a specified term.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
PostingIterator postlist_end(std::string_view) const noexcept
End iterator corresponding to postlist_begin().
TermIterator termlist_end(Xapian::docid) const noexcept
End iterator corresponding to termlist_begin().
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
std::string get_uuid() const
Get the UUID for the database.
Indicates an attempt to access a document not present in the database.
Class representing a document.
void set_data(std::string_view data)
Set the document data.
std::string get_data() const
Get the document data.
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Iterator over a Xapian::ESet.
double get_weight() const
Get the weight for the current position.
Class representing a list of search results.
Xapian::termcount size() const
Return number of items in this ESet object.
ESetIterator back() const
Return iterator pointing to the last object in this ESet.
Xapian::termcount get_ebound() const
Return a bound on the full size of this ESet object.
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
static const int USE_EXACT_TERMFREQ
Flag telling get_eset() to always use the exact term frequency.
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
TermIterator get_matching_terms_begin(docid did) const
Iterate query terms matching a document.
void set_sort_by_value_then_relevance(valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
void set_cutoff(int percent_threshold, double weight_threshold=0)
Set lower bounds on percentage and/or weight.
void set_expansion_scheme(std::string_view eweightname, double expand_k=1.0) const
Set the weighting scheme to use for expansion.
void set_query(const Query &query, termcount query_length=0)
Set the query.
ESet get_eset(termcount maxitems, const RSet &rset, int flags=0, const ExpandDecider *edecider=NULL, double min_weight=0.0) const
Perform query expansion.
void set_sort_by_relevance_then_value(valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
void set_sort_by_relevance()
Set the sorting to be by relevance only.
void set_sort_by_value(valueno sort_key, bool reverse)
Set the sorting to be by value only.
void set_collapse_key(valueno collapse_key, doccount collapse_max=1)
Control collapsing of results.
void set_docid_order(docid_order order)
Set sort order for document IDs.
@ DESCENDING
docids sort in descending order.
@ ASCENDING
docids sort in ascending order (default)
@ DONT_CARE
docids sort in whatever order is most efficient for the backend.
static const int INCLUDE_QUERY_TERMS
Flag telling get_eset() to allow query terms in Xapian::ESet.
TermIterator get_matching_terms_end(docid) const noexcept
End iterator corresponding to get_matching_terms_begin().
ExpandDecider subclass which restricts terms to a particular prefix.
Virtual base class for expand decider functor.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Iterator over a Xapian::MSet.
int get_percent() const
Convert the weight of the current iterator position to a percentage.
double get_weight() const
Get the weight for the current position.
Xapian::Document get_document() const
Get the Document object for the current position.
Class representing a list of search results.
Xapian::doccount get_termfreq(std::string_view term) const
Get the termfreq of a term.
Xapian::doccount size() const
Return number of items in this MSet object.
double get_max_possible() const
The maximum possible weight any document could achieve.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
int convert_to_percent(double weight) const
Convert a weight to a percentage.
double get_termweight(std::string_view term) const
Get the term weight of a term.
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
void fetch(const MSetIterator &begin, const MSetIterator &end) const
Prefetch hint for a range of items.
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
double get_max_attained() const
The maximum weight attained by any document.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Class for iterating over a list of terms.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
void skip_to(Xapian::docid did)
Advance the iterator to document did.
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Build a Xapian::Query object from a user query string.
Query parse_query(std::string_view query_string, unsigned flags=FLAG_DEFAULT, std::string_view default_prefix={})
Parse a query.
Class representing a query.
std::string get_description() const
Return a string describing this object.
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
@ OP_XOR
Match documents which an odd number of subqueries match.
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
Class representing a set of documents judged as relevant.
void add_document(Xapian::docid did)
Mark a document as relevant.
Class representing a stemming algorithm.
Class for iterating over a list of terms.
void skip_to(std::string_view term)
Advance the iterator to term term.
Xapian::Weight subclass implementing the traditional probabilistic formula.
This class provides read/write access to a database.
void delete_document(Xapian::docid did)
Delete a document from the database.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a document in the database.
void commit()
Commit pending modifications.
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
const int DB_BACKEND_INMEMORY
Use the "in memory" backend.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
static Xapian::Stem stemmer
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b).
std::ostringstream tout
The debug printing stream.
a generic test suite engine
#define TEST_EQUAL(a, b)
Test for equality of two things.
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
#define TEST(a)
Test a condition, without an additional explanation for failure.
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails.
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Public interfaces for the Xapian library.