33 #define XAPIAN_DEPRECATED(X) X
48 for ( ; i != mset.
end(); ++i) {
57 for ( ; i != mset.
end(); ++i) {
64 const string & t1 =
string(),
const string & t2 =
string(),
65 const string & t3 =
string(),
const string & t4 =
string(),
66 const string & t5 =
string(),
const string & t6 =
string(),
67 const string & t7 =
string(),
const string & t8 =
string(),
68 const string & t9 =
string(),
const string & t10 =
string())
72 if (!t1.empty()) v.push_back(
stemmer(t1));
73 if (!t2.empty()) v.push_back(
stemmer(t2));
74 if (!t3.empty()) v.push_back(
stemmer(t3));
75 if (!t4.empty()) v.push_back(
stemmer(t4));
76 if (!t5.empty()) v.push_back(
stemmer(t5));
77 if (!t6.empty()) v.push_back(
stemmer(t6));
78 if (!t7.empty()) v.push_back(
stemmer(t7));
79 if (!t8.empty()) v.push_back(
stemmer(t8));
80 if (!t9.empty()) v.push_back(
stemmer(t9));
81 if (!t10.empty()) v.push_back(
stemmer(t10));
87 const string & t1 =
string(),
const string & t2 =
string(),
88 const string & t3 =
string(),
const string & t4 =
string(),
89 const string & t5 =
string(),
const string & t6 =
string(),
90 const string & t7 =
string(),
const string & t8 =
string(),
91 const string & t9 =
string(),
const string & t10 =
string())
95 if (!t1.empty()) v.push_back(
stemmer(t1));
96 if (!t2.empty()) v.push_back(
stemmer(t2));
97 if (!t3.empty()) v.push_back(
stemmer(t3));
98 if (!t4.empty()) v.push_back(
stemmer(t4));
99 if (!t5.empty()) v.push_back(
stemmer(t5));
100 if (!t6.empty()) v.push_back(
stemmer(t6));
101 if (!t7.empty()) v.push_back(
stemmer(t7));
102 if (!t8.empty()) v.push_back(
stemmer(t8));
103 if (!t9.empty()) v.push_back(
stemmer(t9));
104 if (!t10.empty()) v.push_back(
stemmer(t10));
137 "A query on a database returned a zero docid");
154 vector<Xapian::Query> v;
329 TEST_REL(eset[36].get_weight(), <, 0);
330 TEST_REL(eset[36].get_weight(), >=, -100);
426 tout <<
"mymset.size() = " << mymset.
size() <<
'\n';
469 TEST_EQUAL(mymset1[5].get_document().get_data(),
470 mymset2[2].get_document().get_data());
481 for ( ; i != mymset.
end(); ++i) {
484 "convert_to_%(msetitor) != convert_to_%(wt)");
486 "convert_to_%(msetitor) != convert_to_%(wt)");
488 "percentage out of range: " << pct);
518 "this",
"line",
"paragraph",
"rubbish");
546 unsigned long sum = 0;
547 for (
unsigned ch : tname) {
553 return (sum % 2) == 0;
573 unsigned int neweset_size = 0;
575 for ( ; j != myeset_orig.
end(); ++j) {
576 if (myfunctor(*j)) neweset_size++;
583 tout <<
"orig_eset: ";
584 copy(myeset_orig.
begin(), myeset_orig.
end(),
585 ostream_iterator<Xapian::ESetItem>(
tout,
" "));
588 tout <<
"new_eset: ";
590 ostream_iterator<Xapian::ESetItem>(
tout,
" "));
596 for (; orig != myeset_orig.
end() && filt != myeset.
end(); ++orig, ++filt) {
598 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
604 "Mismatch in items " << *orig <<
" vs. " << *filt
605 <<
" after filtering");
608 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
614 "Extra items in the filtered eset.");
630 unsigned int neweset_size = 0;
635 string prefix(*j, 0, 1);
638 for ( ; j != myeset_orig.
end(); ++j) {
639 if (myfunctor(*j)) neweset_size++;
645 for (; orig != myeset_orig.
end() && filt != myeset.
end(); ++orig, ++filt) {
647 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
653 "Mismatch in items " << *orig <<
" vs. " << *filt
654 <<
" after filtering");
657 while (orig != myeset_orig.
end() && !myfunctor(*orig)) {
663 "Extra items in the filtered eset.");
670 "this",
"line",
"paragraph",
"rubbish"));
674 tout <<
"Original mset pcts:";
679 unsigned int num_items = 0;
684 for ( ; i != mymset1.
end(); ++i, ++c) {
686 if (new_pct != my_pct) {
688 if (changes > 3)
break;
696 tout <<
"Cutoff percent: " << my_pct <<
"\n";
703 tout <<
"Percentages after cutoff:";
709 "Match with % cutoff lost too many items");
714 "Match with % cutoff returned too many items");
724 tout <<
"Original mset pcts:";
730 TEST(mset[0].get_percent() - mset[1].get_percent() >= 2);
732 int cutoff = mset[0].get_percent() + mset[1].get_percent();
756 tout <<
"Original mset pcts:";
780 "this",
"line",
"paragraph",
"rubbish"));
784 tout <<
"Original mset weights:";
789 unsigned int num_items = 0;
794 for ( ; i != mymset1.
end(); ++i, ++c) {
796 if (new_wt != my_wt) {
798 if (changes > 3)
break;
806 tout <<
"Cutoff weight: " << my_wt <<
"\n";
813 tout <<
"Weights after cutoff:";
819 "Match with cutoff lost too many items");
822 (mymset2[num_items].get_weight() == my_wt &&
824 "Match with cutoff returned too many items");
830 string term =
"paragraph";
843 for ( ; j != myeset.
end(); ++j) {
849 for ( ; j != myeset2.
end(); ++j) {
850 if (*j == term)
break;
863 for ( ; i != mymset.
end(); ++i) {
877 "Mset was too small to test properly");
890 for ( ; i != mymset1.
end(); ++i, j++) {
905 for ( ; i != mymset1.
end(); ++i) {
923 "Mset was too small to test properly");
936 for ( ; j != mymset2.
end(); ++i, ++j) {
951 for (j = mymset3.
begin(); j != mymset3.
end(); ++j) {
962 list<string> answers_list;
963 answers_list.push_back(
"one");
964 answers_list.push_back(
"two");
965 answers_list.push_back(
"three");
966 answers_list.push_back(
"four");
986 TEST(list == answers_list);
991 list<string> answers_list;
992 answers_list.push_back(
"one");
993 answers_list.push_back(
"two");
994 answers_list.push_back(
"three");
1014 TEST(list == answers_list);
1039 mymset2.
fetch(mymset2[0], mymset2[mymset2.
size() - 1]);
1047 while (it1 != mymset1.
end() && it2 != mymset2.
end()) {
1072 vector<string> terms;
1073 terms.push_back(
"frink");
1110 for (
int i = 0; i < 2; ++i) {
1225 vector<Xapian::Query> qs;
1226 qs.push_back(
query(
"this"));
1229 qs.begin(), qs.end(), 1);
1247 vector<Xapian::Query> qs;
1248 qs.push_back(
query(
"this"));
1251 qs.begin(), qs.end(), 1);
1287 string term1 =
stemmer(
"word");
1288 string term2 =
stemmer(
"rubbish");
1289 string term3 =
stemmer(
"banana");
1291 vector<string> terms;
1292 terms.push_back(term1);
1293 terms.push_back(term2);
1294 terms.push_back(term3);
1332 "word",
"rubbish",
"fibble");
1350 "word",
"rubbish",
"fibble");
1379 for (
int i = 0; i != 3; ++i) {
1380 v.push_back(
"simpl");
1381 v.push_back(
"queri");
1383 v.push_back(
"rubbish");
1384 v.push_back(
"rubbish");
1385 v.push_back(
"rubbish");
1386 v.push_back(
"word");
1387 v.push_back(
"word");
1388 v.push_back(
"word");
1393 v.begin(), v.end(), n);
1419 string theterm =
stemmer(
"another");
1425 for ( ; i != eset1.
end(); ++i) {
1426 if (*i == theterm) {
1434 for ( ; i != eset2.
end(); ++i) {
1435 if (*i == theterm) {
1459 string term1 =
stemmer(
"word");
1460 string term2 =
stemmer(
"inmemory");
1461 string term3 =
stemmer(
"flibble");
1471 for (
int i = 1; i <= 2; ++i) {
1515 string term1 =
stemmer(
"paragraph");
1516 string term2 =
stemmer(
"another");
1548 bool remote =
get_dbtype().find(
"remote") != string::npos;
1555 myquery =
query(
"word");
1598 if (db.
size() == 1) {
1640 if (db.
size() > 1 && remote) {
1726 vector<string> v(t, tend);
1730 vector<string>::const_iterator i;
1731 for (i = v.begin(); i != v.end(); ++i) {
1798 for (m = mymset.
begin(); m != mymset.
end(); ++m) ++count;
1810 for (m = mymset.
begin(); m != mymset.
end(); ++m) ++count;
1816 if (value_no == 0) {
1817 TEST(value.size() > 262);
1818 TEST_EQUAL(
static_cast<unsigned char>(value[262]), 255);
1826 for (m = mymset.
begin(); m != mymset.
end(); ++m) ++count;
1835 vector<string> terms;
1836 terms.push_back(
stemmer(
"this"));
1837 terms.push_back(
stemmer(
"word"));
1838 terms.push_back(
stemmer(
"of"));
1860 vector<string> terms;
1861 terms.push_back(
stemmer(
"this"));
1862 terms.push_back(
stemmer(
"word"));
1863 terms.push_back(
stemmer(
"of"));
1899 vector<Xapian::Query> nullvec;
1932 mymset = enquire.
get_mset(0, 2, 4);
1945 for (
int order = 0; order < 3; ++order) {
1958 for (
int sort = 0; sort < 7; ++sort) {
1959 bool reverse = (sort & 1);
1984 mset = enquire.
get_mset(0, 50, 100);
1993 mset = enquire.
get_mset(0, 10, 50);
2164 static const char *
const queries[] = {
2167 "leave milk on fridge",
2168 "ordered milk operator",
2169 "ordered phrase operator",
2170 "leave \"milk on fridge\"",
2172 "leave \"milk notpresent\"",
2174 static const double multipliers[] = {
2175 -1000000, -2.5, -1, -0.5, 0, 0.5, 1, 2.5, 1000000,
2179 for (
auto qstr : queries) {
2183 for (
const double *multp = multipliers; multp[0] != multp[1]; ++multp) {
2184 double mult = *multp;
2204 i1 != mset1.
end() && i2 != mset2.
end(); ++i1, ++i2) {
2210 vector<Xapian::docid> ids1;
2211 vector<Xapian::docid> ids2;
2213 i1 != mset1.
end() && i2 != mset2.
end(); ++i1, ++i2) {
2216 ids1.push_back(*i1);
2217 ids2.push_back(*i2);
2219 sort(ids1.begin(), ids1.end());
2246 vector<Xapian::docid> ids1;
2247 set<Xapian::docid> idsin1;
2248 vector<Xapian::docid> ids3;
2255 for (i = mset1.
begin(); i != mset1.
end(); ++i) {
2260 for (i = mset3.
begin(); i != mset3.
end(); ++i) {
2261 if (idsin1.find(*i) != idsin1.end())
2265 sort(ids3.begin(), ids3.end());
2266 ids1.insert(ids1.end(), ids3.begin(), ids3.end());
2269 vector<Xapian::docid> ids5;
2272 for (i = mset5.
begin(); i != mset5.
end(); ++i) {
2298 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
static void print_mset_percentages(const Xapian::MSet &mset)
static Xapian::TermIterator test_termlist3_helper()
DEFINE_TESTCASE(zerodocid1, backend)
static void print_mset_weights(const Xapian::MSet &mset)
static void test_emptyterm1_helper(Xapian::Database &db)
Xapian::WritableDatabase get_writable_database(const string &dbname)
Xapian::Database get_database(const string &dbname)
test functionality of the Xapian API
#define SKIP_TEST_FOR_BACKEND(B)
bool operator()(const string &tname) const override
Do we want this term in the ESet?
Class implementing a "boolean" weighting scheme.
This class is used to access a database, or a group of databases.
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
size_t size() const
Return number of shards in this Database object.
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
std::string get_uuid() const
Get a UUID for the database.
bool term_exists(const std::string &tname) const
Check if a given term exists in the database.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Xapian::Document get_document(Xapian::docid did) const
Get a document from the database, given its document id.
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
PositionIterator positionlist_begin(Xapian::docid did, const std::string &tname) const
An iterator pointing to the start of the position list for a given term in a given document.
Indicates an attempt to access a document not present in the database.
A handle representing a document in a Xapian database.
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
std::string get_data() const
Get data stored in the document.
std::string get_value(Xapian::valueno slot) const
Get value by number.
void set_data(const std::string &data)
Set data stored in the document.
Iterator over a Xapian::ESet.
double get_weight() const
Get the weight for the current position.
Class representing a list of search results.
ESetIterator back() const
Return iterator pointing to the last object in this ESet.
Xapian::termcount get_ebound() const
Return a bound on the full size of this ESet object.
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Xapian::doccount size() const
Return number of items in this ESet object.
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
This class provides an interface to the information retrieval system for the purpose of searching.
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
void set_sort_by_relevance()
Set the sorting to be by relevance only.
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
void set_docid_order(docid_order order)
Set sort order for document IDs.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
@ DESCENDING
docids sort in descending order.
@ ASCENDING
docids sort in ascending order (default)
@ DONT_CARE
docids sort in whatever order is most efficient for the backend.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
ExpandDecider subclass which restricts terms to a particular prefix.
Virtual base class for expand decider functor.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Iterator over a Xapian::MSet.
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Xapian::Document get_document() const
Get the Document object for the current position.
double get_weight() const
Get the weight for the current position.
Class representing a list of search results.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Xapian::doccount size() const
Return number of items in this MSet object.
double get_max_possible() const
The maximum possible weight any document could achieve.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
void fetch(const MSetIterator &begin, const MSetIterator &end) const
Prefetch hint a range of items.
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
double get_max_attained() const
The maximum weight attained by any document.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Class for iterating over a list of terms.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
void skip_to(Xapian::docid did)
Advance the iterator to document did.
Xapian::termcount get_doclength() const
Return the length of the document at the current position.
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Build a Xapian::Query object from a user query string.
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
Class representing a query.
std::string get_description() const
Return a string describing this object.
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
@ OP_XOR
Match documents which an odd number of subqueries match.
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
void add_document(Xapian::docid did)
Add a document to the relevance set.
Class representing a stemming algorithm.
Class for iterating over a list of terms.
void skip_to(const std::string &term)
Advance the iterator to term term.
Xapian::Weight subclass implementing the traditional probabilistic formula.
This class provides read/write access to a database.
void delete_document(Xapian::docid did)
Delete a document from the database.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
void commit()
Commit any pending modifications made to the database.
const int DB_BACKEND_INMEMORY
Use the "in memory" backend.
int percent
The percentage score for a document in an MSet.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
static Xapian::Stem stemmer
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a, >, b).
std::ostringstream tout
The debug printing stream.
a generic test suite engine
#define TEST_EQUAL(a, b)
Test for equality of two things.
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
#define TEST(a)
Test a condition, without an additional explanation for failure.
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
#define TEST_AND_EXPLAIN(a, b)
Test a condition, and display the test with an extra explanation if the condition fails.
bool mset_range_is_same_weights(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
bool mset_range_is_same(const Xapian::MSet &mset1, unsigned int first1, const Xapian::MSet &mset2, unsigned int first2, unsigned int count)
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
#define TEST_MSET_SIZE(M, S)
Check MSet M has size S.
Public interfaces for the Xapian library.