29 #define XAPIAN_DEPRECATED(D) D
42 tout << classname <<
'\n';
47 string obj_serialised = obj.serialise();
50 unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
55 unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised +
"X"));
56 FAIL_TEST(classname <<
" did not throw for unserialise with junk "
66 #define TEST_WEIGHT_CLASS_NO_PARAMS(W, N) test_weight_class_no_params<W>(#W, N)
71 const W& obj_default,
const W& obj_other)
73 tout << classname <<
'\n';
81 string obj_serialised = obj.serialise();
82 TEST_EQUAL(obj_serialised, obj_default.serialise());
84 string obj_other_serialised = obj_other.serialise();
87 unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
90 unique_ptr<Xapian::Weight> wt2(W().unserialise(obj_other_serialised));
91 TEST_EQUAL(obj_other_serialised, wt2->serialise());
95 unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised +
"X"));
96 FAIL_TEST(classname <<
" did not throw for unserialise with junk "
104 if (target ==
"TradWeight") target =
"BM25Weight";
116 #define TEST_WEIGHT_CLASS(W, N, DEFAULT, OTHER) \
117 test_weight_class<W>(#W, N, W DEFAULT, W OTHER)
132 (1, 0.5, 1, 0.5, 0.5));
134 (1, 0, 1, 0.5, 0.5, 1.0),
135 (1, 0, 1, 0.5, 0.5, 2.0));
162 auto term =
"robinson";
169 string_view params) {
170 tout <<
name <<
'(' << params <<
")\n";
175 if (
name ==
"Xapian::BoolWeight") {
179 }
else if (
name ==
"Xapian::CoordWeight") {
183 }
else if (!params.empty()) {
193 auto lm =
name.find(
"::LM");
198 bool has_sumextra = lm != string::npos &&
name[lm + 4] !=
'J';
200 double w = mset[i].get_weight();
201 double ws = mset_scaled[i].get_weight();
217 #define TEST_WEIGHTING_SCHEME(W, ...) \
218 helper(W(__VA_ARGS__), #W, "" #__VA_ARGS__)
246 #undef TEST_WEIGHTING_SCHEME
276 TEST_EQUAL(wt_ptr->serialise(), wt.serialise());
283 TEST_EQUAL(wt_ptr->serialise(), wt.serialise());
299 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
301 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
325 TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
326 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
327 TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
341 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
343 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
494 for (
int i = 0; i < 5; ++i) {
579 mset[3].get_weight() + 0.0086861771701328694);
606 mset[2].get_weight() + 0.016760925252262027);
640 TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
759 for (
int i = 0; i < 6; ++i) {
769 for (
int i = 0; i < 6; ++i) {
779 TEST_EQUAL_DOUBLE(mset[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
780 TEST_EQUAL_DOUBLE(mset[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
904 for (
int i = 0; i < 6; ++i) {
919 for (
int i = 0; i < 6; ++i) {
946 (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
948 (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
1072 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
1073 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
1110 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
1111 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
1152 TEST_EQUAL(wt_ptr->serialise(), wt.serialise());
1159 TEST_EQUAL(wt_ptr2->serialise(), wt2.serialise());
1170 : factor(-1.0), zero_inits(z), non_zero_inits(n) {
1171 need_stat(DOC_LENGTH);
1174 void init(
double factor_)
override {
1196 return 1.0 / doclen;
1209 unsigned zero_inits = 0, non_zero_inits = 0;
1219 double factor = -1.0;
1241 const string & term1_,
1242 const string & term2_,
1245 : db(db_), term1(term1_), term2(term2_),
1246 sum(sum_), sum_squares(sum_squares_)
1248 need_stat(COLLECTION_SIZE);
1249 need_stat(RSET_SIZE);
1250 need_stat(AVERAGE_LENGTH);
1251 need_stat(TERMFREQ);
1252 need_stat(RELTERMFREQ);
1253 need_stat(QUERY_LENGTH);
1256 need_stat(DOC_LENGTH);
1257 need_stat(DOC_LENGTH_MIN);
1258 need_stat(DOC_LENGTH_MAX);
1259 need_stat(DB_DOC_LENGTH_MIN);
1260 need_stat(DB_DOC_LENGTH_MAX);
1262 need_stat(COLLECTION_FREQ);
1263 need_stat(UNIQUE_TERMS);
1264 need_stat(UNIQUE_TERMS_MIN);
1265 need_stat(UNIQUE_TERMS_MAX);
1266 need_stat(DB_UNIQUE_TERMS_MIN);
1267 need_stat(DB_UNIQUE_TERMS_MAX);
1268 need_stat(TOTAL_LENGTH);
1269 need_stat(WDF_DOC_MAX);
1273 const string & term_,
1278 void init(
double factor_)
override {
1307 double total_term_occurences = get_average_length() * num_docs;
1309 if (term2.empty() || term2 ==
"=" || term2 ==
"_") {
1312 if (term2.empty()) {
1325 tout <<
"->" << *t <<
" " << tf <<
'\n';
1327 tfmax = max(tfmax, tf);
1330 cfmax = max(cfmax, cf);
1338 tfmax = max(tf1, tf2);
1342 cfmax = max(cf1, cf2);
1346 TEST_REL(get_termfreq(), >=, tfmax);
1347 TEST_REL(get_collection_freq(), >=, cfmax);
1349 TEST_REL(get_termfreq(), <=, tfsum);
1350 TEST_REL(get_collection_freq(), <=, cfsum);
1352 TEST_REL(get_termfreq(), <=, num_docs);
1353 TEST_REL(get_collection_freq(), <=, totlen);
1364 TEST_REL(uniqueterms,>=,uniqueterms_lower);
1365 TEST_REL(uniqueterms,<=,uniqueterms_upper);
1367 TEST_REL(uniqueterms_upper,<=,len_upper);
1376 TEST_EQUAL(get_db_doclength_lower_bound(), db_len_lower);
1377 TEST_EQUAL(get_db_doclength_upper_bound(), db_len_upper);
1378 TEST_EQUAL(get_db_unique_terms_lower_bound(), db_uniqueterms_lower);
1379 TEST_EQUAL(get_db_unique_terms_upper_bound(), db_uniqueterms_upper);
1380 if (db.
size() == 1) {
1383 TEST_EQUAL(uniqueterms_lower, db_uniqueterms_lower);
1384 TEST_EQUAL(uniqueterms_upper, db_uniqueterms_upper);
1386 TEST_REL(len_lower,>=,db_len_lower);
1387 TEST_REL(len_upper,<=,db_len_upper);
1388 TEST_REL(uniqueterms_lower,>=,db_uniqueterms_lower);
1389 TEST_REL(uniqueterms_upper,<=,db_uniqueterms_upper);
1393 sum_squares += wdf * wdf;
1399 if (len_upper == 0) {
1400 len_lower = get_doclength_lower_bound();
1401 len_upper = get_doclength_upper_bound();
1402 uniqueterms_lower = get_unique_terms_lower_bound();
1403 uniqueterms_upper = get_unique_terms_upper_bound();
1404 wdf_upper = get_wdf_upper_bound();
1412 return 1.0 / doclen;
1424 const string &
term = *a;
1443 expected_sum += wdf;
1444 expected_sum_squares += wdf * wdf;
1447 TEST_EQUAL(sum_squares, expected_sum_squares);
1458 const string & term1 = *a;
1460 const string & term2 = *a;
1489 }
else if (did1 < did2) {
1496 expected_sum += wdf;
1497 expected_sum_squares += wdf * wdf;
1519 TEST_EQUAL(sum_squares, expected_sum_squares);
1538 string term{
"radio"};
1541 enquire.set_query(q);
1545 enquire.set_weighting_scheme(wt);
1560 expected_sum += wdf;
1561 expected_sum_squares += wdf * wdf;
1564 TEST_EQUAL(sum_squares, expected_sum_squares);
1585 static const char *
const testcases[] = {
1610 vector<Xapian::PostingIterator> postlists;
1615 Heap::make(postlists.begin(), postlists.end(), PlCmp());
1618 while (!postlists.empty()) {
1622 Heap::pop(postlists.begin(), postlists.end(), PlCmp());
1623 postlists.pop_back();
1627 if (did_new != did) {
1628 expected_sum += wdf;
1629 expected_sum_squares += wdf * wdf;
1635 expected_sum += wdf;
1636 expected_sum_squares += wdf * wdf;
1640 TEST_REL(sum_squares, >=, expected_sum_squares);
1652 const string &
term = *a;
1672 expected_sum += wdf;
1673 expected_sum_squares += wdf * wdf;
1676 TEST_EQUAL(sum_squares, expected_sum_squares);
1692 : factor(-1.0), db(db_), stat_code(stat_code_)
1694 switch (stat_code) {
1699 need_stat(DOC_LENGTH);
1702 need_stat(WDF_DOC_MAX);
1705 void init(
double factor_)
override {
1720 return 1.0 / wdfdocmax;
1766 TEST_REL(mset2[0].get_weight(),<,mset1[0].get_weight());
1767 TEST_REL(mset2[1].get_weight(),<,mset1[1].get_weight());
1768 TEST_REL(mset2[2].get_weight(),<,mset1[2].get_weight());
1769 TEST_REL(mset2[3].get_weight(),<,mset1[3].get_weight());
1770 TEST_REL(mset2[4].get_weight(),<,mset1[4].get_weight());
1777 static const char *
const terms[] = {
1778 "this",
"line",
"paragraph",
"rubbish"
1800 static const char *
const terms[] = {
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
static const testcase testcases[]
static void test_weight_class_no_params(const char *classname, const char *name)
#define TEST_WEIGHT_CLASS_NO_PARAMS(W, N)
#define TEST_WEIGHTING_SCHEME(W,...)
DEFINE_TESTCASE(weightserialisation1, !backend)
Test serialisation and introspection of built-in weighting schemes.
static void test_weight_class(const char *classname, const char *name, const W &obj_default, const W &obj_other)
static void gen_wdf_eq_doclen_db(Xapian::WritableDatabase &db, const string &)
#define TEST_WEIGHT_CLASS(W, N, DEFAULT, OTHER)
Xapian::Database get_database(const string &dbname)
void XFAIL_FOR_BACKEND(const std::string &backend_prefix, const char *msg)
test functionality of the Xapian API
Weight * clone() const override
Clone this object.
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
CheckInitWeight(unsigned &z, unsigned &n)
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
unsigned & non_zero_inits
double get_sumextra(Xapian::termcount doclen, Xapian::termcount, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount, Xapian::termcount) const override
Calculate the weight contribution for this object's term to a document.
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount, Xapian::termcount wdfdocmax) const override
Calculate the weight contribution for this object's term to a document.
CheckStatsWeight5(const Xapian::Database &db_, char stat_code_='\0')
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
Weight * clone() const override
Clone this object.
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
CheckStatsWeight(const Xapian::Database &db_, const string &term_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
Weight * clone() const override
Clone this object.
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
double get_sumextra(Xapian::termcount doclen, Xapian::termcount, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
CheckStatsWeight(const Xapian::Database &db_, const string &term1_, const string &term2_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Xapian::termcount & sum_squares
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqueterms, Xapian::termcount wdfdocmax) const override
Calculate the weight contribution for this object's term to a document.
This class implements the BB2 weighting scheme.
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Class implementing a "boolean" weighting scheme.
Xapian::Weight subclass implementing Coordinate Matching.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence fr...
This class implements the DPH weighting scheme.
An indexed database of documents.
Xapian::doccount get_termfreq(std::string_view term) const
Get the number of documents indexed by a specified term.
Xapian::termcount get_unique_terms_lower_bound() const
Get a lower bound on the unique terms size of a document in this DB.
Xapian::totallength get_total_length() const
Get the total length of all the documents in the database.
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
PostingIterator postlist_begin(std::string_view term) const
Start iterating the postings of a term.
double get_avlength() const
Old name for get_average_length() for backward compatibility.
size_t size() const
Return number of shards in this Database object.
Xapian::termcount get_doclength(Xapian::docid did) const
Get the length of a specified document.
Xapian::termcount get_unique_terms_upper_bound() const
Get an upper bound on the unique terms size of a document in this DB.
TermIterator allterms_end(std::string_view={}) const noexcept
End iterator corresponding to allterms_begin(prefix).
Xapian::termcount get_collection_freq(std::string_view term) const
Get the total number of occurrences of a specified term.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
PostingIterator postlist_end(std::string_view) const noexcept
End iterator corresponding to postlist_begin().
TermIterator allterms_begin(std::string_view prefix={}) const
Start iterating all terms in the database with a given prefix.
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Xapian::Weight subclass implementing Dice Coefficient.
Class representing a document.
void add_boolean_term(std::string_view term)
Add a boolean filter term to the document.
void add_term(std::string_view term, Xapian::termcount wdf_inc=1)
Add a term to this document.
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
TermIterator get_matching_terms_begin(docid did) const
Iterate query terms matching a document.
void set_query(const Query &query, termcount query_length=0)
Set the query.
TermIterator get_matching_terms_end(docid) const noexcept
End iterator corresponding to get_matching_terms_begin().
const std::string & get_msg() const noexcept
Message giving details of the error, intended for human consumption.
This class implements the IfB2 weighting scheme.
This class implements the InL2 weighting scheme.
This class implements the IneB2 weighting scheme.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Language Model weighting with Two Stage smoothing.
Language Model weighting with Absolute Discount smoothing.
Language Model weighting with Dirichlet or Dir+ smoothing.
Language Model weighting with Jelinek-Mercer smoothing.
Iterator over a Xapian::MSet.
double get_weight() const
Get the weight for the current position.
Class representing a list of search results.
Xapian::doccount size() const
Return number of items in this MSet object.
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
This class implements the PL2 weighting scheme.
Class for iterating over a list of terms.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Class representing a query.
std::string get_description() const
Return a string describing this object.
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
@ OP_WILDCARD
Wildcard expansion.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Indicates an error in the std::string serialisation of an object.
Class for iterating over a list of terms.
Xapian::Weight subclass implementing the tf-idf weighting scheme.
@ GLOBAL_FREQ
Global frequency IDF.
@ LOG_GLOBAL_FREQ
Log global frequency IDF.
@ SQRT_GLOBAL_FREQ
Square root global frequency IDF.
@ INCREMENTED_GLOBAL_FREQ
Incremented global frequency IDF.
@ LOG_AVERAGE
Log average.
@ AUG_AVERAGE
Augmented average term frequency.
Xapian::Weight subclass implementing the traditional probabilistic formula.
Abstract base class for weighting schemes.
static const Weight * create(const std::string &scheme, const Registry &reg=Registry())
Return the appropriate weighting scheme object.
This class provides read/write access to a database.
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
C++ STL heap implementation with extensions.
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
#define CONST_STRLEN(S)
Returns the length of a string constant.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b).
std::ostringstream tout
The debug printing stream.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
#define TEST_EQUAL(a, b)
Test for equality of two things.
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
#define TEST(a)
Test a condition, without an additional explanation for failure.
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Public interfaces for the Xapian library.