45 string obj_serialised = obj.serialise();
48 unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
53 unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised +
"X"));
54 FAIL_TEST(name <<
" did not throw for unserialise with junk appended");
63 #define TEST_WEIGHT_CLASS_NO_PARAMS(W) test_weight_class_no_params<W>(#W) 77 string obj_serialised = obj.serialise();
78 TEST_EQUAL(obj_serialised, obj_default.serialise());
80 string obj_other_serialised = obj_other.serialise();
83 unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
86 unique_ptr<Xapian::Weight> wt2(W().unserialise(obj_other_serialised));
87 TEST_EQUAL(obj_other_serialised, wt2->serialise());
91 unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised +
"X"));
92 FAIL_TEST(name <<
" did not throw for unserialise with junk appended");
108 #define TEST_WEIGHT_CLASS(W, DEFAULT, OTHER) \ 109 test_weight_class<W>(#W, W DEFAULT, W OTHER) 123 (1, 0.5, 1, 0.5, 0.5));
125 (1, 0, 1, 0.5, 0.5, 1.0),
126 (1, 0, 1, 0.5, 0.5, 2.0));
146 auto term =
"robinson";
153 const string& params) {
154 tout << name <<
'(' << params <<
")\n";
159 if (name ==
"Xapian::BoolWeight") {
163 }
else if (name ==
"Xapian::CoordWeight") {
167 }
else if (!params.empty()) {
176 TEST_EQUAL(mset_scaled.size(), expected_matches);
179 mset[i].get_weight() * 15.0);
184 #define TEST_WEIGHTING_SCHEME(W, ...) \ 185 helper(W(__VA_ARGS__), #W, "" #__VA_ARGS__) 209 #undef TEST_WEIGHTING_SCHEME 235 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
237 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
261 TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
262 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
263 TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
277 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
279 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
431 for (
int i = 0; i < 5; ++i) {
516 mset[3].get_weight() + 0.0086861771701328694);
543 mset[2].get_weight() + 0.016760925252262027);
577 TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
681 for (
int i = 0; i < 6; ++i) {
691 for (
int i = 0; i < 6; ++i) {
701 TEST_EQUAL_DOUBLE(mset[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
702 TEST_EQUAL_DOUBLE(mset[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
720 : factor(-1.0), zero_inits(z), non_zero_inits(n) { }
722 void init(
double factor_)
override {
756 unsigned zero_inits = 0, non_zero_inits = 0;
786 const string & term1_,
787 const string & term2_,
790 : factor(-1.0), db(db_), term1(term1_), term2(term2_),
791 sum(sum_), sum_squares(sum_squares_),
794 need_stat(COLLECTION_SIZE);
795 need_stat(RSET_SIZE);
796 need_stat(AVERAGE_LENGTH);
798 need_stat(RELTERMFREQ);
799 need_stat(QUERY_LENGTH);
802 need_stat(DOC_LENGTH);
803 need_stat(DOC_LENGTH_MIN);
804 need_stat(DOC_LENGTH_MAX);
806 need_stat(COLLECTION_FREQ);
807 need_stat(UNIQUE_TERMS);
808 need_stat(TOTAL_LENGTH);
812 const string & term_,
817 void init(
double factor_)
override {
845 double total_term_occurences = get_average_length() * num_docs;
847 if (term2.empty() || term2 ==
"=" || term2 ==
"_") {
863 tout <<
"->" << *t <<
" " << tf <<
'\n';
865 tfmax = max(tfmax, tf);
868 cfmax = max(cfmax, cf);
876 tfmax = max(tf1, tf2);
880 cfmax = max(cf1, cf2);
884 TEST_REL(get_termfreq(), >=, tfmax);
885 TEST_REL(get_collection_freq(), >=, cfmax);
887 TEST_REL(get_termfreq(), <=, tfsum);
888 TEST_REL(get_collection_freq(), <=, cfsum);
890 TEST_REL(get_termfreq(), <=, num_docs);
891 TEST_REL(get_collection_freq(), <=, totlen);
902 sum_squares += wdf * wdf;
908 if (len_upper == 0) {
909 len_lower = get_doclength_lower_bound();
910 len_upper = get_doclength_upper_bound();
911 wdf_upper = get_wdf_upper_bound();
930 const string & term = *a;
950 expected_sum_squares += wdf * wdf;
953 TEST_EQUAL(sum_squares, expected_sum_squares);
964 const string & term1 = *a;
966 const string & term2 = *a;
995 }
else if (did1 < did2) {
1002 expected_sum += wdf;
1003 expected_sum_squares += wdf * wdf;
1025 TEST_EQUAL(sum_squares, expected_sum_squares);
1047 static const char *
const testcases[] = {
1053 for (
auto pattern : testcases) {
1072 vector<Xapian::PostingIterator> postlists;
1077 make_heap(postlists.begin(), postlists.end(), PlCmp());
1080 while (!postlists.empty()) {
1081 pop_heap(postlists.begin(), postlists.end(), PlCmp());
1085 postlists.pop_back();
1087 push_heap(postlists.begin(), postlists.end(), PlCmp());
1089 if (did_new != did) {
1090 expected_sum += wdf;
1091 expected_sum_squares += wdf * wdf;
1097 expected_sum += wdf;
1098 expected_sum_squares += wdf * wdf;
1102 TEST_REL(sum_squares, >=, expected_sum_squares);
1114 const string & term = *a;
1134 expected_sum += wdf;
1135 expected_sum_squares += wdf * wdf;
1138 TEST_EQUAL(sum_squares, expected_sum_squares);
1224 TEST_REL(mset2[0].get_weight(),>,mset1[0].get_weight());
1225 TEST_REL(mset2[1].get_weight(),>,mset1[1].get_weight());
1226 TEST_REL(mset2[2].get_weight(),>,mset1[2].get_weight());
1227 TEST_REL(mset2[3].get_weight(),>,mset1[3].get_weight());
1228 TEST_REL(mset2[4].get_weight(),>,mset1[4].get_weight());
1260 static const char *
const terms[] = {
1261 "this",
"line",
"paragraph",
"rubbish" 1264 terms, terms +
sizeof(terms) /
sizeof(terms[0]));
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
The Xapian namespace contains public interfaces for the Xapian library.
Xapian::doccount size() const
Return number of items in this MSet object.
Weight * clone() const override
Clone this object.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
#define TEST(a)
Test a condition, without an additional explanation for failure.
This class is used to access a database, or a group of databases.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
double weight
The weight of a document or term.
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
TermIterator allterms_end(const std::string &=std::string()) const
Corresponding end iterator to allterms_begin(prefix).
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
Weight * clone() const override
Clone this object.
Class representing a list of search results.
This class implements the InL2 weighting scheme.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
CheckInitWeight(unsigned &z, unsigned &n)
virtual std::string serialise() const
Return this object's parameters serialised as a single string.
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Xapian::totallength get_total_length() const
Get the total length of all the documents in the database.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
test functionality of the Xapian API
Xapian::doclength get_avlength() const
Get the average length of the documents in the database.
This class implements the BB2 weighting scheme.
Class for iterating over a list of terms.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b).
Class for iterating over a list of terms.
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Xapian::Weight subclass implementing Coordinate Matching.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Xapian::termcount & sum_squares
Xapian::termcount wdf_upper
Class implementing a "boolean" weighting scheme.
This class provides read/write access to a database.
Indicates an error in the std::string serialisation of an object.
std::ostringstream tout
The debug printing stream.
Iterator over a Xapian::MSet.
Scale the weight contributed by a subquery.
Public interfaces for the Xapian library.
CheckStatsWeight(const Xapian::Database &db_, const string &term1_, const string &term2_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
#define TEST_WEIGHT_CLASS_NO_PARAMS(W)
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::Weight subclass implementing the traditional probabilistic formula.
#define CONST_STRLEN(S)
Returns the length of a string constant.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence from Randomness framework.
This class implements the PL2 weighting scheme.
This class implements the IneB2 weighting scheme.
static void test_weight_class(const char *name, const W &obj_default, const W &obj_other)
TermIterator allterms_begin(const std::string &prefix=std::string()) const
An iterator which runs across all terms with a given prefix.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
static void test_weight_class_no_params(const char *name)
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Match like OP_OR but weighting as if a single term.
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
double get_weight() const
Get the weight for the current position.
This class implements the IfB2 weighting scheme.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Match only documents which all subqueries match.
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
CheckStatsWeight(const Xapian::Database &db_, const string &term_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Xapian::Database get_database(const string &dbname)
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const override
Calculate the weight contribution for this object's term to a document.
void XFAIL_FOR_BACKEND(const std::string &backend_prefix, const char *msg)
std::string get_description() const
Return a string describing this object.
This class provides an interface to the information retrieval system for the purpose of searching.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Xapian::termcount len_upper
This class implements the DPH weighting scheme.
Match documents which at least one subquery matches.
Xapian-specific test helper functions and macros.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
#define TEST_WEIGHT_CLASS(W, DEFAULT, OTHER)
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Class representing a query.
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
#define TEST_EQUAL(a, b)
Test for equality of two things.
static void gen_wdf_eq_doclen_db(Xapian::WritableDatabase &db, const string &)
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Xapian::Weight subclass implementing the Language Model formula.
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
A handle representing a document in a Xapian database.
DEFINE_TESTCASE(weightserialisation1, !backend)
Test serialisation and introspection of built-in weighting schemes.
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
#define TEST_WEIGHTING_SCHEME(W,...)
double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqueterms) const override
Calculate the weight contribution for this object's term to a document.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Xapian::Weight subclass implementing the tf-idf weighting scheme.
Xapian::termcount len_lower
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Abstract base class for weighting schemes.
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
static const testcase testcases[]