45 string obj_serialised = obj.serialise();
48 unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
53 unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised +
"X"));
54 FAIL_TEST(
name <<
" did not throw for unserialise with junk appended");
63 #define TEST_WEIGHT_CLASS_NO_PARAMS(W) test_weight_class_no_params<W>(#W)
77 string obj_serialised = obj.serialise();
78 TEST_EQUAL(obj_serialised, obj_default.serialise());
80 string obj_other_serialised = obj_other.serialise();
83 unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
86 unique_ptr<Xapian::Weight> wt2(W().unserialise(obj_other_serialised));
87 TEST_EQUAL(obj_other_serialised, wt2->serialise());
91 unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised +
"X"));
92 FAIL_TEST(
name <<
" did not throw for unserialise with junk appended");
108 #define TEST_WEIGHT_CLASS(W, DEFAULT, OTHER) \
109 test_weight_class<W>(#W, W DEFAULT, W OTHER)
123 (1, 0.5, 1, 0.5, 0.5));
125 (1, 0, 1, 0.5, 0.5, 1.0),
126 (1, 0, 1, 0.5, 0.5, 2.0));
146 auto term =
"robinson";
153 const string& params) {
154 tout <<
name <<
'(' << params <<
")\n";
159 if (
name ==
"Xapian::BoolWeight") {
163 }
else if (
name ==
"Xapian::CoordWeight") {
167 }
else if (!params.empty()) {
179 mset[i].get_weight() * 15.0);
184 #define TEST_WEIGHTING_SCHEME(W, ...) \
185 helper(W(__VA_ARGS__), #W, "" #__VA_ARGS__)
209 #undef TEST_WEIGHTING_SCHEME
235 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
237 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
261 TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
262 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
263 TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
277 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
279 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
431 for (
int i = 0; i < 5; ++i) {
516 mset[3].get_weight() + 0.0086861771701328694);
543 mset[2].get_weight() + 0.016760925252262027);
577 TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
681 for (
int i = 0; i < 6; ++i) {
691 for (
int i = 0; i < 6; ++i) {
701 TEST_EQUAL_DOUBLE(mset[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
702 TEST_EQUAL_DOUBLE(mset[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
720 : factor(-1.0), zero_inits(z), non_zero_inits(n) { }
722 void init(
double factor_)
override {
756 unsigned zero_inits = 0, non_zero_inits = 0;
786 const string & term1_,
787 const string & term2_,
790 : factor(-1.0), db(db_), term1(term1_), term2(term2_),
791 sum(sum_), sum_squares(sum_squares_),
794 need_stat(COLLECTION_SIZE);
795 need_stat(RSET_SIZE);
796 need_stat(AVERAGE_LENGTH);
798 need_stat(RELTERMFREQ);
799 need_stat(QUERY_LENGTH);
802 need_stat(DOC_LENGTH);
803 need_stat(DOC_LENGTH_MIN);
804 need_stat(DOC_LENGTH_MAX);
806 need_stat(COLLECTION_FREQ);
807 need_stat(UNIQUE_TERMS);
808 need_stat(TOTAL_LENGTH);
812 const string & term_,
817 void init(
double factor_)
override {
845 double total_term_occurences = get_average_length() * num_docs;
847 if (term2.empty() || term2 ==
"=" || term2 ==
"_") {
863 tout <<
"->" << *t <<
" " << tf <<
'\n';
865 tfmax = max(tfmax, tf);
868 cfmax = max(cfmax, cf);
876 tfmax = max(tf1, tf2);
880 cfmax = max(cf1, cf2);
884 TEST_REL(get_termfreq(), >=, tfmax);
885 TEST_REL(get_collection_freq(), >=, cfmax);
887 TEST_REL(get_termfreq(), <=, tfsum);
888 TEST_REL(get_collection_freq(), <=, cfsum);
890 TEST_REL(get_termfreq(), <=, num_docs);
891 TEST_REL(get_collection_freq(), <=, totlen);
902 sum_squares += wdf * wdf;
908 if (len_upper == 0) {
909 len_lower = get_doclength_lower_bound();
910 len_upper = get_doclength_upper_bound();
911 wdf_upper = get_wdf_upper_bound();
930 const string & term = *a;
950 expected_sum_squares += wdf * wdf;
953 TEST_EQUAL(sum_squares, expected_sum_squares);
964 const string & term1 = *a;
966 const string & term2 = *a;
995 }
else if (did1 < did2) {
1002 expected_sum += wdf;
1003 expected_sum_squares += wdf * wdf;
1025 TEST_EQUAL(sum_squares, expected_sum_squares);
1047 static const char *
const testcases[] = {
1072 vector<Xapian::PostingIterator> postlists;
1077 make_heap(postlists.begin(), postlists.end(), PlCmp());
1080 while (!postlists.empty()) {
1081 pop_heap(postlists.begin(), postlists.end(), PlCmp());
1085 postlists.pop_back();
1087 push_heap(postlists.begin(), postlists.end(), PlCmp());
1089 if (did_new != did) {
1090 expected_sum += wdf;
1091 expected_sum_squares += wdf * wdf;
1097 expected_sum += wdf;
1098 expected_sum_squares += wdf * wdf;
1102 TEST_REL(sum_squares, >=, expected_sum_squares);
1114 const string & term = *a;
1134 expected_sum += wdf;
1135 expected_sum_squares += wdf * wdf;
1138 TEST_EQUAL(sum_squares, expected_sum_squares);
1224 TEST_REL(mset2[0].get_weight(),>,mset1[0].get_weight());
1225 TEST_REL(mset2[1].get_weight(),>,mset1[1].get_weight());
1226 TEST_REL(mset2[2].get_weight(),>,mset1[2].get_weight());
1227 TEST_REL(mset2[3].get_weight(),>,mset1[3].get_weight());
1228 TEST_REL(mset2[4].get_weight(),>,mset1[4].get_weight());
1260 static const char *
const terms[] = {
1261 "this",
"line",
"paragraph",
"rubbish"
1264 terms, terms +
sizeof(terms) /
sizeof(terms[0]));
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
static const testcase testcases[]
static void test_weight_class(const char *name, const W &obj_default, const W &obj_other)
#define TEST_WEIGHT_CLASS_NO_PARAMS(W)
#define TEST_WEIGHTING_SCHEME(W,...)
DEFINE_TESTCASE(weightserialisation1, !backend)
Test serialisation and introspection of built-in weighting schemes.
static void gen_wdf_eq_doclen_db(Xapian::WritableDatabase &db, const string &)
static void test_weight_class_no_params(const char *name)
#define TEST_WEIGHT_CLASS(W, DEFAULT, OTHER)
Xapian::Database get_database(const string &dbname)
void XFAIL_FOR_BACKEND(const std::string &backend_prefix, const char *msg)
test functionality of the Xapian API
Weight * clone() const override
Clone this object.
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const override
Calculate the weight contribution for this object's term to a document.
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
CheckInitWeight(unsigned &z, unsigned &n)
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
unsigned & non_zero_inits
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
CheckStatsWeight(const Xapian::Database &db_, const string &term_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Xapian::termcount wdf_upper
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
Weight * clone() const override
Clone this object.
double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqueterms) const override
Calculate the weight contribution for this object's term to a document.
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
CheckStatsWeight(const Xapian::Database &db_, const string &term1_, const string &term2_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Xapian::termcount len_upper
Xapian::termcount & sum_squares
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
Xapian::termcount len_lower
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
This class implements the BB2 weighting scheme.
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Class implementing a "boolean" weighting scheme.
Xapian::Weight subclass implementing Coordinate Matching.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence from Randomness framework.
This class implements the DPH weighting scheme.
This class is used to access a database, or a group of databases.
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Xapian::doclength get_avlength() const
Get the average length of the documents in the database.
Xapian::totallength get_total_length() const
Get the total length of all the documents in the database.
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
TermIterator allterms_begin(const std::string &prefix=std::string()) const
An iterator which runs across all terms with a given prefix.
TermIterator allterms_end(const std::string &=std::string()) const
Corresponding end iterator to allterms_begin(prefix).
Xapian::doccount get_doccount() const
Get the number of documents in the database.
A handle representing a document in a Xapian database.
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
This class provides an interface to the information retrieval system for the purpose of searching.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
This class implements the IfB2 weighting scheme.
This class implements the InL2 weighting scheme.
This class implements the IneB2 weighting scheme.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Xapian::Weight subclass implementing the Language Model formula.
Iterator over a Xapian::MSet.
double get_weight() const
Get the weight for the current position.
Class representing a list of search results.
Xapian::doccount size() const
Return number of items in this MSet object.
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
This class implements the PL2 weighting scheme.
Class for iterating over a list of terms.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Class representing a query.
std::string get_description() const
Return a string describing this object.
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
@ OP_WILDCARD
Wildcard expansion.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Indicates an error in the std::string serialisation of an object.
Class for iterating over a list of terms.
Xapian::Weight subclass implementing the tf-idf weighting scheme.
Xapian::Weight subclass implementing the traditional probabilistic formula.
Abstract base class for weighting schemes.
@ ABSOLUTE_DISCOUNT_SMOOTHING
@ DIRICHLET_PLUS_SMOOTHING
@ JELINEK_MERCER_SMOOTHING
This class provides read/write access to a database.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
double weight
The weight of a document or term.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
#define CONST_STRLEN(S)
Returns the length of a string constant.
#define TEST_REL(A, REL, B)
Test a relation holds, e.g. TEST_REL(a,>,b);
std::ostringstream tout
The debug printing stream.
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
#define TEST_EQUAL(a, b)
Test for equality of two things.
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
#define TEST(a)
Test a condition, without an additional explanation for failure.
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Xapian-specific test helper functions and macros.
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Public interfaces for the Xapian library.