sourcedoc/html/api__weight_8cc_source.html

 /* Copyright (C) 2004-2024 Olly Betts
  * Copyright (C) 2013 Aarsh Shah
  * Copyright (C) 2016 Vivek Pal
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  */

 #include <config.h>

 #include "api_weight.h"
 #include <cmath>
 #include <memory>

 #include <xapian.h>

 #include "apitest.h"
 #include "testutils.h"

 using namespace std;

 template<class W>
 static inline void
 test_weight_class_no_params(const char* name)
 {
     tout << name << '\n';
     W obj;
     // Check name() returns the class name.
     TEST_EQUAL(obj.name(), name);
     // If there are no parameters, there's nothing to serialise.
     string obj_serialised = obj.serialise();
     TEST_EQUAL(obj_serialised.size(), 0);
     // Check serialising and unserialising gives object with same serialisation.
     unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
     TEST_EQUAL(obj_serialised, wt->serialise());
     // Check that unserialise() throws suitable error for bad serialisation.
     // The easy case to test is extra junk after the serialised weight.
     try {
         unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised + "X"));
         FAIL_TEST(name << " did not throw for unserialise with junk appended");
     } catch (const Xapian::SerialisationError& e) {
         // Check the exception message contains the weighting scheme name
         // (regression test for TradWeight's exception saying "BM25").
         string target = name + CONST_STRLEN("Xapian::");
         TEST(e.get_msg().find(target) != string::npos);
     }
 }

 // Convenience wrapper: instantiates test_weight_class_no_params<W>() and
 // passes the stringified class name W as the expected name().
 #define TEST_WEIGHT_CLASS_NO_PARAMS(W) test_weight_class_no_params<W>(#W)

 template<class W>
 static inline void
 test_weight_class(const char* name, const W& obj_default, const W& obj_other)
 {
     tout << name << '\n';
     W obj;
     // Check name() returns the class name.
     TEST_EQUAL(obj.name(), name);
     TEST_EQUAL(obj_default.name(), name);
     TEST_EQUAL(obj_other.name(), name);
     // Check serialisation matches that of object constructed with explicit
     // parameter values of what the defaults are meant to be.
     string obj_serialised = obj.serialise();
     TEST_EQUAL(obj_serialised, obj_default.serialise());
     // Check serialisation is different to object with different parameters.
     string obj_other_serialised = obj_other.serialise();
     TEST_NOT_EQUAL(obj_serialised, obj_other_serialised);
     // Check serialising and unserialising gives object with same serialisation.
     unique_ptr<Xapian::Weight> wt(W().unserialise(obj_serialised));
     TEST_EQUAL(obj_serialised, wt->serialise());
     // Check serialising and unserialising of object with different parameters.
     unique_ptr<Xapian::Weight> wt2(W().unserialise(obj_other_serialised));
     TEST_EQUAL(obj_other_serialised, wt2->serialise());
     // Check that unserialise() throws suitable error for bad serialisation.
     // The easy case to test is extra junk after the serialised weight.
     try {
         unique_ptr<Xapian::Weight> bad(W().unserialise(obj_serialised + "X"));
         FAIL_TEST(name << " did not throw for unserialise with junk appended");
     } catch (const Xapian::SerialisationError& e) {
         // Check the exception message contains the weighting scheme name
         // (regression test for TradWeight's exception saying "BM25").
         string target = name + CONST_STRLEN("Xapian::");
         TEST(e.get_msg().find(target) != string::npos);
     }
 }

 // W should be the class name; it is also stringified and passed as the
 // expected name().
 //
 // DEFAULT should be a parenthesised parameter list to explicitly construct
 // an object of class W with the documented default parameters.
 //
 // OTHER should be a parenthesised parameter list to construct an object with
 // non-default parameters.
 #define TEST_WEIGHT_CLASS(W, DEFAULT, OTHER) \
     test_weight_class<W>(#W, W DEFAULT, W OTHER)

 // Check name() and serialisation round trips for each weighting scheme.
 DEFINE_TESTCASE(weightserialisation1, !backend) {
     // Schemes which take no parameters.
     TEST_WEIGHT_CLASS_NO_PARAMS(Xapian::BoolWeight);
     TEST_WEIGHT_CLASS_NO_PARAMS(Xapian::CoordWeight);
     TEST_WEIGHT_CLASS_NO_PARAMS(Xapian::DLHWeight);
     TEST_WEIGHT_CLASS_NO_PARAMS(Xapian::DPHWeight);

     // Schemes with parameters: the first list spells out the defaults, the
     // second gives parameters which differ from them.
     TEST_WEIGHT_CLASS(Xapian::TradWeight, (1.0), (2.0));
     TEST_WEIGHT_CLASS(Xapian::BM25Weight,
                       (1, 0, 1, 0.5, 0.5), (1, 0.5, 1, 0.5, 0.5));
     TEST_WEIGHT_CLASS(Xapian::BM25PlusWeight,
                       (1, 0, 1, 0.5, 0.5, 1.0), (1, 0, 1, 0.5, 0.5, 2.0));
     TEST_WEIGHT_CLASS(Xapian::TfIdfWeight, ("ntn"), ("bpn"));

     // Schemes whose first (or only) parameter is given as 1.0 by default.
     TEST_WEIGHT_CLASS(Xapian::InL2Weight, (1.0), (2.0));
     TEST_WEIGHT_CLASS(Xapian::IfB2Weight, (1.0), (2.0));
     TEST_WEIGHT_CLASS(Xapian::IneB2Weight, (1.0), (2.0));
     TEST_WEIGHT_CLASS(Xapian::BB2Weight, (1.0), (2.0));
     TEST_WEIGHT_CLASS(Xapian::PL2Weight, (1.0), (2.0));
     TEST_WEIGHT_CLASS(Xapian::PL2PlusWeight, (1.0, 0.8), (2.0, 0.9));

     // Language model weighting: the variants differ in the smoothing type.
     TEST_WEIGHT_CLASS(Xapian::LMWeight,
                       (0.0, Xapian::Weight::TWO_STAGE_SMOOTHING, 0.7, 2000.0),
                       (0.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0.7));
 }

 // Run each weighting scheme over a single-term query, check the basic shape
 // of the weights, and check that scaling the query by 15 scales the weights
 // by 15.
 DEFINE_TESTCASE(weight1, backend) {
     Xapian::Database db(get_database("etext"));
     auto term = "robinson";
     Xapian::Query q{term};
     // Every document containing the term should match.
     auto expected_matches = db.get_termfreq(term);

     // One Enquire for the plain query, one for the scaled query.
     Xapian::Enquire enquire(db);
     enquire.set_query(q);
     Xapian::Enquire enquire_scaled(db);
     enquire_scaled.set_query(q * 15.0);

     // weight: scheme to test; name: its class name; params: the stringified
     // constructor arguments (empty when default-constructed).
     auto check_scheme = [&](const Xapian::Weight& weight,
                             const string& name,
                             const string& params) {
         tout << name << '(' << params << ")\n";
         enquire.set_weighting_scheme(weight);
         enquire_scaled.set_weighting_scheme(weight);

         // Ask for one more than expected so that extra matches are noticed.
         Xapian::MSet mset = enquire.get_mset(0, expected_matches + 1);
         TEST_EQUAL(mset.size(), expected_matches);
         if (name == "Xapian::BoolWeight") {
             /* All weights should be zero. */
             TEST_EQUAL(mset[0].get_weight(), 0.0);
             TEST_EQUAL(mset.back().get_weight(), 0.0);
         } else if (name == "Xapian::CoordWeight") {
             /* All weights should be 1 for a single term query. */
             TEST_EQUAL(mset[0].get_weight(), 1.0);
             TEST_EQUAL(mset.back().get_weight(), 1.0);
         } else if (params.empty()) {
             /* Default parameters: weights are non-zero and vary. */
             TEST_NOT_EQUAL(mset[0].get_weight(), 0.0);
             TEST_NOT_EQUAL(mset[0].get_weight(), mset.back().get_weight());
         } else {
             /* All weights should be equal with these particular parameters. */
             TEST_NOT_EQUAL(mset[0].get_weight(), 0.0);
             TEST_EQUAL(mset[0].get_weight(), mset.back().get_weight());
         }

         // The scaled query must give the same matches with 15x the weight.
         Xapian::MSet mset_scaled = enquire_scaled.get_mset(0, expected_matches);
         TEST_EQUAL(mset_scaled.size(), expected_matches);
         for (Xapian::doccount i = 0; i != expected_matches; ++i) {
             TEST_EQUAL_DOUBLE(mset_scaled[i].get_weight(),
                               mset[i].get_weight() * 15.0);
         }
     };

     // MSVC gives nothing for #__VA_ARGS__ when there are no varargs, hence
     // the empty string literal in front of it.
 #define TEST_WEIGHTING_SCHEME(W, ...) \
         check_scheme(W(__VA_ARGS__), #W, "" #__VA_ARGS__)

     // All schemes with their default parameters.
     TEST_WEIGHTING_SCHEME(Xapian::BoolWeight);
     TEST_WEIGHTING_SCHEME(Xapian::CoordWeight);
     TEST_WEIGHTING_SCHEME(Xapian::DLHWeight);
     TEST_WEIGHTING_SCHEME(Xapian::DPHWeight);
     TEST_WEIGHTING_SCHEME(Xapian::TradWeight);
     TEST_WEIGHTING_SCHEME(Xapian::BM25Weight);
     TEST_WEIGHTING_SCHEME(Xapian::BM25PlusWeight);
     TEST_WEIGHTING_SCHEME(Xapian::TfIdfWeight);
     TEST_WEIGHTING_SCHEME(Xapian::InL2Weight);
     TEST_WEIGHTING_SCHEME(Xapian::IfB2Weight);
     TEST_WEIGHTING_SCHEME(Xapian::IneB2Weight);
     TEST_WEIGHTING_SCHEME(Xapian::BB2Weight);
     TEST_WEIGHTING_SCHEME(Xapian::PL2Weight);
     TEST_WEIGHTING_SCHEME(Xapian::PL2PlusWeight);
     TEST_WEIGHTING_SCHEME(Xapian::LMWeight);

     // Regression test for bug fixed in 1.2.4.
     TEST_WEIGHTING_SCHEME(Xapian::BM25Weight, 0, 0, 0, 0, 1);
     /* As mentioned in the documentation, when parameter k is 0, wdf and
      * document length don't affect the weights.  Regression test for bug fixed
      * in 1.2.4.
      */
     TEST_WEIGHTING_SCHEME(Xapian::TradWeight, 0);
 #undef TEST_WEIGHTING_SCHEME
 }

 // Run a match with an unusual BM25 parameter set.  Nothing about the result
 // is asserted, so this only checks that the match completes.
 DEFINE_TESTCASE(bm25weight1, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_weighting_scheme(Xapian::BM25Weight(1, 25, 1, 0.01, 0.5));
     enquire.set_query(Xapian::Query("word"));

     Xapian::MSet mset(enquire.get_mset(0, 25));
 }

 // BM25 parameter combinations for which document length should have no
 // effect on the weight.
 DEFINE_TESTCASE(bm25weight4, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("paragraph"));

     {
         // Expect: wdf has an effect on weight, but doclen doesn't.
         enquire.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
         Xapian::MSet mset = enquire.get_mset(0, 10);
         TEST_EQUAL(mset.size(), 5);
         TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
         TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
         TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
         TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
     }

     {
         // Expect: neither wdf nor doclen affects weight.
         enquire.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
         Xapian::MSet mset = enquire.get_mset(0, 10);
         TEST_EQUAL(mset.size(), 5);
         TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
     }
 }

 // Regression test for bug fixed in 1.2.17 and 1.3.2.
 DEFINE_TESTCASE(bm25weight5, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("paragraph"));
     enquire.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));

     Xapian::MSet mset = enquire.get_mset(0, 10);
     TEST_EQUAL(mset.size(), 5);

     // Expect: wdf has no effect on weight; shorter docs rank higher.
     mset_expect_order(mset, 3, 5, 1, 4, 2);
     // The top two weights tie; after that the weights strictly decrease.
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[1].get_weight());
     TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
     TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
     TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
 }

 // BM25+ parameter combinations for which document length should have no
 // effect on the weight.
 DEFINE_TESTCASE(bm25plusweight2, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("paragraph"));

     {
         // Expect: wdf has an effect on weight, but doclen doesn't.
         enquire.set_weighting_scheme(
             Xapian::BM25PlusWeight(1, 0, 1, 0, 0.5, 1));
         Xapian::MSet mset = enquire.get_mset(0, 10);
         TEST_EQUAL(mset.size(), 5);
         TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
         TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
         TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
         TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
     }

     {
         // Expect: neither wdf nor doclen affects weight.
         enquire.set_weighting_scheme(
             Xapian::BM25PlusWeight(0, 0, 1, 1, 0.5, 1));
         Xapian::MSet mset = enquire.get_mset(0, 10);
         TEST_EQUAL(mset.size(), 5);
         TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
     }
 }

 // Regression test for a mistake corrected in the BM25+ implementation.
 DEFINE_TESTCASE(bm25plusweight3, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("paragraph"));
     enquire.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0.5, 0.5, 1));

     Xapian::MSet mset = enquire.get_mset(0, 10);
     TEST_EQUAL(mset.size(), 5);

     // Expected weights were worked out by hand from the BM25+ formulae using
     // each document's statistics.
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 0.7920796567487473);
     TEST_EQUAL_DOUBLE(mset[1].get_weight(), 0.7846980783848447);
     TEST_EQUAL_DOUBLE(mset[2].get_weight(), 0.7558817623365934);
     TEST_EQUAL_DOUBLE(mset[3].get_weight(), 0.7210119356168847);
     TEST_EQUAL_DOUBLE(mset[4].get_weight(), 0.7210119356168847);
 }


 // InL2Weight must reject invalid values of parameter c.
 DEFINE_TESTCASE(inl2weight2, !backend) {
     // A negative c should throw InvalidArgumentError.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::InL2Weight wt(-2.0));

     // So should a c of zero.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::InL2Weight wt2(0.0));
 }

 // Feature test for InL2Weight.
 DEFINE_TESTCASE(inl2weight3, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("banana"));
     enquire.set_weighting_scheme(Xapian::InL2Weight(2.0));

     // Exactly one match is expected, and it should be document 6.
     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 1);
     mset_expect_order(mset1, 6);

     /* The expected weight was computed in the Python interpreter from the
      * database statistics. */
     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.559711143842063);
 }

 // IfB2Weight must reject invalid values of parameter c.
 DEFINE_TESTCASE(ifb2weight2, !backend) {
     // A negative c should throw InvalidArgumentError.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::IfB2Weight wt(-2.0));

     // So should a c of zero.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::IfB2Weight wt2(0.0));
 }

 // Feature test for IfB2Weight.
 DEFINE_TESTCASE(ifb2weight3, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("banana"));
     enquire.set_weighting_scheme(Xapian::IfB2Weight(2.0));

     // Exactly one match is expected.
     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 1);

     /* The expected weight was calculated by hand from the statistics of the
      * test database. */
     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.119422287684126);
 }

 // IneB2Weight must reject invalid values of parameter c.
 DEFINE_TESTCASE(ineb2weight2, !backend) {
     // A negative c should throw InvalidArgumentError.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::IneB2Weight wt(-2.0));

     // So should a c of zero.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::IneB2Weight wt2(0.0));
 }

 // Feature test for IneB2Weight.
 DEFINE_TESTCASE(ineb2weight3, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("paragraph"));
     enquire.set_weighting_scheme(Xapian::IneB2Weight(2.0));

     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 5);

     /* The expected weight was calculated by hand from the statistics of the
      * test database.
      *
      * NOTE(review): this entry has been described as the third document in
      * the database, ranked 4th, yet index 4 selects the fifth-ranked entry -
      * confirm which is intended. */
     TEST_EQUAL_DOUBLE(mset1[4].get_weight(), 0.61709730297692400036);
 }

 // BB2Weight must reject invalid values of parameter c.
 DEFINE_TESTCASE(bb2weight2, !backend) {
     // A negative c should throw InvalidArgumentError.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::BB2Weight wt(-2.0));

     // So should a c of zero.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::BB2Weight wt2(0.0));
 }

 // Feature test for BB2Weight, including its behaviour under OP_SCALE_WEIGHT.
 DEFINE_TESTCASE(bb2weight3, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     Xapian::Query query("paragraph");

     enquire.set_query(query);
     enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));

     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 5);
     /* The third document in the database has the highest weight and so comes
      * first in the mset.  The expected value was calculated by hand from the
      * statistics of the test database. */
     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.6823696969784483);

     // Now scale the query by a small factor with OP_SCALE_WEIGHT (regression
     // test, as we were applying the factor to the upper bound twice).
     Xapian::Query scaled(Xapian::Query::OP_SCALE_WEIGHT, query, 1.0 / 1024);
     enquire.set_query(scaled);
     enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));

     Xapian::MSet mset3 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset3.size(), 5);

     // Undoing the scaling must give back the unscaled weights.
     for (int i = 0; i != 5; ++i) {
         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight() * 1024);
     }
 }

 // Regression test: we used to calculate log2(0) when there was only one doc.
 DEFINE_TESTCASE(bb2weight4, backend) {
     Xapian::Database db(get_database("apitest_onedoc"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("word"));
     // Default-constructed BB2Weight.
     enquire.set_weighting_scheme(Xapian::BB2Weight());

     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 1);
     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.431020621347435);
 }

 // Feature test for DLHWeight.
 DEFINE_TESTCASE(dlhweight1, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("a"));
     enquire.set_weighting_scheme(Xapian::DLHWeight());

     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 3);
     mset_expect_order(mset1, 3, 1, 2);

     // Expected weights were calculated by hand using stats from the database.
     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.0046477754371292362);
     TEST_EQUAL_DOUBLE(mset1[1].get_weight(), 0.97621929514640352757);
     // The last weight would be negative but gets clamped to 0.
     TEST_EQUAL_DOUBLE(mset1[2].get_weight(), 0.0);
 }

 /// Database generator: adds a single document whose only term is "solo",
 /// added with a wdf increment of 37.  The second (string) argument is unused.
 static void
 gen_wdf_eq_doclen_db(Xapian::WritableDatabase& db, const string&)
 {
     Xapian::Document only_doc;
     only_doc.add_term("solo", 37);
     db.add_document(only_doc);
 }

 // DLHWeight when wdf == doclen.
 DEFINE_TESTCASE(dlhweight3, backend) {
     Xapian::Database db(get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("solo"));
     enquire.set_weighting_scheme(Xapian::DLHWeight());

     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 1);
     // Weight gets clamped to zero.
     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 }

 // PL2Weight must reject invalid values of parameter c.
 DEFINE_TESTCASE(pl2weight2, !backend) {
     // A negative c should throw InvalidArgumentError.
     TEST_EXCEPTION(Xapian::InvalidArgumentError, Xapian::PL2Weight wt(-2.0));
 }

 // Feature test for PL2Weight.
 DEFINE_TESTCASE(pl2weight3, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("paragraph"));
     enquire.set_weighting_scheme(Xapian::PL2Weight(2.0));

     Xapian::MSet mset = enquire.get_mset(0, 10);
     TEST_EQUAL(mset.size(), 5);
     // The difference between the 3rd and 4th weights is checked; the expected
     // difference was calculated in extended precision using stats from the
     // test database.
     TEST_EQUAL_DOUBLE(mset[2].get_weight(),
                       mset[3].get_weight() + 0.0086861771701328694);
 }

 // PL2PlusWeight must reject invalid values of parameters c and delta.
 DEFINE_TESTCASE(pl2plusweight2, !backend) {
     // An invalid c (here negative) should throw InvalidArgumentError.
     TEST_EXCEPTION(Xapian::InvalidArgumentError,
                    Xapian::PL2PlusWeight wt(-2.0, 0.9));

     // An invalid delta (here negative) should throw InvalidArgumentError.
     TEST_EXCEPTION(Xapian::InvalidArgumentError,
                    Xapian::PL2PlusWeight wt(1.0, -1.9));
 }

 // Feature test 1 for PL2PlusWeight.
 DEFINE_TESTCASE(pl2plusweight4, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("to"));
     enquire.set_weighting_scheme(Xapian::PL2PlusWeight(2.0, 0.8));

     Xapian::MSet mset = enquire.get_mset(0, 10);
     TEST_EQUAL(mset.size(), 3);
     // The difference between the 2nd and 3rd weights is checked; the expected
     // difference was calculated in Python using stats from the test database.
     TEST_EQUAL_DOUBLE(mset[1].get_weight(),
                       mset[2].get_weight() + 0.016760925252262027);
 }

 // Feature test 2 for PL2PlusWeight.
 DEFINE_TESTCASE(pl2plusweight5, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("word"));
     enquire.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));

     Xapian::MSet mset = enquire.get_mset(0, 10);
     // Two documents are expected to match the query term "word".
     TEST_EQUAL(mset.size(), 2);
     // Document 2 should outrank document 4 because "word" occurs more often
     // in document 2 than in document 4.
     mset_expect_order(mset, 2, 4);
 }

 // Feature test for DPHWeight.
 DEFINE_TESTCASE(dphweight1, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("paragraph"));
     enquire.set_weighting_scheme(Xapian::DPHWeight());

     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 5);
     /* The difference between the 3rd and 5th weights is checked; the expected
      * value was calculated by hand using the statistics of the test
      * database. */
     TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
 }

 // DPHWeight when wdf == doclen.
 DEFINE_TESTCASE(dphweight3, backend) {
     Xapian::Database db(get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query("solo"));
     enquire.set_weighting_scheme(Xapian::DPHWeight());

     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 1);
     // Weight gets clamped to zero.
     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 }

 // TfIdfWeight must reject invalid normalization strings.
 DEFINE_TESTCASE(tfidfweight1, !backend) {
     // Each of these strings should cause InvalidArgumentError to be thrown.
     TEST_EXCEPTION(Xapian::InvalidArgumentError,
                    Xapian::TfIdfWeight b("JOHN_LENNON"));

     TEST_EXCEPTION(Xapian::InvalidArgumentError,
                    Xapian::TfIdfWeight b("LOL"));
 }

 // Feature tests for the TfIdfWeight normalization options.
 DEFINE_TESTCASE(tfidfweight3, backend) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     Xapian::Query query("word");
     Xapian::MSet mset;

     // Set the query, select TfIdfWeight with the given normalization string,
     // and return up to the first 10 matches.
     auto run = [&enquire](const Xapian::Query& q, const char* normalization) {
         enquire.set_query(q);
         enquire.set_weighting_scheme(Xapian::TfIdfWeight(normalization));
         return enquire.get_mset(0, 10);
     };

     // "ntn" when termfreq != N.
     mset = run(query, "ntn");
     TEST_EQUAL(mset.size(), 2);
     // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
     mset_expect_order(mset, 2, 4);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * log(6.0 / 2));

     // wqf must be taken into account.
     Xapian::MSet mset2 = run(Xapian::Query("word", 2), "ntn");
     TEST_EQUAL(mset2.size(), 2);
     // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
     mset_expect_order(mset2, 2, 4);
     // wqf is 2, so weights should be doubled.
     TEST_EQUAL_DOUBLE(mset[0].get_weight() * 2, mset2[0].get_weight());
     TEST_EQUAL_DOUBLE(mset[1].get_weight() * 2, mset2[1].get_weight());

     // "nfn" when termfreq != N.
     mset = run(query, "nfn");
     TEST_EQUAL(mset.size(), 2);
     mset_expect_order(mset, 2, 4);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 / 2);

     // "nsn" when termfreq != N.
     mset = run(query, "nsn");
     TEST_EQUAL(mset.size(), 2);
     mset_expect_order(mset, 2, 4);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * pow(log(6.0 / 2), 2.0));

     // "bnn".
     mset = run(Xapian::Query("test"), "bnn");
     TEST_EQUAL(mset.size(), 1);
     mset_expect_order(mset, 1);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1.0);

     // "lnn": the two matches cover tf > 1 and tf == 1.
     mset = run(Xapian::Query("word"), "lnn");
     TEST_EQUAL(mset.size(), 2);
     mset_expect_order(mset, 2, 4);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
     TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1.0);         // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1

     // "snn": idf=1 and tfn=tf*tf.
     mset = run(Xapian::Query("paragraph"), "snn");
     TEST_EQUAL(mset.size(), 5);
     mset_expect_order(mset, 2, 1, 4, 3, 5);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 9.0);
     TEST_EQUAL_DOUBLE(mset[4].get_weight(), 1.0);

     // "ntn" when termfreq=N: N=termfreq and so idfn=0 for "t".
     mset = run(Xapian::Query("this"), "ntn");
     TEST_EQUAL(mset.size(), 6);
     mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
     for (int i = 0; i != 6; ++i) {
         TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
     }

     // "npn" when termfreq=N: N=termfreq and so idfn=0 for "p".
     mset = run(Xapian::Query("this"), "npn");
     TEST_EQUAL(mset.size(), 6);
     mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
     for (int i = 0; i != 6; ++i) {
         TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
     }

     // "Lnn".
     mset = run(Xapian::Query("word"), "Lnn");
     TEST_EQUAL(mset.size(), 2);
     mset_expect_order(mset, 2, 4);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
     TEST_EQUAL_DOUBLE(mset[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));

     // "npn" when termfreq != N (the other branch of 'p').
     mset = run(Xapian::Query("word"), "npn");
     TEST_EQUAL(mset.size(), 2);
     mset_expect_order(mset, 2, 4);
     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8 * log((6.0 - 2) / 2));
     TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1 * log((6.0 - 2) / 2));
 }

 /** Weight subclass which tallies init() calls by whether the factor is zero.
  *
  *  The tallies are held by reference, and clone() passes the same references
  *  on, so clones update the same counters as the object they came from.
  */
 class CheckInitWeight : public Xapian::Weight {
   public:
     /// Factor passed to the most recent init() call (-1.0 before any call).
     double factor;

     /// Count of init() calls made with a factor of zero.
     unsigned& zero_inits;

     /// Count of init() calls made with a non-zero factor.
     unsigned& non_zero_inits;

     CheckInitWeight(unsigned& zero_count, unsigned& non_zero_count)
         : factor(-1.0),
           zero_inits(zero_count),
           non_zero_inits(non_zero_count)
     { }

     /// Record the factor and bump whichever tally it belongs to.
     void init(double factor_) override {
         factor = factor_;
         unsigned& tally = (factor == 0.0) ? zero_inits : non_zero_inits;
         ++tally;
     }

     /// New object sharing this object's counters; factor starts at -1.0.
     Weight* clone() const override {
         return new CheckInitWeight(zero_inits, non_zero_inits);
     }

     /// Constant contribution; the arguments are ignored.
     double get_sumpart(Xapian::termcount,
                        Xapian::termcount,
                        Xapian::termcount) const override {
         return 1.0;
     }

     double get_maxpart() const override {
         return 1.0;
     }

     /// Contribution which is the reciprocal of the document length.
     double get_sumextra(Xapian::termcount doclen,
                         Xapian::termcount) const override {
         return 1.0 / doclen;
     }

     double get_maxextra() const override {
         return 1.0;
     }
 };

 // Check how many times init() is called on a user weighting scheme, and with
 // which kind of factor, for a two-term AND query.
 DEFINE_TESTCASE(checkinitweight1, backend && !multi && !remote) {
     Xapian::Database db(get_database("apitest_simpledata"));
     Xapian::Enquire enquire(db);
     enquire.set_query(Xapian::Query(Xapian::Query::OP_AND,
                                     Xapian::Query("this"),
                                     Xapian::Query("paragraph")));

     unsigned zero_inits = 0;
     unsigned non_zero_inits = 0;
     CheckInitWeight wt(zero_inits, non_zero_inits);
     enquire.set_weighting_scheme(wt);

     // Running the match is what triggers the init() calls being counted.
     Xapian::MSet mset = enquire.get_mset(0, 3);

     // Presumably one non-zero init per query term plus a single zero-factor
     // init for the term-independent part - TODO confirm against the matcher.
     TEST_EQUAL(zero_inits, 1);
     TEST_EQUAL(non_zero_inits, 2);
 }

 class CheckStatsWeight : public Xapian::Weight {
   public:
     double factor;

     Xapian::Database db;

     string term1;

     // When testing OP_SYNONYM, term2 is also set.
     // When testing OP_WILDCARD, term2 == "*".
     // When testing a repeated term, term2 == "=" for the first occurrence and
     // "_" for subsequent occurrences.
     mutable string term2;

     Xapian::termcount & sum;
     Xapian::termcount & sum_squares;

     mutable Xapian::termcount len_upper;
     mutable Xapian::termcount len_lower;
     mutable Xapian::termcount wdf_upper;

     CheckStatsWeight(const Xapian::Database & db_,
                      const string & term1_,
                      const string & term2_,
                      Xapian::termcount & sum_,
                      Xapian::termcount & sum_squares_)
         : factor(-1.0), db(db_), term1(term1_), term2(term2_),
           sum(sum_), sum_squares(sum_squares_),
           len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
     {
         need_stat(COLLECTION_SIZE);
         need_stat(RSET_SIZE);
         need_stat(AVERAGE_LENGTH);
         need_stat(TERMFREQ);
         need_stat(RELTERMFREQ);
         need_stat(QUERY_LENGTH);
         need_stat(WQF);
         need_stat(WDF);
         need_stat(DOC_LENGTH);
         need_stat(DOC_LENGTH_MIN);
         need_stat(DOC_LENGTH_MAX);
         need_stat(WDF_MAX);
         need_stat(COLLECTION_FREQ);
         need_stat(UNIQUE_TERMS);
         need_stat(TOTAL_LENGTH);
     }

     CheckStatsWeight(const Xapian::Database & db_,
                      const string & term_,
                      Xapian::termcount & sum_,
                      Xapian::termcount & sum_squares_)
         : CheckStatsWeight(db_, term_, string(), sum_, sum_squares_) { }

     void init(double factor_) override {
         factor = factor_;
     }

     /** Return a new CheckStatsWeight sharing our database, terms and
      *  accumulators.
      *
      *  The object passed to Enquire::set_weighting_scheme() is cloned right
      *  away, and then cloned again for each term, and then potentially once
      *  more for the term-independent weight contribution.  In the repeated
      *  case, we want to handle the first actual term specially, so we
      *  arrange for that to have "=" for term2, and subsequent clones to have
      *  "_", so that we accumulate sum and sum_squares on the first
      *  occurrence only.
      */
     Weight* clone() const override
     {
         // Build the copy before touching term2, so the copy still carries
         // whatever marker we currently hold.
         Weight* copy = new CheckStatsWeight(db, term1, term2, sum, sum_squares);
         if (term2.compare("=") == 0) {
             term2.assign("_");
         }
         return copy;
     }

     /** Check the statistics visible to this weight, then tally the wdf.
      *
      *  Each statistic accessor is compared with the value obtained directly
      *  from the database (exactly where that's possible, otherwise against
      *  bounds), and the arguments are checked against the bounds cached by
      *  get_maxpart().
      *
      *  @param wdf          Checked against wdf_upper and added to the
      *                       accumulators.
      *  @param doclen       Checked against len_lower and len_upper.
      *  @param uniqueterms  Checked to lie between 1 and doclen.
      *
      *  @return Always 1.0.
      */
     double get_sumpart(Xapian::termcount wdf,
                        Xapian::termcount doclen,
                        Xapian::termcount uniqueterms) const override
     {
         // Collection-level statistics must agree with the database.
         Xapian::doccount num_docs = db.get_doccount();
         TEST_EQUAL(get_collection_size(), num_docs);
         TEST_EQUAL(get_rset_size(), 0);
         TEST_EQUAL(get_average_length(), db.get_avlength());
         Xapian::totallength totlen = get_total_length();
         TEST_EQUAL(totlen, db.get_total_length());
         // Average length times document count, rounded to nearest, should
         // give back the total length.
         double total_term_occurences = get_average_length() * num_docs;
         TEST_EQUAL(Xapian::totallength(total_term_occurences + 0.5), totlen);

         // An empty term2, "=" or "_" all mean only term1 is being weighted;
         // anything else is either the wildcard marker or a second term.
         const bool term1_only =
             term2.empty() || term2 == "=" || term2 == "_";
         if (!term1_only) {
             Xapian::doccount tfmax = 0, tfsum = 0;
             Xapian::termcount cfmax = 0, cfsum = 0;
             if (term2 != "*") {
                 // OP_SYNONYM case.
                 Xapian::doccount tf1 = db.get_termfreq(term1);
                 Xapian::doccount tf2 = db.get_termfreq(term2);
                 tfsum = tf1 + tf2;
                 tfmax = max(tf1, tf2);
                 Xapian::termcount cf1 = db.get_collection_freq(term1);
                 Xapian::termcount cf2 = db.get_collection_freq(term2);
                 cfsum = cf1 + cf2;
                 cfmax = max(cf1, cf2);
                 TEST_EQUAL(get_query_length(), 2);
             } else {
                 // OP_WILDCARD case: gather the frequencies of every term
                 // starting with term1.
                 Xapian::TermIterator t = db.allterms_begin(term1);
                 while (t != db.allterms_end(term1)) {
                     Xapian::doccount tf = t.get_termfreq();
                     tout << "->" << *t << " " << tf << '\n';
                     tfsum += tf;
                     if (tfmax < tf) tfmax = tf;
                     Xapian::termcount cf = db.get_collection_freq(*t);
                     cfsum += cf;
                     if (cfmax < cf) cfmax = cf;
                     ++t;
                 }
                 TEST_EQUAL(get_query_length(), 1);
             }
             // Synonym occurs at least as many times as any term.
             TEST_REL(get_termfreq(), >=, tfmax);
             TEST_REL(get_collection_freq(), >=, cfmax);
             // Synonym can't occur more times than the terms do.
             TEST_REL(get_termfreq(), <=, tfsum);
             TEST_REL(get_collection_freq(), <=, cfsum);
             // Synonym can't occur more times than there are documents/terms.
             TEST_REL(get_termfreq(), <=, num_docs);
             TEST_REL(get_collection_freq(), <=, totlen);
         } else {
             // Frequencies must match the database's values for term1 exactly.
             TEST_EQUAL(get_termfreq(), db.get_termfreq(term1));
             TEST_EQUAL(get_collection_freq(), db.get_collection_freq(term1));
             if (!term2.empty()) {
                 // Repeated-term case: the term appears twice in the query.
                 TEST_EQUAL(get_query_length(), 2);
             } else {
                 TEST_EQUAL(get_query_length(), 1);
             }
         }

         // Per-term and per-document values.
         TEST_EQUAL(get_reltermfreq(), 0);
         TEST_EQUAL(get_wqf(), 1);
         TEST_REL(doclen,>=,len_lower);
         TEST_REL(doclen,<=,len_upper);
         TEST_REL(uniqueterms,>=,1);
         TEST_REL(uniqueterms,<=,doclen);
         TEST_REL(wdf,<=,wdf_upper);

         if (term2 == "_") {
             // Not the first occurrence of a repeated term, so leave the
             // accumulators alone.
             return 1.0;
         }
         sum += wdf;
         sum_squares += wdf * wdf;
         return 1.0;
     }

     /** Cache the document length and wdf bounds for get_sumpart() to check.
      *
      *  The bounds are fetched whenever len_upper is still 0, which is the
      *  value the constructor gives it.
      *
      *  @return Always 1.0.
      */
     double get_maxpart() const override
     {
         const bool bounds_cached = (len_upper != 0);
         if (!bounds_cached) {
             len_lower = get_doclength_lower_bound();
             len_upper = get_doclength_upper_bound();
             wdf_upper = get_wdf_upper_bound();
         }
         return 1.0;
     }

     /** Term-independent component: the reciprocal of the document length.
      *
      *  @param doclen  Length of the current document.
      *
      *  The second argument is ignored.
      */
     double get_sumextra(Xapian::termcount doclen,
                         Xapian::termcount) const override
     {
         const double length = doclen;
         return 1.0 / length;
     }

     /// Upper bound reported for get_sumextra(): always 1.0.
     double get_maxextra() const override
     {
         return 1.0;
     }
 };

 // Run a single-term query for every term in the database using
 // CheckStatsWeight, and check the wdf totals it accumulated.
 DEFINE_TESTCASE(checkstatsweight1, backend && !remote) {
     Xapian::Database db = get_database("apitest_simpledata");
     Xapian::Enquire enquire(db);
     for (Xapian::TermIterator a = db.allterms_begin();
          a != db.allterms_end();
          ++a) {
         const string term = *a;
         enquire.set_query(Xapian::Query(term));

         // The weight object adds into these through the references it holds.
         Xapian::termcount sum = 0;
         Xapian::termcount sum_squares = 0;
         CheckStatsWeight wt(db, term, sum, sum_squares);
         enquire.set_weighting_scheme(wt);
         // Running the match is what matters here; the MSet isn't inspected.
         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());

         // The document order in the multi-db case isn't the same as the
         // postlist order on the combined DB, so it's hard to compare the
         // wdf for each document in the Weight objects, but we can sum
         // the wdfs and the squares of the wdfs which provides a decent
         // check that we're not getting the wrong wdf values (it ensures
         // they have the right mean and standard deviation).
         Xapian::termcount expected_sum = 0;
         Xapian::termcount expected_sum_squares = 0;
         Xapian::PostingIterator i = db.postlist_begin(term);
         while (i != db.postlist_end(term)) {
             const Xapian::termcount wdf = i.get_wdf();
             expected_sum += wdf;
             expected_sum_squares += wdf * wdf;
             ++i;
         }
         TEST_EQUAL(sum, expected_sum);
         TEST_EQUAL(sum_squares, expected_sum_squares);
     }
 }

 // Regression test for bugs fixed in 1.4.1.
 //
 // Runs an OP_SYNONYM query over each successive pair of terms in the
 // database using CheckStatsWeight, and compares the wdf totals it accumulated
 // with totals computed by merging the two terms' postlists by hand.
 DEFINE_TESTCASE(checkstatsweight2, backend && !remote) {
     Xapian::Database db = get_database("apitest_simpledata");
     Xapian::Enquire enquire(db);
     Xapian::TermIterator a;
     // Each pass consumes two terms: one here via the extra ++a below, and
     // one via the loop increment.
     for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
         const string & term1 = *a;
         // Stop if there's no second term left to pair term1 with.
         if (++a == db.allterms_end()) break;
         const string & term2 = *a;
         Xapian::Query q(Xapian::Query::OP_SYNONYM,
                         Xapian::Query(term1), Xapian::Query(term2));
         tout << q.get_description() << '\n';
         enquire.set_query(q);
         // The weight object adds into these through the references it holds.
         Xapian::termcount sum = 0;
         Xapian::termcount sum_squares = 0;
         CheckStatsWeight wt(db, term1, term2, sum, sum_squares);
         enquire.set_weighting_scheme(wt);
         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());

         // The document order in the multi-db case isn't the same as the
         // postlist order on the combined DB, so it's hard to compare the
         // wdf for each document in the Weight objects, but we can sum
         // the wdfs and the squares of the wdfs which provides a decent
         // check that we're not getting the wrong wdf values (it ensures
         // they have the right mean and standard deviation).
         Xapian::termcount expected_sum = 0;
         Xapian::termcount expected_sum_squares = 0;
         Xapian::PostingIterator i = db.postlist_begin(term1);
         Xapian::PostingIterator j = db.postlist_begin(term2);
         // did1/did2 hold the docid each postlist is positioned on.  Two
         // special values are used below: 0 means "this postlist was just
         // consumed and needs advancing", and docid(-1) means "this postlist
         // is exhausted" (as the largest docid value it always loses the
         // comparison against a live postlist).
         Xapian::docid did1 = *i, did2 = *j;
         while (true) {
             // To calculate expected_sum_squares correctly we need to square
             // the sum per document.
             Xapian::termcount wdf;
             if (did1 == did2) {
                 // Both terms index this document.
                 wdf = i.get_wdf() + j.get_wdf();
                 did1 = did2 = 0;
             } else if (did1 < did2) {
                 // Only term1 indexes this document.
                 wdf = i.get_wdf();
                 did1 = 0;
             } else {
                 // Only term2 indexes this document.
                 wdf = j.get_wdf();
                 did2 = 0;
             }
             expected_sum += wdf;
             expected_sum_squares += wdf * wdf;

             // Advance whichever postlist(s) we just consumed; finish once
             // both have run out.
             if (did1 == 0) {
                 if (++i != db.postlist_end(term1)) {
                     did1 = *i;
                 } else {
                     if (did2 == Xapian::docid(-1)) break;
                     did1 = Xapian::docid(-1);
                 }
             }
             if (did2 == 0) {
                 if (++j != db.postlist_end(term2)) {
                     did2 = *j;
                 } else {
                     if (did1 == Xapian::docid(-1)) break;
                     did2 = Xapian::docid(-1);
                 }
             }
         }
         // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
         // the individual terms.
         TEST_EQUAL(sum, expected_sum);
         TEST_EQUAL(sum_squares, expected_sum_squares);
     }
 }

 // Regression test for bug fixed in 1.4.1.
 //
 // Runs OP_WILDCARD queries using CheckStatsWeight and compares the wdf totals
 // it accumulated with totals computed by merging the postlists of every term
 // the pattern expands to.
 DEFINE_TESTCASE(checkstatsweight3, backend && !remote) {
     // The most correct thing to do would be to collate termfreqs across shards
     // for this, but if that's too hard to do efficiently we could at least
     // scale up the termfreqs proportional to the size of the shard.
     XFAIL_FOR_BACKEND("multi", "OP_WILDCARD+OP_SYNONYM use shard termfreqs");

     // Orders posting iterators by the docid they're positioned on.  Used with
     // the standard heap algorithms this keeps the iterator on the highest
     // docid at the front, so documents are visited in descending docid order.
     struct PlCmp {
         bool operator()(const Xapian::PostingIterator& a,
                         const Xapian::PostingIterator& b) const {
             return *a < *b;
         }
     };

     Xapian::Database db = get_database("apitest_simpledata");
     Xapian::Enquire enquire(db);
     static const char * const testcases[] = {
         "a", // a* matches all documents, but no term matches all.
         "pa", // Expands to only "paragraph", matching 5.
         "zulu", // No matches.
         "th", // Term "this" matches all documents.
     };
     for (auto pattern : testcases) {
         Xapian::Query q(Xapian::Query::OP_WILDCARD, pattern);
         // Discard debug output left over from the previous pattern.
         tout.str(string{});
         tout << q.get_description() << '\n';
         enquire.set_query(q);
         // The weight object adds into these through the references it holds.
         Xapian::termcount sum = 0;
         Xapian::termcount sum_squares = 0;
         CheckStatsWeight wt(db, pattern, "*", sum, sum_squares);
         enquire.set_weighting_scheme(wt);
         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());

         // The document order in the multi-db case isn't the same as the
         // postlist order on the combined DB, so it's hard to compare the
         // wdf for each document in the Weight objects, but we can sum
         // the wdfs and the squares of the wdfs which provides a decent
         // check that we're not getting the wrong wdf values (it ensures
         // they have the right mean and standard deviation).
         Xapian::termcount expected_sum = 0;
         Xapian::termcount expected_sum_squares = 0;
         // One postlist per term the pattern expands to.
         vector<Xapian::PostingIterator> postlists;
         for (auto&& t = db.allterms_begin(pattern);
              t != db.allterms_end(pattern); ++t) {
             postlists.emplace_back(db.postlist_begin(*t));
         }
         make_heap(postlists.begin(), postlists.end(), PlCmp());
         // did is the document currently being totalled (0 before the first
         // one) and wdf is the combined wdf seen for it so far.
         Xapian::docid did = 0;
         Xapian::termcount wdf = 0;
         while (!postlists.empty()) {
             // Move the postlist on the highest docid to the back, read its
             // entry, then either drop it (if exhausted) or put it back into
             // the heap at its new position.
             pop_heap(postlists.begin(), postlists.end(), PlCmp());
             Xapian::docid did_new = *postlists.back();
             Xapian::termcount wdf_new = postlists.back().get_wdf();
             if (++(postlists.back()) == Xapian::PostingIterator()) {
                 postlists.pop_back();
             } else {
                 push_heap(postlists.begin(), postlists.end(), PlCmp());
             }
             if (did_new != did) {
                 // Moved on to a different document, so flush the total for
                 // the previous one (a no-op the first time as wdf is 0).
                 expected_sum += wdf;
                 expected_sum_squares += wdf * wdf;
                 wdf = 0;
                 did = did_new;
             }
             wdf += wdf_new;
         }
         // Flush the total for the final document.
         expected_sum += wdf;
         expected_sum_squares += wdf * wdf;
         // The wildcard's wdf should be equal to the sum of the wdfs of
         // the individual terms it expands to.
         TEST_EQUAL(sum, expected_sum);
         // NOTE(review): only a lower bound is checked for sum_squares here,
         // unlike the other checkstatsweight tests; the reason isn't evident
         // from this testcase - confirm before tightening.
         TEST_REL(sum_squares, >=, expected_sum_squares);
     }
 }

 // Regression test for bug fixed in 1.4.6.  Doesn't work with
 // multi as the weight object is cloned more times.
 //
 // Each query ORs together two Query objects for the same term, and
 // CheckStatsWeight is told about the repeat by passing "=" as its term2.
 DEFINE_TESTCASE(checkstatsweight4, backend && !remote && !multi) {
     Xapian::Database db = get_database("apitest_simpledata");
     Xapian::Enquire enquire(db);
     for (Xapian::TermIterator a = db.allterms_begin();
          a != db.allterms_end();
          ++a) {
         const string term = *a;
         // Two leaf queries for the same term which differ only in their
         // final constructor argument.
         const Xapian::Query first(term, 1, 1);
         const Xapian::Query second(term, 1, 2);
         enquire.set_query(first | second);

         // The weight object adds into these through the references it holds.
         Xapian::termcount sum = 0;
         Xapian::termcount sum_squares = 0;
         CheckStatsWeight wt(db, term, "=", sum, sum_squares);
         enquire.set_weighting_scheme(wt);
         // Running the match is what matters here; the MSet isn't inspected.
         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());

         // The document order in the multi-db case isn't the same as the
         // postlist order on the combined DB, so it's hard to compare the
         // wdf for each document in the Weight objects, but we can sum
         // the wdfs and the squares of the wdfs which provides a decent
         // check that we're not getting the wrong wdf values (it ensures
         // they have the right mean and standard deviation).
         Xapian::termcount expected_sum = 0;
         Xapian::termcount expected_sum_squares = 0;
         Xapian::PostingIterator i = db.postlist_begin(term);
         while (i != db.postlist_end(term)) {
             const Xapian::termcount wdf = i.get_wdf();
             expected_sum += wdf;
             expected_sum_squares += wdf * wdf;
             ++i;
         }
         TEST_EQUAL(sum, expected_sum);
         TEST_EQUAL(sum_squares, expected_sum_squares);
     }
 }

 // Two-stage smoothing should perform the same as Jelinek-Mercer smoothing
 // when the first smoothing parameter is 1 for both.
 DEFINE_TESTCASE(unigramlmweight4, backend) {
     Xapian::Database db = get_database("apitest_simpledata");
     Xapian::Enquire enquire1(db);
     Xapian::Enquire enquire2(db);
     enquire1.set_query(Xapian::Query("paragraph"));
     Xapian::MSet mset1;
     enquire2.set_query(Xapian::Query("paragraph"));
     Xapian::MSet mset2;
     // 5 documents available with term paragraph so mset size should be 5
     enquire1.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::TWO_STAGE_SMOOTHING, 1, 0));
     enquire2.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 1, 0));
     mset1 = enquire1.get_mset(0, 10);
     mset2 = enquire2.get_mset(0, 10);

     // Check both result sets before indexing into them, so that a short
     // result is reported as a test failure rather than reading an entry
     // which isn't there.
     TEST_EQUAL(mset1.size(), 5);
     TEST_EQUAL(mset2.size(), 5);
     TEST_EQUAL_DOUBLE(mset1[1].get_weight(), mset2[1].get_weight());
 }

 /* Check that when no smoothing is applied, all the smoothing types
  * give the same result, i.e. wdf_double/len_double. */
 DEFINE_TESTCASE(unigramlmweight5, backend) {
     Xapian::Database db = get_database("apitest_simpledata");

     // Run the same query once per smoothing type, with both smoothing
     // parameters set to 0 each time.
     Xapian::Enquire enquire1(db);
     enquire1.set_query(Xapian::Query("paragraph"));
     enquire1.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::TWO_STAGE_SMOOTHING, 0, 0));
     Xapian::MSet mset1 = enquire1.get_mset(0, 10);

     Xapian::Enquire enquire2(db);
     enquire2.set_query(Xapian::Query("paragraph"));
     enquire2.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0, 0));
     Xapian::MSet mset2 = enquire2.get_mset(0, 10);

     Xapian::Enquire enquire3(db);
     enquire3.set_query(Xapian::Query("paragraph"));
     enquire3.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING, 0, 0));
     Xapian::MSet mset3 = enquire3.get_mset(0, 10);

     Xapian::Enquire enquire4(db);
     enquire4.set_query(Xapian::Query("paragraph"));
     enquire4.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::DIRICHLET_SMOOTHING, 0, 0));
     Xapian::MSet mset4 = enquire4.get_mset(0, 10);

     // 5 documents available with term paragraph so mset size should be 5
     TEST_EQUAL(mset1.size(), 5);
     TEST_EQUAL(mset2.size(), 5);
     TEST_EQUAL(mset3.size(), 5);
     TEST_EQUAL(mset4.size(), 5);

     // Compare every pair of schemes at every rank.
     Xapian::doccount i = 0;
     while (i < 5) {
         TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset4[i].get_weight());
         TEST_EQUAL_DOUBLE(mset2[i].get_weight(), mset4[i].get_weight());
         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset2[i].get_weight());
         TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset2[i].get_weight());
         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset4[i].get_weight());
         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight());
         ++i;
     }
 }

 // Feature test for Dir+ function: the same query is weighted with Dirichlet
 // and with Dirichlet-plus smoothing, and the weights compared rank by rank.
 DEFINE_TESTCASE(unigramlmweight7, backend) {
     Xapian::Database db = get_database("apitest_simpledata");

     // Plain Dirichlet smoothing.
     Xapian::Enquire enquire1(db);
     enquire1.set_query(Xapian::Query("paragraph"));
     enquire1.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0));
     Xapian::MSet mset1 = enquire1.get_mset(0, 10);

     // Dir+ with the same first parameter and a non-zero second parameter.
     Xapian::Enquire enquire2(db);
     enquire2.set_query(Xapian::Query("paragraph"));
     enquire2.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_PLUS_SMOOTHING, 2000, 0.05));
     Xapian::MSet mset2 = enquire2.get_mset(0, 10);

     // mset size should be 5
     TEST_EQUAL(mset1.size(), 5);
     TEST_EQUAL(mset2.size(), 5);

     // Expect mset weights associated with Dir+ more than mset weights by Dir
     // because of the presence of extra weight component in Dir+ function.
     TEST_REL(mset2[0].get_weight(),>,mset1[0].get_weight());
     TEST_REL(mset2[1].get_weight(),>,mset1[1].get_weight());
     TEST_REL(mset2[2].get_weight(),>,mset1[2].get_weight());
     TEST_REL(mset2[3].get_weight(),>,mset1[3].get_weight());
     TEST_REL(mset2[4].get_weight(),>,mset1[4].get_weight());
 }

 // Regression test that OP_SCALE_WEIGHT works with LMWeight (fixed in 1.4.1).
 //
 // The same query is run unscaled and then scaled by 15, and each scaled
 // weight must be 15 times the corresponding unscaled one.
 DEFINE_TESTCASE(unigramlmweight8, backend) {
     Xapian::Database db = get_database("apitest_simpledata");
     Xapian::Enquire enquire(db);
     Xapian::Query query("paragraph");
     const Xapian::LMWeight dirichlet(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0);

     // Baseline: the plain term query.
     enquire.set_query(query);
     enquire.set_weighting_scheme(dirichlet);
     Xapian::MSet mset1 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset1.size(), 5);

     // The same query wrapped in OP_SCALE_WEIGHT.
     const Xapian::Query scaled(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0);
     enquire.set_query(scaled);
     enquire.set_weighting_scheme(dirichlet);
     Xapian::MSet mset2 = enquire.get_mset(0, 10);
     TEST_EQUAL(mset2.size(), mset1.size());

     // Make sure the comparison below isn't just 0 == 0.
     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
     Xapian::doccount i = 0;
     while (i < mset1.size()) {
         TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
         ++i;
     }
 }

 // Feature test for CoordWeight: every result's weight must equal the number
 // of query terms which match that document.
 DEFINE_TESTCASE(coordweight1, backend) {
     Xapian::Database db = get_database("apitest_simpledata");
     Xapian::Enquire enquire(db);
     enquire.set_weighting_scheme(Xapian::CoordWeight());

     static const char * const terms[] = {
         "this", "line", "paragraph", "rubbish"
     };
     const size_t n_terms = sizeof(terms) / sizeof(terms[0]);
     Xapian::Query query(Xapian::Query::OP_OR, terms, terms + n_terms);
     enquire.set_query(query);

     Xapian::MSet mymset1 = enquire.get_mset(0, 100);
     // CoordWeight scores 1 for each matching term, so the weight should equal
     // the number of matching terms.
     for (Xapian::MSetIterator i = mymset1.begin(); i != mymset1.end(); ++i) {
         // Count the matching terms for this result by walking the iterator.
         Xapian::termcount matching_terms = 0;
         for (Xapian::TermIterator t = enquire.get_matching_terms_begin(i);
              t != enquire.get_matching_terms_end(i);
              ++t) {
             ++matching_terms;
         }
         TEST_EQUAL(i.get_weight(), matching_terms);
     }
 }
CheckInitWeight::get_maxpart
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
Definition: api_weight.cc:739

Xapian
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80

Xapian::MSet::size
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318

CheckInitWeight::clone
Weight * clone() const override
Clone this object.
Definition: api_weight.cc:730

Xapian::WritableDatabase::add_document
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902

Xapian::Query::OP_WILDCARD
Wildcard expansion.
Definition: query.h:255

CheckStatsWeight::get_sumextra
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
Definition: api_weight.cc:916

CheckStatsWeight
Definition: api_weight.cc:764

TEST
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275

Xapian::Database
This class is used to access a database, or a group of databases.
Definition: database.h:68

Xapian::PostingIterator::get_wdf
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Definition: postingiterator.cc:110

Xapian::Enquire::get_matching_terms_end
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
Definition: enquire.h:717

Xapian::weight
double weight
The weight of a document or term.
Definition: types.h:122

CheckStatsWeight::get_maxextra
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
Definition: api_weight.cc:921

Xapian::totallength
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139

Xapian::Database::allterms_end
TermIterator allterms_end(const std::string &=std::string()) const
Corresponding end iterator to allterms_begin(prefix).
Definition: database.h:269

TEST_NOT_EQUAL_DOUBLE
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
Definition: testsuite.h:300

Xapian::Error::get_msg
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
Definition: error.h:122

CheckStatsWeight::clone
Weight * clone() const override
Clone this object.
Definition: api_weight.cc:821

Xapian::MSet
Class representing a list of search results.
Definition: mset.h:44

config.h

Xapian::InL2Weight
This class implements the InL2 weighting scheme.
Definition: weight.h:844

std
STL namespace.

Xapian::Enquire::get_mset
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:938

CheckInitWeight::CheckInitWeight
CheckInitWeight(unsigned &z, unsigned &n)
Definition: api_weight.cc:719

CheckStatsWeight::factor
double factor
Definition: api_weight.cc:766

Xapian::Weight::serialise
virtual std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: weight.cc:141

Xapian::PL2PlusWeight
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
Definition: weight.h:1263

Xapian::Database::get_doccount
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267

Xapian::Database::get_total_length
Xapian::totallength get_total_length() const
Get the total length of all the documents in the database.
Definition: omdatabase.cc:312

Xapian::Enquire::get_matching_terms_begin
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:962

Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING
Definition: weight.h:161

apitest.h
test functionality of the Xapian API

Xapian::Database::get_avlength
Xapian::doclength get_avlength() const
Get the average length of the documents in the database.
Definition: omdatabase.cc:293

Xapian::BB2Weight
This class implements the BB2 weighting scheme.
Definition: weight.h:1060

Xapian::TermIterator
Class for iterating over a list of terms.
Definition: termiterator.h:41

Xapian::termcount
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72

TEST_REL
#define TEST_REL(A, REL, B)
Test a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32

Xapian::PostingIterator
Class for iterating over a list of postings (document ids, each with a wdf).
Definition: postingiterator.h:41

TEST_NOT_EQUAL
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305

Xapian::CoordWeight
Xapian::Weight subclass implementing Coordinate Matching.
Definition: weight.h:1516

Xapian::InvalidArgumentError
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241

CheckStatsWeight::sum_squares
Xapian::termcount & sum_squares
Definition: api_weight.cc:779

CheckStatsWeight::wdf_upper
Xapian::termcount wdf_upper
Definition: api_weight.cc:783

Xapian::BoolWeight
Class implementing a "boolean" weighting scheme.
Definition: weight.h:433

Xapian::WritableDatabase
This class provides read/write access to a database.
Definition: database.h:789

Xapian::SerialisationError
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929

tout
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104

Xapian::MSetIterator
Iterator over a Xapian::MSet.
Definition: mset.h:368

Xapian::Query::OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
Definition: query.h:166

Xapian::Weight::DIRICHLET_PLUS_SMOOTHING
Definition: weight.h:163

xapian.h
Public interfaces for the Xapian library.

CheckStatsWeight::CheckStatsWeight
CheckStatsWeight(const Xapian::Database &db_, const string &term1_, const string &term2_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Definition: api_weight.cc:785

Xapian::Weight::JELINEK_MERCER_SMOOTHING
Definition: weight.h:162

TEST_WEIGHT_CLASS_NO_PARAMS
#define TEST_WEIGHT_CLASS_NO_PARAMS(W)
Definition: api_weight.cc:63

CheckStatsWeight::init
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
Definition: api_weight.cc:817

TEST_EXCEPTION
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109

Xapian::MSet::begin
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624

Xapian::MSet::end
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629

CheckInitWeight
Definition: api_weight.cc:713

CheckStatsWeight::sum
Xapian::termcount & sum
Definition: api_weight.cc:778

Xapian::TradWeight
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:774

CONST_STRLEN
#define CONST_STRLEN(S)
Returns the length of a string constant.
Definition: stringutils.h:43

Xapian::DLHWeight
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence fr...
Definition: weight.h:1136

Xapian::PL2Weight
This class implements the PL2 weighting scheme.
Definition: weight.h:1196

Xapian::IneB2Weight
This class implements the IneB2 weighting scheme.
Definition: weight.h:988

test_weight_class
static void test_weight_class(const char *name, const W &obj_default, const W &obj_other)
Definition: api_weight.cc:67

Xapian::Database::allterms_begin
TermIterator allterms_begin(const std::string &prefix=std::string()) const
An iterator which runs across all terms with a given prefix.
Definition: omdatabase.cc:223

TEST_EQUAL_DOUBLE
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295

test_weight_class_no_params
static void test_weight_class_no_params(const char *name)
Definition: api_weight.cc:38

Xapian::Enquire::set_query
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793

Xapian::Query::OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Definition: query.h:239

CheckStatsWeight::get_maxpart
double get_maxpart() const override
Return an upper bound on what get_sumpart() can return for any document.
Definition: api_weight.cc:907

Xapian::MSetIterator::get_weight
double get_weight() const
Get the weight for the current position.
Definition: omenquire.cc:460

Xapian::IfB2Weight
This class implements the IfB2 weighting scheme.
Definition: weight.h:915

FAIL_TEST
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68

Xapian::Query::OP_AND
Match only documents which all subqueries match.
Definition: query.h:84

query
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63

CheckInitWeight::get_maxextra
double get_maxextra() const override
Return an upper bound on what get_sumextra() can return for any document.
Definition: api_weight.cc:746

CheckInitWeight::get_sumextra
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const override
Calculate the term-independent weight component for a document.
Definition: api_weight.cc:741

CheckStatsWeight::CheckStatsWeight
CheckStatsWeight(const Xapian::Database &db_, const string &term_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Definition: api_weight.cc:811

get_database
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48

CheckInitWeight::get_sumpart
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const override
Calculate the weight contribution for this object's term to a document.
Definition: api_weight.cc:734

XFAIL_FOR_BACKEND
void XFAIL_FOR_BACKEND(const std::string &backend_prefix, const char *msg)
Definition: apitest.cc:147

name
char name[9]
Definition: dbcheck.cc:55

Xapian::Query::get_description
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232

Xapian::Enquire
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152

Xapian::doccount
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38

CheckStatsWeight::len_upper
Xapian::termcount len_upper
Definition: api_weight.cc:781

Xapian::DPHWeight
This class implements the DPH weighting scheme.
Definition: weight.h:1359

CheckStatsWeight::term2
string term2
Definition: api_weight.cc:776

Xapian::Query::OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92

testutils.h
Xapian-specific test helper functions and macros.

api_weight.h

mset_expect_order
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225

name
Definition: header.h:151

TEST_WEIGHT_CLASS
#define TEST_WEIGHT_CLASS(W, DEFAULT, OTHER)
Definition: api_weight.cc:108

Xapian::Enquire::set_weighting_scheme
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819

Xapian::docid
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52

Xapian::Query
Class representing a query.
Definition: query.h:46

Xapian::Weight::TWO_STAGE_SMOOTHING
Definition: weight.h:159

CheckInitWeight::init
void init(double factor_) override
Allow the subclass to perform any initialisation it needs to.
Definition: api_weight.cc:722

TEST_EQUAL
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278

CheckStatsWeight::term1
string term1
Definition: api_weight.cc:770

gen_wdf_eq_doclen_db
static void gen_wdf_eq_doclen_db(Xapian::WritableDatabase &db, const string &)
Definition: api_weight.cc:472

Xapian::Database::postlist_end
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225

Xapian::MSet::back
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Definition: mset.h:641

CheckInitWeight::factor
double factor
Definition: api_weight.cc:715

Xapian::LMWeight
Xapian::Weight subclass implementing the Language Model formula.
Definition: weight.h:1413

Xapian::Weight::DIRICHLET_SMOOTHING
Definition: weight.h:160

CheckStatsWeight::db
Xapian::Database db
Definition: api_weight.cc:768

Xapian::Database::get_termfreq
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323

Xapian::Document
A handle representing a document in a Xapian database.
Definition: document.h:61

DEFINE_TESTCASE
DEFINE_TESTCASE(weightserialisation1, !backend)
Test serialisation and introspection of built-in weighting schemes.
Definition: api_weight.cc:112

Xapian::BM25PlusWeight
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Definition: weight.h:650

TEST_WEIGHTING_SCHEME
#define TEST_WEIGHTING_SCHEME(W,...)

CheckStatsWeight::get_sumpart
double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqueterms) const override
Calculate the weight contribution for this object's term to a document.
Definition: api_weight.cc:836

Xapian::BM25Weight
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:546

Xapian::Database::postlist_begin
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162

Xapian::TfIdfWeight
Xapian::Weight subclass implementing the tf-idf weighting scheme.
Definition: weight.h:458

CheckStatsWeight::len_lower
Xapian::termcount len_lower
Definition: api_weight.cc:782

Xapian::Document::add_term
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140

Xapian::Weight
Abstract base class for weighting schemes.
Definition: weight.h:35

Xapian::Database::get_collection_freq
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Definition: omdatabase.cc:339

CheckInitWeight::zero_inits
unsigned & zero_inits
Definition: api_weight.cc:717

testcases
static const testcase testcases[]
Definition: api_unicode.cc:39