40 BM25Weight::clone()
const
42 return new BM25Weight(param_k1, param_k2, param_k3, param_b,
47 BM25Weight::init(
double factor)
52 if (get_rset_size() != 0) {
61 AssertRel(reltermfreq,<=,get_rset_size());
67 AssertRel(reldocs_not_indexed,<=,get_collection_size() - tf);
72 double numerator = (reltermfreq + 0.5) * (Q - tf + 0.5);
73 double denom = (reldocs_not_indexed + 0.5) * (nonreldocs_indexed + 0.5);
74 tw = numerator / denom;
76 tw = (get_collection_size() - tf + 0.5) / (tf + 0.5);
97 if (
rare(tw <= 1.0)) {
100 termweight = log(tw) * factor;
102 double wqf_double = get_wqf();
103 termweight *= (param_k3 + 1) * wqf_double / (param_k3 + wqf_double);
107 if (tw < 2) tw = tw * 0.5 + 1;
108 termweight = log(tw) * factor;
110 double wqf_double = get_wqf();
111 termweight *= (param_k3 + 1) * wqf_double / (param_k3 + wqf_double);
114 termweight *= (param_k1 + 1);
118 if (param_k2 == 0 && (param_b == 0 || param_k1 == 0)) {
123 len_factor = get_average_length();
126 if (len_factor != 0) len_factor = 1 / len_factor;
139 BM25Weight::serialise()
const
150 BM25Weight::unserialise(
const string & s)
const
152 const char *ptr = s.data();
153 const char *end = ptr + s.size();
159 if (
rare(ptr != end))
161 return new BM25Weight(k1, k2, k3, b, min_normlen);
168 LOGCALL(WTCALC,
double,
"BM25Weight::get_sumpart", wdf | len);
171 double wdf_double = wdf;
172 double denom = param_k1 * (normlen * param_b + (1 - param_b)) + wdf_double;
174 RETURN(termweight * (wdf_double / denom));
178 BM25Weight::get_maxpart()
const
180 LOGCALL(WTCALC,
double,
"BM25Weight::get_maxpart", NO_ARGS);
181 double denom = param_k1;
183 if (param_k1 != 0.0) {
184 if (param_b != 0.0) {
194 max(max(wdf_max, get_doclength_lower_bound()) * len_factor,
196 denom *= (normlen_lb * param_b + (1 - param_b));
201 RETURN(termweight * (wdf_max / denom));
218 LOGCALL(WTCALC,
double,
"BM25Weight::get_sumextra", len);
219 double num = (2.0 * param_k2 * get_query_length());
220 RETURN(num / (1.0 + max(len * len_factor, param_min_normlen)));
224 BM25Weight::get_maxextra()
const
226 LOGCALL(WTCALC,
double,
"BM25Weight::get_maxextra", NO_ARGS);
229 double num = (2.0 * param_k2 * get_query_length());
230 RETURN(num / (1.0 + max(get_doclength_lower_bound() * len_factor,
231 param_min_normlen)));
242 BM25Weight::create_from_parameters(
const char* params)
const
244 const char*
p = params;
251 double min_normlen = 0.5;
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Indicates an error in the std::string serialisation of an object.
static void parameter_error(const char *msg, const std::string &scheme, const char *params)
static bool double_param(const char **p, double *ptr_val)
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Hierarchy of classes which Xapian can throw as exceptions.
static void parameter_error(const char *message, const std::string &scheme, const char *params)
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
double doclength
A normalised document length.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Various assertion macros.
#define AssertRel(A, REL, B)
string serialise_double(double v)
Serialise a double to a string.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
functions to serialise and unserialise a double
Xapian::Weight::Internal class, holding database and term statistics.