66 LMJMWeight::init(
double factor_)
68 factor = factor_ * get_wqf();
70 auto collection_freq = get_collection_freq();
71 if (
rare(collection_freq == 0)) {
79 double lambda = param_lambda;
80 if (lambda <= 0.0 || lambda >= 1.0) {
81 auto query_len = get_query_length();
84 }
else if (query_len < 8) {
85 lambda = (query_len - 1) * 0.1;
93 multiplier = (1.0 - lambda) * total_length / (lambda * collection_freq);
100 double w = multiplier * wdf / len;
101 return factor * log(1.0 + w);
105 LMJMWeight::get_maxpart()
const
109 double w = multiplier;
110 if (wdf_max < len_min) {
113 w *= double(wdf_max) / len_min;
115 return factor * log(1.0 + w);
119 LMJMWeight::clone()
const {
130 LMJMWeight::serialise()
const
136 LMJMWeight::unserialise(
const string& s)
const
138 const char *ptr = s.data();
139 const char *end = ptr + s.size();
141 if (
rare(ptr != end))
143 "LMJMWeight::unserialise()");
149 LMJMWeight::create_from_parameters(
const char* params)
const
151 const char*
p = params;
161 LMDirichletWeight::init(
double factor_)
163 factor = factor_ * get_wqf();
165 double mu = param_mu;
167 auto doclen_max = get_doclength_upper_bound();
168 extra_offset = get_query_length() * log(doclen_max + mu);
174 auto collection_freq = get_collection_freq();
175 if (
rare(collection_freq == 0)) {
184 multiplier = get_total_length() / (collection_freq * mu);
186 double delta = param_delta;
189 factor *= log(1.0 + delta * multiplier);
197 return factor * log(1.0 + wdf * multiplier);
201 LMDirichletWeight::get_maxpart()
const
204 return factor * log(1.0 + wdf_max * multiplier);
233 return extra_offset - get_query_length() * log(doclen + param_mu);
237 LMDirichletWeight::get_maxextra()
const
239 auto doclen_min = get_doclength_lower_bound();
240 return extra_offset - get_query_length() * log(doclen_min + param_mu);
244 LMDirichletWeight::clone()
const {
251 return "lmdirichlet";
255 LMDirichletWeight::serialise()
const
263 LMDirichletWeight::unserialise(
const string& s)
const
265 const char *ptr = s.data();
266 const char *end = ptr + s.size();
269 if (
rare(ptr != end))
271 "LMDirichletWeight::unserialise()");
277 LMDirichletWeight::create_from_parameters(
const char* params)
const
279 const char*
p = params;
292 LMAbsDiscountWeight::init(
double factor_)
294 factor = factor_ * get_wqf();
296 auto doclen_max = get_doclength_upper_bound();
297 extra_offset = get_query_length() * log(
double(doclen_max));
299 auto collection_freq = get_collection_freq();
300 if (
rare(collection_freq == 0)) {
310 multiplier = total_length / (param_delta * collection_freq);
319 return factor * log(1.0 + (wdf - param_delta) / uniqterms * multiplier);
323 LMAbsDiscountWeight::get_maxpart()
const
327 double x = (wdf_max - param_delta) * multiplier;
333 if (doclen_min > wdf_max)
334 x *= (doclen_min - 1) / wdf_max + 1;
335 return factor * log(1.0 + x);
363 return extra_offset + get_query_length() * log(
double(uniqterms) / doclen);
367 LMAbsDiscountWeight::get_maxextra()
const
375 LMAbsDiscountWeight::clone()
const {
382 return "lmabsdiscount";
386 LMAbsDiscountWeight::serialise()
const
392 LMAbsDiscountWeight::unserialise(
const string& s)
const
394 const char *ptr = s.data();
395 const char *end = ptr + s.size();
397 if (
rare(ptr != end))
399 "LMAbsDiscountWeight::unserialise()");
405 LMAbsDiscountWeight::create_from_parameters(
const char* params)
const
407 const char*
p = params;
410 parameter_error(
"Parameter delta is invalid",
"lmabsdiscount", params);
412 parameter_error(
"Extra data after parameter",
"lmabsdiscount", params);
417 LM2StageWeight::init(
double factor_)
419 factor = factor_ * get_wqf();
421 double lambda = param_lambda;
422 double mu = param_mu;
424 auto doclen_max = get_doclength_upper_bound();
425 extra_offset = -log((lambda * doclen_max + mu) / (doclen_max + mu));
426 extra_offset *= get_query_length();
428 auto collection_freq = get_collection_freq();
429 if (
rare(collection_freq == 0)) {
438 multiplier = (1 - lambda) * get_total_length() / collection_freq;
448 double lambda = param_lambda;
449 double mu = param_mu;
450 return factor * log(1.0 + wdf / (lambda * doclen + mu) * multiplier);
454 LM2StageWeight::get_maxpart()
const
456 double lambda = param_lambda;
457 double mu = param_mu;
464 double x = wdf_max / (lambda * max(doclen_min, wdf_max) + mu);
465 return factor * log(1.0 + x * multiplier);
488 double lambda = param_lambda;
489 double mu = param_mu;
490 return extra_offset +
491 get_query_length() * log((lambda * doclen + mu) / (doclen + mu));
495 LM2StageWeight::get_maxextra()
const
511 double lambda = param_lambda;
512 double mu = param_mu;
513 auto doclen = get_doclength_lower_bound();
514 return extra_offset +
515 get_query_length() * log((lambda * doclen + mu) / (doclen + mu));
519 LM2StageWeight::clone()
const {
530 LM2StageWeight::serialise()
const
538 LM2StageWeight::unserialise(
const string & s)
const
540 const char *ptr = s.data();
541 const char *end = ptr + s.size();
544 if (
rare(ptr != end))
546 "LM2StageWeight::unserialise()");
551 LM2StageWeight::create_from_parameters(
const char* params)
const
553 const char*
p = params;
Language Model weighting with Two Stage smoothing.
Language Model weighting with Absolute Discount smoothing.
Language Model weighting with Dirichlet or Dir+ smoothing.
Language Model weighting with Jelinek-Mercer smoothing.
Indicates an error in the std::string serialisation of an object.
static void parameter_error(const char *msg, const std::string &scheme, const char *params)
static bool double_param(const char **p, double *ptr_val)
Hierarchy of classes which Xapian can throw as exceptions.
static void parameter_error(const char *message, const std::string &scheme, const char *params)
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Various assertion macros.
string serialise_double(double v)
Serialise a double to a string.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Functions to serialise and unserialise a double.
Various handy string-related helpers.
Xapian::Weight::Internal class, holding database and term statistics.