40 LMWeight::clone()
const {
41 return new LMWeight(param_log, select_smoothing, param_smoothing1, param_smoothing2);
45 LMWeight::init(
double factor_)
48 weight_collection = factor_;
55 if (param_log == 0.0) {
56 param_log = get_doclength_upper_bound();
64 if (select_smoothing == JELINEK_MERCER_SMOOTHING ||
65 select_smoothing == TWO_STAGE_SMOOTHING) {
66 if (param_smoothing1 == 0.7) {
67 if (get_query_length() <= 2) {
68 param_smoothing1 = 0.1;
78 if (select_smoothing == DIRICHLET_SMOOTHING) {
79 if (param_smoothing1 == 0.7) {
80 param_smoothing1 = 2000;
86 if (select_smoothing == DIRICHLET_PLUS_SMOOTHING) {
87 if (param_smoothing1 == 0.7) {
88 param_smoothing1 = 2000;
96 return "Xapian::LMWeight";
100 LMWeight::serialise()
const 103 result +=
static_cast<unsigned char>(select_smoothing);
110 LMWeight::unserialise(
const string & s)
const 112 const char *ptr = s.data();
113 const char *end = ptr + s.size();
118 if (
rare(ptr != end))
120 return new LMWeight(param_log_, select_smoothing_, param_smoothing1_, param_smoothing2_);
128 double wdf_double = wdf;
130 double len_double = len;
140 double wt_coll = get_collection_freq() / double(get_total_length());
143 if (select_smoothing == JELINEK_MERCER_SMOOTHING) {
147 double weight_document = wdf_double / len_double;
148 weight_sum = (param_smoothing1 * wt_coll) +
149 ((1 - param_smoothing1) * weight_document);
150 }
else if (select_smoothing == DIRICHLET_SMOOTHING) {
151 weight_sum = (wdf_double + (param_smoothing1 * wt_coll)) /
152 (len_double + param_smoothing1);
153 }
else if (select_smoothing == DIRICHLET_PLUS_SMOOTHING) {
161 weight_sum = (1 + (wdf_double / (param_smoothing1 * wt_coll))) *
162 (1 + (param_smoothing2 / (param_smoothing1 * wt_coll)));
163 }
else if (select_smoothing == ABSOLUTE_DISCOUNT_SMOOTHING) {
164 double uniqterm_double = uniqterm;
165 weight_sum = ((((wdf_double - param_smoothing1) > 0) ? (wdf_double - param_smoothing1) : 0) / len_double) + ((param_smoothing1 * wt_coll * uniqterm_double) / len_double);
167 weight_sum = (((1 - param_smoothing1) * (wdf_double + (param_smoothing2 * wt_coll)) / (len_double + param_smoothing2)) + (param_smoothing1 * wt_coll));
176 double product = weight_sum * param_log;
178 return (product > 1.0) ? weight_collection * log(product) : 0;
182 LMWeight::get_maxpart()
const 187 double wdf_max = get_wdf_upper_bound();
195 double wt_coll = get_collection_freq() / double(get_total_length());
198 if (select_smoothing == JELINEK_MERCER_SMOOTHING) {
199 upper_bound = (param_smoothing1 * wt_coll) + (1 - param_smoothing1);
200 }
else if (select_smoothing == DIRICHLET_SMOOTHING) {
201 upper_bound = (get_doclength_upper_bound() + (param_smoothing1 * wt_coll)) / (get_doclength_upper_bound() + param_smoothing1);
202 }
else if (select_smoothing == DIRICHLET_PLUS_SMOOTHING) {
203 upper_bound = (1 + (wdf_max / (param_smoothing1 * wt_coll))) *
204 (1 + (param_smoothing2 / (param_smoothing1 * wt_coll)));
205 }
else if (select_smoothing == ABSOLUTE_DISCOUNT_SMOOTHING) {
206 upper_bound = param_smoothing1 * wt_coll + 1;
208 upper_bound = (((1 - param_smoothing1) * (get_doclength_upper_bound() + (param_smoothing2 * wt_coll)) / (get_doclength_upper_bound() + param_smoothing2)) + (param_smoothing1 * wt_coll));
214 double product = upper_bound * param_log;
216 return (product > 1.0) ? weight_collection * log(product) : 1.0;
229 if (select_smoothing == DIRICHLET_PLUS_SMOOTHING) {
230 double extra_weight = param_smoothing1 / (len + param_smoothing1);
231 return get_query_length() * log(extra_weight);
237 LMWeight::get_maxextra()
const 239 if (select_smoothing == DIRICHLET_PLUS_SMOOTHING) {
240 double extra_weight = param_smoothing1 / (get_doclength_lower_bound() + param_smoothing1);
241 return get_query_length() * log(extra_weight);
The Xapian namespace contains public interfaces for the Xapian library.
Hierarchy of classes which Xapian can throw as exceptions.
type_smoothing
Type of smoothing to use with the Language Model Weighting scheme.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Functions to serialise and unserialise a double.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Indicates an error in the std::string serialisation of an object.
std::string serialise_double(double v)
Serialise a double to a string.
Various assertion macros.
Xapian::Weight subclass implementing the Language Model formula.