35 DLHWeight::clone()
const 41 DLHWeight::init(
double factor)
52 double wdf_upper = get_wdf_upper_bound();
58 const double wdf_lower = 1.0;
59 double len_upper = get_doclength_upper_bound();
60 double len_lower = get_doclength_lower_bound();
62 double F = get_collection_freq();
65 log_constant = get_total_length() / F;
66 wqf_product_factor = get_wqf() * factor;
71 double max_wdf_over_l = wdf_upper < len_lower ? wdf_upper / len_lower : 1.0;
80 double logged_expr = max_wdf_over_l * log_constant;
81 double w_for_A = logged_expr > 1.0 ? wdf_upper : wdf_lower;
82 double A = w_for_A / (w_for_A + 0.5) *
log2(logged_expr);
96 if (len_lower > wdf_upper) {
123 double B1 = (len_lower - wdf_lower) *
log2(1.0 - wdf_lower / len_lower);
124 double B2 = (len_lower - wdf_upper) *
log2(1.0 - wdf_upper / len_lower);
138 double wdf_var = min(wdf_upper, len_upper / 2.0);
139 double max_product = wdf_var * (1.0 - wdf_var / len_upper);
145 double min_wdf_to_len = wdf_lower / len_upper;
146 double max_product_2 = wdf_upper * (1.0 - min_wdf_to_len);
148 max_product = min(max_product, max_product_2);
150 double C = 0.5 *
log2(2.0 * M_PI * max_product) / (wdf_lower + 0.5);
151 upper_bound = A + B +
C;
153 if (
rare(upper_bound < 0.0))
156 upper_bound *= wqf_product_factor;
162 return "Xapian::DLHWeight";
166 DLHWeight::serialise()
const 172 DLHWeight::unserialise(
const string& s)
const 174 if (
rare(!s.empty()))
183 if (wdf == 0 || wdf == len)
return 0.0;
185 double wdf_to_len = double(wdf) / len;
186 double one_minus_wdf_to_len = 1.0 - wdf_to_len;
188 double wt = wdf *
log2(wdf_to_len * log_constant) +
189 (len - wdf) *
log2(one_minus_wdf_to_len) +
190 0.5 *
log2(2.0 * M_PI * wdf * one_minus_wdf_to_len);
191 if (
rare(wt <= 0.0))
return 0.0;
193 return wqf_product_factor * wt / (wdf + 0.5);
197 DLHWeight::get_maxpart()
const 209 DLHWeight::get_maxextra()
const The Xapian namespace contains public interfaces for the Xapian library.
Hierarchy of classes which Xapian can throw as exceptions.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Indicates an error in the std::string serialisation of an object.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence from Randomness (DFR) framework.
Defines a log2() function to find the logarithm to base 2 if not already defined in the library.