36 DPHWeight::clone()
const
// Precomputes log_constant, wqf_product_factor and upper_bound from the
// collection statistics and the supplied factor.
// NOTE(review): this listing is partial - some original lines (braces, branch
// bodies) are not shown, so comments below describe only what is visible.
42 DPHWeight::init(
double factor)
// Statistics used for the bound: collection frequency, the wdf range
// [1.0, get_wdf_upper_bound()] and the document length upper bound.
50 double F = get_collection_freq();
51 double wdf_lower = 1.0;
52 double wdf_upper = get_wdf_upper_bound();
54 double len_upper = get_doclength_upper_bound();
// Smallest wdf/len ratio considered: lowest wdf over the largest length.
61 double min_wdf_to_len = wdf_lower / len_upper;
// Cache log2(total_length / F) and wqf * factor in members.
64 log_constant = log2(get_total_length() / F);
65 wqf_product_factor = get_wqf() * factor;
// First bound on wdf * (1 - wdf / len): largest wdf with the smallest ratio.
72 double max_product_1 = wdf_upper * (1.0 - min_wdf_to_len);
// Second bound: w * (1 - w / len_upper) peaks at w = len_upper / 2, so
// evaluate it there, or at wdf_upper if that is smaller.
76 double wdf_var = min(wdf_upper, len_upper / 2.0);
77 double max_product_2 = wdf_var * (1.0 - wdf_var / len_upper);
// Both are upper bounds, so keep the tighter (smaller) one.
79 double max_product = min(max_product_1, max_product_2);
// Positive root of 2*w^2 + 3*w - len_upper = 0, which is the stationary point
// of w / (w + 1) * (1 - w / len_upper)^2 (the expression evaluated below).
93 double wdf_root = 0.25 * (sqrt(8.0 * len_upper + 9.0) - 3.0);
// wdf_root is compared against the wdf range here; the branch bodies are not
// visible in this listing - presumably they clamp wdf_root into
// [wdf_lower, wdf_upper] (TODO confirm against the full source).
96 if (wdf_root > wdf_upper) {
98 }
else if (wdf_root < wdf_lower) {
// Evaluate the normalization factor at wdf_root with length len_upper.
102 double x = 1 - wdf_root / len_upper;
103 double x_squared = x * x;
104 auto max_wdf_product_normalization = wdf_root / (wdf_root + 1) * x_squared;
// Combine the normalization bound with the bounds on the two log terms.
106 double max_weight = max_wdf_product_normalization *
107 (log_constant + (0.5 * log2(2 * M_PI * max_product)));
// Scale by wqf * factor; a negative result is replaced by 0.0.
109 upper_bound = wqf_product_factor * max_weight;
110 if (
rare(upper_bound < 0.0)) upper_bound = 0.0;
120 DPHWeight::serialise()
const
126 DPHWeight::unserialise(
const string& s)
const
128 if (
rare(!s.empty()))
// Body fragment of a weight computation from wdf and len.
// NOTE(review): the enclosing function's signature is not visible in this
// listing - presumably DPHWeight::get_sumpart; confirm against the full source.
// Return 0.0 up front when wdf is 0 or equals len: either case would feed a
// zero argument to one of the log2() calls below.
137 if (wdf == 0 || wdf == len)
return 0.0;
// Ratio of wdf to len, computed in floating point.
139 double wdf_to_len = double(wdf) / len;
// normalization = (1 - wdf/len)^2 / (wdf + 1)
141 double x = 1 - wdf_to_len;
142 double normalization = x * x / (wdf + 1);
// wt = normalization * (wdf * (log2(wdf/len) + log_constant)
//                       + 0.5 * log2(2 * pi * wdf * (1 - wdf/len)))
144 double wt = normalization *
145 (wdf * (log2(wdf_to_len) + log_constant) +
146 (0.5 * log2(2 * M_PI * wdf * (1 - wdf_to_len))));
// Non-positive weights are reported as 0.0.
147 if (
rare(wt <= 0.0))
return 0.0;
// Scale by the precomputed wqf_product_factor member.
149 return wqf_product_factor * wt;
153 DPHWeight::get_maxpart()
const
159 DPHWeight::create_from_parameters(
const char *
p)
const
This class implements the DPH weighting scheme.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Indicates an error in the std::string serialisation of an object.
Hierarchy of classes which Xapian can throw as exceptions.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.