35 static double stirling_value(
double difference,
double y,
double stirling_constant)
37 return ((y + 0.5) * (stirling_constant -
log2(y)) + (difference * stirling_constant));
40 BB2Weight::BB2Weight(
double c) : param_c(c)
73 if (wdfn_upper == 0) {
79 double wdfn_lower(1.0);
87 if (
rare(wdfn_lower >= F - 1))
89 if (
rare(wdfn_upper >= F - 1))
98 wt = -1.0 / log(2.0) -
log2(N - 1.0);
105 double y_min = F - wdfn_upper;
106 double y_max = N + F - wdfn_lower - 2.0;
113 double B_max =
B_constant / (wdfn_lower + 1.0);
122 return "Xapian::BB2Weight";
134 const char *ptr = s.data();
135 const char *end = ptr + s.size();
137 if (
rare(ptr != end))
146 if (wdf == 0)
return 0.0;
154 if (
rare(wdfn >= F - 1))
162 double y2 = F - wdfn;
163 double y1 = N_less_2 + y2;
168 double final_weight = B * (
wt + stirling);
169 if (
rare(final_weight < 0.0))
The Xapian namespace contains public interfaces for the Xapian library.
double get_maxpart() const
Return an upper bound on what get_sumpart() can return for any document.
void init(double factor)
Allow the subclass to perform any initialisation it needs to.
double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqterms) const
Calculate the weight contribution for this object's term to a document.
Xapian::doccount get_collection_size() const
The number of documents in the collection.
Xapian::termcount get_collection_freq() const
The collection frequency of the term.
Upper bound on document lengths.
static double stirling_value(double difference, double y, double stirling_constant)
double stirling_constant_1
double param_c
The wdf normalization parameter in the formula.
Lower bound on (non-zero) document lengths.
std::string serialise() const
Return this object's parameters serialised as a single string.
Hierarchy of classes which Xapian can throw as exceptions.
This class implements the BB2 weighting scheme.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
functions to serialise and unserialise a double
Length of the current document (sum wdf).
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Xapian::termcount get_doclength_lower_bound() const
A lower bound on the minimum length of any document in the shard.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
double get_maxextra() const
Return an upper bound on what get_sumextra() can return for any document.
Indicates an error in the std::string serialisation of an object.
Within-query-frequency of the current term.
double stirling_constant_2
Average length of documents in the collection.
BB2Weight * clone() const
Clone this object.
Xapian::termcount get_wqf() const
The within-query-frequency of this term.
double upper_bound
The upper bound on the weight.
Xapian::termcount get_doclength_upper_bound() const
An upper bound on the maximum length of any document in the shard.
Sum of wdf over the whole collection for the current term.
Within-document-frequency of the current term in the current document.
Xapian::doccount get_termfreq() const
The number of documents which this term indexes.
How many documents the current term is in.
Xapian::doclength get_average_length() const
The average length of a document in the collection.
std::string serialise_double(double v)
Serialise a double to a string.
std::string name() const
Return the name of this weighting scheme.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Number of documents in the collection.
Defines a log2() function to find the logarithm to base 2 if not already defined in the library...
void need_stat(stat_flags flag)
Tell Xapian that your subclass will want a particular statistic.
double get_sumextra(Xapian::termcount doclen, Xapian::termcount uniqterms) const
Calculate the term-independent weight component for a document.
double c_product_avlen
The constant values to be used in get_sumpart().
BB2Weight * unserialise(const std::string &serialised) const
Unserialise parameters.
Xapian::termcount get_wdf_upper_bound() const
An upper bound on the wdf of this term in the shard.