38 TfIdfWeight::TfIdfWeight(
const std::string &normals)
39 : normalizations(normals)
82 return "Xapian::TfIdfWeight";
103 if (wdf == 0)
return 0;
104 double uniqterm_double = uniqterms;
105 double doclen_double = doclen;
107 if (doclen_double == 0 || uniqterm_double == 0)
110 wdf_avg = doclen_double / uniqterm_double;
111 double num = 1 + log(
double(wdf));
112 double den = 1 + log(wdf_avg);
170 if (wdf == 0)
return 0;
175 if (wdf == 0)
return 0;
176 return (1 + log(
double(wdf)));
193 if (N == termfreq)
return 0;
194 return log((N - termfreq) / termfreq);
196 return (1.0 / termfreq);
198 double x = log(N / termfreq);
203 return (log(N / termfreq));
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Indicates an error in the std::string serialisation of an object.
Xapian::Weight subclass implementing the tf-idf weighting scheme.
double get_sumextra(Xapian::termcount doclen, Xapian::termcount uniqterms) const
Calculate the term-independent weight component for a document.
double get_maxpart() const
Return an upper bound on what get_sumpart() can return for any document.
TfIdfWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
double get_wdfn(Xapian::termcount wdf, char c) const
double get_wtn(double wt, char c) const
double get_idfn(Xapian::doccount termfreq, char c) const
void init(double factor)
Allow the subclass to perform any initialisation it needs to.
TfIdfWeight * clone() const
Clone this object.
std::string name() const
Return the name of this weighting scheme.
TfIdfWeight()
Construct a TfIdfWeight using the default normalizations ("ntn").
double factor
The factor to multiply with the weight.
std::string serialise() const
Return this object's parameters serialised as a single string.
std::string normalizations
double get_maxextra() const
Return an upper bound on what get_sumextra() can return for any document.
double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqterm) const
Calculate the weight contribution for this object's term to a document.
Xapian::termcount get_doclength_lower_bound() const
A lower bound on the minimum length of any document in the shard.
Xapian::doccount get_termfreq() const
The number of documents which this term indexes.
void need_stat(stat_flags flag)
Tell Xapian that your subclass will want a particular statistic.
Xapian::termcount get_wqf() const
The within-query-frequency of this term.
Xapian::doccount get_collection_size() const
The number of documents in the collection.
@ UNIQUE_TERMS
Number of unique terms in the current document.
@ DOC_LENGTH_MAX
Upper bound on document lengths.
@ DOC_LENGTH
Length of the current document (sum wdf).
@ TERMFREQ
How many documents the current term is in.
@ WQF
Within-query-frequency of the current term.
@ COLLECTION_SIZE
Number of documents in the collection.
@ WDF_MAX
Upper bound on wdf.
@ DOC_LENGTH_MIN
Lower bound on (non-zero) document lengths.
@ WDF
Within-document-frequency of the current term in the current document.
Xapian::termcount get_wdf_upper_bound() const
An upper bound on the wdf of this term in the shard.
Hierarchy of classes which Xapian can throw as exceptions.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
static double get_wdfn_for_L(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqterms)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Various assertion macros.