22 #ifndef XAPIAN_INCLUDED_EXPANDWEIGHT_H 23 #define XAPIAN_INCLUDED_EXPANDWEIGHT_H 66 : avlen(avlen_), expand_k(0), dbsize(0), termfreq(0),
67 rcollection_freq(0), rtermfreq(0), multiplier(0) {
72 : avlen(avlen_), expand_k(expand_k_), dbsize(0), termfreq(0),
73 rcollection_freq(0), rtermfreq(0), multiplier(0) {
82 if (wdf == 0) wdf = 1;
84 rcollection_freq += wdf;
86 multiplier += (expand_k + 1) * wdf / (expand_k * doclen / avlen + wdf);
90 if (shard_index >= dbs_seen.size() || !dbs_seen[shard_index]) {
91 if (shard_index >= dbs_seen.size()) {
92 dbs_seen.resize(shard_index + 1);
94 dbs_seen[shard_index] =
true;
107 rcollection_freq = 0;
156 bool use_exact_termfreq_)
157 : db(db_), dbsize(db.get_doccount()), avlen(db.get_avlength()),
158 rsize(rsize_), collection_freq(0),
159 collection_len(avlen * dbsize + .5),
160 use_exact_termfreq(use_exact_termfreq_), stats(avlen) {}
173 bool use_exact_termfreq_,
175 : db(db_), dbsize(db.get_doccount()), avlen(db.get_avlength()),
176 rsize(rsize_), collection_freq(0),
177 collection_len(avlen * dbsize + .5),
178 use_exact_termfreq(use_exact_termfreq_), stats(avlen, expand_k_) {}
184 void collect_stats(
TermList * merger,
const std::string & term);
187 virtual double get_weight()
const = 0;
228 bool use_exact_termfreq_,
230 :
ExpandWeight(db_, rsize_, use_exact_termfreq_, expand_k_) { }
232 double get_weight()
const;
260 bool use_exact_termfreq_)
263 double get_weight()
const;
269 #endif // XAPIAN_INCLUDED_EXPANDWEIGHT_H The Xapian namespace contains public interfaces for the Xapian library.
This class is used to access a database, or a group of databases.
Xapian::doccount rtermfreq
The number of documents from the RSet indexed by the current term (r).
Bo1EWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_)
Constructor.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Xapian::doclength avlen
Average document length in the whole database.
Abstract base class for termlists.
double expand_k
The parameter k to be used for TradWeight query expansion.
ExpandStats(Xapian::doclength avlen_)
Constructor for expansion schemes which do not require the "expand_k" parameter.
double get_avlen() const
Return the average length of the database.
Xapian::doccount get_rsize() const
Return the number of documents in the RSet.
Class for calculating ESet term weights.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Xapian::doccount rsize
The number of documents in the RSet.
Xapian::totallength collection_len
The total length of the database.
Xapian::totallength get_collection_len() const
Return the length of the collection.
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
bool use_exact_termfreq
Should we calculate the exact term frequency when generating an ESet?
ExpandWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, double expand_k_)
Constructor.
ExpandStats(Xapian::doclength avlen_, double expand_k_)
Constructor for expansion schemes which require the "expand_k" parameter.
TradEWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, double expand_k_)
Constructor.
std::vector< bool > dbs_seen
Which databases in a multidb are included in termfreq.
double doclength
A normalised document length.
Xapian::termcount rcollection_freq
The number of times the term occurs in the RSet.
double multiplier
The multiplier to be used in TradWeight query expansion.
API for working with Xapian databases.
const Xapian::Database db
The combined database.
Xapian::doclength avlen
Average document length in the whole database.
Xapian::doccount dbsize
The number of documents in the whole database.
Xapian::termcount get_collection_freq() const
Return the collection frequency of the term.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
ExpandStats stats
An ExpandStats object to accumulate statistics.
Collates statistics while calculating term weight in an ESet.
Xapian::doccount termfreq
Term frequency (for a multidb, may be for a subset of the databases).
This class implements the TradWeight scheme for query expansion.
Abstract base class for termlists.
Xapian::termcount collection_freq
The collection frequency of the term.
ExpandWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_)
Constructor.
Xapian::doccount get_dbsize() const
Return the size of the database.
Xapian::doccount dbsize
Size of the subset of a multidb to which the value in termfreq applies.
This class implements the Bo1 scheme for query expansion.