22 #ifndef XAPIAN_INCLUDED_EXPANDWEIGHT_H 23 #define XAPIAN_INCLUDED_EXPANDWEIGHT_H 66 : avlen(avlen_), expand_k(0), dbsize(0), termfreq(0),
67 rcollection_freq(0), rtermfreq(0), multiplier(0) {
72 : avlen(avlen_), expand_k(expand_k_), dbsize(0), termfreq(0),
73 rcollection_freq(0), rtermfreq(0), multiplier(0) {
82 if (wdf == 0) wdf = 1;
84 rcollection_freq += wdf;
86 multiplier += (expand_k + 1) * wdf / (expand_k * doclen / avlen + wdf);
90 if (shard_index >= dbs_seen.size() || !dbs_seen[shard_index]) {
91 if (shard_index >= dbs_seen.size()) {
92 dbs_seen.resize(shard_index + 1);
94 dbs_seen[shard_index] =
true;
107 rcollection_freq = 0;
159 bool use_exact_termfreq_,
160 bool want_collection_freq_)
161 : db(db_), dbsize(db.get_doccount()), avlen(db.get_avlength()),
162 rsize(rsize_), collection_freq(0),
163 collection_len(avlen * dbsize + .5),
164 use_exact_termfreq(use_exact_termfreq_),
165 want_collection_freq(want_collection_freq_),
179 bool use_exact_termfreq_,
180 bool want_collection_freq_,
182 : db(db_), dbsize(db.get_doccount()), avlen(db.get_avlength()),
183 rsize(rsize_), collection_freq(0),
184 collection_len(avlen * dbsize + .5),
185 use_exact_termfreq(use_exact_termfreq_),
186 want_collection_freq(want_collection_freq_),
187 stats(avlen, expand_k_) {}
193 void collect_stats(
TermList * merger,
const std::string & term);
196 virtual double get_weight()
const = 0;
237 bool use_exact_termfreq_,
241 double get_weight()
const;
269 bool use_exact_termfreq_)
272 double get_weight()
const;
278 #endif // XAPIAN_INCLUDED_EXPANDWEIGHT_H
The Xapian namespace contains public interfaces for the Xapian library.
ExpandWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, bool want_collection_freq_, double expand_k_)
Constructor.
This class is used to access a database, or a group of databases.
Xapian::doccount rtermfreq
The number of documents from the RSet indexed by the current term (r).
Bo1EWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_)
Constructor.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Xapian::doclength avlen
Average document length in the whole database.
Abstract base class for termlists.
double expand_k
The parameter k to be used for TradWeight query expansion.
ExpandStats(Xapian::doclength avlen_)
Constructor for expansion schemes which do not require the "expand_k" parameter.
double get_avlen() const
Return the average length of the database.
Xapian::doccount get_rsize() const
Return the number of documents in the RSet.
ExpandWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, bool want_collection_freq_)
Constructor.
Class for calculating ESet term weights.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Xapian::doccount rsize
The number of documents in the RSet.
Xapian::totallength collection_len
The total length of the database.
Xapian::totallength get_collection_len() const
Return the length of the collection.
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
bool use_exact_termfreq
Should we calculate the exact term frequency when generating an ESet?
ExpandStats(Xapian::doclength avlen_, double expand_k_)
Constructor for expansion schemes which require the "expand_k" parameter.
TradEWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, double expand_k_)
Constructor.
std::vector< bool > dbs_seen
Which databases in a multidb are included in termfreq.
double doclength
A normalised document length.
Xapian::termcount rcollection_freq
The number of times the term occurs in the RSet.
double multiplier
The multiplier to be used in TradWeight query expansion.
API for working with Xapian databases.
const Xapian::Database db
The combined database.
Xapian::doclength avlen
Average document length in the whole database.
Xapian::doccount dbsize
The number of documents in the whole database.
Xapian::termcount get_collection_freq() const
Return the collection frequency of the term.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
ExpandStats stats
An ExpandStats object to accumulate statistics.
Collates statistics while calculating term weight in an ESet.
Xapian::doccount termfreq
Term frequency (for a multidb, may be for a subset of the databases).
This class implements the TradWeight scheme for query expansion.
bool want_collection_freq
Does the expansion scheme use collection frequency?
Abstract base class for termlists.
Xapian::termcount collection_freq
The collection frequency of the term.
Xapian::doccount get_dbsize() const
Return the size of the database.
Xapian::doccount dbsize
Size of the subset of a multidb to which the value in termfreq applies.
This class implements the Bo1 scheme for query expansion.