22 #ifndef XAPIAN_INCLUDED_EXPANDWEIGHT_H    23 #define XAPIAN_INCLUDED_EXPANDWEIGHT_H    66         : avlen(avlen_), expand_k(0), dbsize(0), termfreq(0),
    67           rcollection_freq(0), rtermfreq(0), multiplier(0) {
    72         : avlen(avlen_), expand_k(expand_k_), dbsize(0), termfreq(0),
    73           rcollection_freq(0), rtermfreq(0), multiplier(0) {
    82         if (wdf == 0) wdf = 1;
    84         rcollection_freq += wdf;
    86         multiplier += (expand_k + 1) * wdf / (expand_k * doclen / avlen + wdf);
    90         if (shard_index >= dbs_seen.size() || !dbs_seen[shard_index]) {
    91             if (shard_index >= dbs_seen.size()) {
    92                 dbs_seen.resize(shard_index + 1);
    94             dbs_seen[shard_index] = 
true;
   107         rcollection_freq = 0;
   159                  bool use_exact_termfreq_,
   160                  bool want_collection_freq_)
   161         : db(db_), dbsize(db.get_doccount()), avlen(db.get_avlength()),
   162           rsize(rsize_), collection_freq(0),
   163           collection_len(avlen * dbsize + .5),
   164           use_exact_termfreq(use_exact_termfreq_),
   165           want_collection_freq(want_collection_freq_),
   179                  bool use_exact_termfreq_,
   180                  bool want_collection_freq_,
   182         : db(db_), dbsize(db.get_doccount()), avlen(db.get_avlength()),
   183           rsize(rsize_), collection_freq(0),
   184           collection_len(avlen * dbsize + .5),
   185           use_exact_termfreq(use_exact_termfreq_),
   186           want_collection_freq(want_collection_freq_),
   187           stats(avlen, expand_k_) {}
   193     void collect_stats(
TermList * merger, 
const std::string & term);
   196     virtual double get_weight() 
const = 0;
   237                 bool use_exact_termfreq_,
   241     double get_weight() 
const;
   269                bool use_exact_termfreq_)
   272     double get_weight() 
const;
   278 #endif // XAPIAN_INCLUDED_EXPANDWEIGHT_H The Xapian namespace contains public interfaces for the Xapian library. 
 
ExpandWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, bool want_collection_freq_, double expand_k_)
Constructor. 
 
This class is used to access a database, or a group of databases. 
 
Xapian::doccount rtermfreq
The number of documents from the RSet indexed by the current term (r). 
 
Bo1EWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_)
Constructor. 
 
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database. 
 
Xapian::doclength avlen
Average document length in the whole database. 
 
Abstract base class for termlists. 
 
double expand_k
The parameter k to be used for TradWeight query expansion. 
 
ExpandStats(Xapian::doclength avlen_)
Constructor for expansion schemes which do not require the "expand_k" parameter. 
 
double get_avlen() const
Return the average length of the database. 
 
Xapian::doccount get_rsize() const
Return the number of documents in the RSet. 
 
ExpandWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, bool want_collection_freq_)
Constructor. 
 
Class for calculating ESet term weights. 
 
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms. 
 
Xapian::doccount rsize
The number of documents in the RSet. 
 
Xapian::totallength collection_len
The total length of the database. 
 
Xapian::totallength get_collection_len() const
Return the length of the collection. 
 
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
 
bool use_exact_termfreq
Should we calculate the exact term frequency when generating an ESet? 
 
ExpandStats(Xapian::doclength avlen_, double expand_k_)
Constructor for expansion schemes which require the "expand_k" parameter. 
 
TradEWeight(const Xapian::Database &db_, Xapian::doccount rsize_, bool use_exact_termfreq_, double expand_k_)
Constructor. 
 
std::vector< bool > dbs_seen
Which databases in a multidb are included in termfreq. 
 
double doclength
A normalised document length. 
 
Xapian::termcount rcollection_freq
The number of times the term occurs in the rset. 
 
double multiplier
The multiplier to be used in TradWeight query expansion. 
 
API for working with Xapian databases. 
 
const Xapian::Database db
The combined database. 
 
Xapian::doclength avlen
Average document length in the whole database. 
 
Xapian::doccount dbsize
The number of documents in the whole database. 
 
Xapian::termcount get_collection_freq() const
Return the collection frequency of the term. 
 
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents. 
 
ExpandStats stats
An ExpandStats object to accumulate statistics. 
 
Collates statistics while calculating term weight in an ESet. 
 
Xapian::doccount termfreq
Term frequency (for a multidb, may be for a subset of the databases). 
 
This class implements the TradWeight scheme for query expansion. 
 
bool want_collection_freq
Does the expansion scheme use collection frequency? 
 
Abstract base class for termlists. 
 
Xapian::termcount collection_freq
The collection frequency of the term. 
 
Xapian::doccount get_dbsize() const
Return the size of the database. 
 
Xapian::doccount dbsize
Size of the subset of a multidb to which the value in termfreq applies. 
 
This class implements the Bo1 scheme for query expansion.