36 for (
size_t i = 0; i < n_kids; ++i) {
49 bool all_exact = (max == plist[0]->get_termfreq_min());
50 unsigned overflow = 0;
51 for (
size_t i = 1; i < n_kids; ++i) {
53 if (tf_max > max) max = tf_max;
61 all_exact = (tf_max == plist[i]->get_termfreq_min());
69 for (
size_t i = 0; i < n_kids; ++i) {
75 if (overflow == 0 || all_the_rest > sum) {
76 if (tf_min > all_the_rest) {
77 result = std::max(result, tf_min - all_the_rest);
83 if (all_exact && result == 0) {
97 bool all_exact = (result == plist[0]->get_termfreq_min());
98 bool overflow =
false;
99 for (
size_t i = 1; i < n_kids; ++i) {
104 if (result < old_result)
107 all_exact = (tf_max == plist[i]->get_termfreq_min());
108 if (!all_exact && (overflow || result >= db_size))
111 if (all_exact && (overflow || result > db_size)) {
115 return db_size - ((result & 1) != (db_size & 1));
129 double scale = 1.0 / db_size;
130 double P_est = plist[0]->get_termfreq_est() * scale;
131 for (
size_t i = 1; i < n_kids; ++i) {
132 double P_i = plist[i]->get_termfreq_est() * scale;
133 P_est += P_i - 2.0 * P_est * P_i;
142 LOGCALL(MATCH,
TermFreqs,
"MultiXorPostList::get_termfreq_est_using_stats", stats);
146 TermFreqs freqs(plist[0]->get_termfreq_est_using_stats(stats));
151 double P_est = freqs.
termfreq * scale;
152 double rtf_scale = 0.0;
158 double cf_scale = 0.0;
162 double Pc_est = freqs.
collfreq * cf_scale;
164 for (
size_t i = 1; i < n_kids; ++i) {
165 freqs = plist[i]->get_termfreq_est_using_stats(stats);
166 double P_i = freqs.
termfreq * scale;
167 P_est += P_i - 2.0 * P_est * P_i;
168 double Pc_i = freqs.
collfreq * cf_scale;
169 Pc_est += Pc_i - 2.0 * Pc_est * Pc_i;
174 Pr_est += Pr_i - 2.0 * Pr_est * Pr_i;
185 LOGCALL(MATCH,
double,
"MultiXorPostList::get_maxweight", NO_ARGS);
200 bool doclength_set =
false;
201 for (
size_t i = 0; i < n_kids; ++i) {
202 if (plist[i]->get_docid() == did) {
204 AssertEq(doclength, plist[i]->get_doclength());
206 doclength = plist[i]->get_doclength();
207 doclength_set =
true;
220 bool unique_terms_set =
false;
221 for (
size_t i = 0; i < n_kids; ++i) {
222 if (plist[i]->get_docid() == did) {
223 if (unique_terms_set) {
224 AssertEq(unique_terms, plist[i]->get_unique_terms());
226 unique_terms = plist[i]->get_unique_terms();
227 unique_terms_set =
true;
240 for (
size_t i = 0; i < n_kids; ++i) {
241 if (plist[i]->get_docid() == did)
242 result += plist[i]->get_weight();
256 LOGCALL(MATCH,
double,
"MultiXorPostList::recalc_maxweight", NO_ARGS);
257 max_total = plist[0]->recalc_maxweight();
258 double min_max = max_total;
259 for (
size_t i = 1; i < n_kids; ++i) {
260 double new_max = plist[i]->recalc_maxweight();
261 if (new_max < min_max)
263 max_total += new_max;
265 if ((n_kids & 1) == 0) {
267 max_total -= min_max;
278 size_t matching_count = 0;
279 for (
size_t i = 0; i < n_kids; ++i) {
280 if (old_did == 0 || plist[i]->get_docid() <= old_did) {
289 if (plist[i]->at_end()) {
296 if (did == 0 || new_did < did) {
299 }
else if (new_did == did) {
314 if (matching_count & 1)
324 LOGCALL(MATCH,
PostList *,
"MultiXorPostList::skip_to", did_min | w_min);
327 size_t matching_count = 0;
328 for (
size_t i = 0; i < n_kids; ++i) {
329 if (old_did == 0 || plist[i]->get_docid() < did_min) {
338 if (plist[i]->at_end()) {
345 if (did == 0 || new_did < did) {
348 }
else if (new_did == did) {
364 if (matching_count & 1)
375 desc += plist[0]->get_description();
376 for (
size_t i = 1; i < n_kids; ++i) {
378 desc += plist[i]->get_description();
388 for (
size_t i = 0; i < n_kids; ++i) {
389 if (plist[i]->get_docid() == did)
390 totwdf += plist[i]->get_wdf();
399 for (
size_t i = 0; i < n_kids; ++i) {
400 if (plist[i]->get_docid() == did)
401 total += plist[i]->count_matching_subqs();
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Abstract base class for postlists.
bool at_end() const
Return true if the current position is past the last entry in this list.
Xapian::doccount get_termfreq_est() const
Get an estimate of the number of documents indexed by this term.
Xapian::docid get_docid() const
Return the current docid.
virtual Internal * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual double recalc_maxweight()=0
Recalculate the upper bound on what get_weight() can return.
Xapian::doccount termfreq
Xapian::doccount collection_size
Number of documents in the collection.
Xapian::doccount rset_size
Number of relevant documents in the collection.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Xapian::doccount get_termfreq_max() const
Get an upper bound on the number of documents indexed by this term.
double doclength
A normalised document length.
Xapian::doccount get_termfreq_min() const
Get a lower bound on the number of documents indexed by this term.
Class to hold statistics for a given collection.
Internal * next()
Advance the current position to the next document in the postlist.
Xapian::termcount collfreq
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
double get_weight() const
Return the weight contribution for the current position.
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
The frequencies for a term.
virtual Internal * next(double w_min)=0
Advance the current position to the next document in the postlist.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Xapian::termcount get_doclength() const
Return the length of current document.
std::string get_description() const
Return a string description of this object.
Various assertion macros.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Xapian::totallength total_length
Total length of all documents in the collection.
TermFreqs get_termfreq_est_using_stats(const Xapian::Weight::Internal &stats) const
Get an estimate for the termfreq and reltermfreq, given the stats.
Xapian::doccount reltermfreq
Xapian::termcount get_wdf() const
get_wdf() for MultiXorPostlists returns the sum of the wdfs of the sub postlists which match the current docid.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
double get_maxweight() const
Return an upper bound on what get_weight() can return.