00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <config.h>
00026 #include "orpostlist.h"
00027
00028 #include "debuglog.h"
00029 #include "multiandpostlist.h"
00030 #include "andmaybepostlist.h"
00031 #include "omassert.h"
00032
00033 #include <algorithm>
00034
00035 OrPostList::OrPostList(PostList *left_,
00036 PostList *right_,
00037 MultiMatch *matcher_,
00038 Xapian::doccount dbsize_)
00039 : BranchPostList(left_, right_, matcher_),
00040 lhead(0), rhead(0), lvalid(false), rvalid(false),
00041 lmax(0), rmax(0), minmax(0), dbsize(dbsize_)
00042 {
00043 LOGCALL_CTOR(MATCH, "OrPostList", left_ | right_ | matcher_ | dbsize_);
00044 AssertRel(left_->get_termfreq_est(),>=,right_->get_termfreq_est());
00045 }
00046
00047 PostList *
00048 OrPostList::next(Xapian::weight w_min)
00049 {
00050 LOGCALL(MATCH, PostList *, "OrPostList::next", w_min);
00051 if (w_min > minmax) {
00052
00053 PostList *ret;
00054 if (w_min > lmax) {
00055 if (w_min > rmax) {
00056 LOGLINE(MATCH, "OR -> AND");
00057 ret = new MultiAndPostList(l, r, lmax, rmax, matcher, dbsize);
00058 Xapian::docid newdocid = std::max(lhead, rhead);
00059 if (newdocid == 0 || (lvalid && rvalid && lhead == rhead)) {
00060 ++newdocid;
00061 }
00062 skip_to_handling_prune(ret, newdocid, w_min, matcher);
00063 } else {
00064 LOGLINE(MATCH, "OR -> AND MAYBE (1)");
00065 AndMaybePostList * ret2 =
00066 new AndMaybePostList(r, l, matcher, dbsize, rhead, lhead);
00067 ret = ret2;
00068
00069
00070 if (rhead <= lhead) {
00071 next_handling_prune(ret, w_min, matcher);
00072 } else {
00073 handle_prune(ret, ret2->sync_rhs(w_min));
00074 }
00075 }
00076 } else {
00077
00078 Assert(w_min > rmax);
00079 LOGLINE(MATCH, "OR -> AND MAYBE (2)");
00080 AndMaybePostList * ret2 =
00081 new AndMaybePostList(l, r, matcher, dbsize, lhead, rhead);
00082 ret = ret2;
00083 if (lhead <= rhead) {
00084 next_handling_prune(ret, w_min, matcher);
00085 } else {
00086 handle_prune(ret, ret2->sync_rhs(w_min));
00087 }
00088 }
00089
00090 l = r = NULL;
00091 RETURN(ret);
00092 }
00093
00094 bool ldry = false;
00095 bool rnext = !rvalid;
00096
00097 if (!lvalid || lhead <= rhead) {
00098 if (lhead == rhead) rnext = true;
00099 next_handling_prune(l, w_min - rmax, matcher);
00100 lvalid = true;
00101 if (l->at_end()) ldry = true;
00102 } else {
00103 rnext = true;
00104 }
00105
00106 if (rnext) {
00107 next_handling_prune(r, w_min - lmax, matcher);
00108 rvalid = true;
00109 if (r->at_end()) {
00110 PostList *ret = l;
00111 l = NULL;
00112 RETURN(ret);
00113 }
00114 rhead = r->get_docid();
00115 }
00116
00117 if (!ldry) {
00118 lhead = l->get_docid();
00119 RETURN(NULL);
00120 }
00121
00122 PostList *ret = r;
00123 r = NULL;
00124 RETURN(ret);
00125 }
00126
00127 PostList *
00128 OrPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00129 {
00130 LOGCALL(MATCH, PostList *, "OrPostList::skip_to", did | w_min);
00131 if (w_min > minmax) {
00132
00133 PostList *ret;
00134 if (w_min > lmax) {
00135 if (w_min > rmax) {
00136 LOGLINE(MATCH, "OR -> AND (in skip_to)");
00137 ret = new MultiAndPostList(l, r, lmax, rmax, matcher, dbsize);
00138 did = std::max(did, std::max(lhead, rhead));
00139 } else {
00140 LOGLINE(MATCH, "OR -> AND MAYBE (in skip_to) (1)");
00141 AndMaybePostList * ret2 =
00142 new AndMaybePostList(r, l, matcher, dbsize, rhead, lhead);
00143 ret = ret2;
00144 handle_prune(ret, ret2->sync_rhs(w_min));
00145 did = std::max(did, rhead);
00146 }
00147 } else {
00148
00149 Assert(w_min > rmax);
00150 LOGLINE(MATCH, "OR -> AND MAYBE (in skip_to) (2)");
00151 AndMaybePostList * ret2 =
00152 new AndMaybePostList(l, r, matcher, dbsize, lhead, rhead);
00153 ret = ret2;
00154 handle_prune(ret, ret2->sync_rhs(w_min));
00155 did = std::max(did, lhead);
00156 }
00157
00158 l = r = NULL;
00159 skip_to_handling_prune(ret, did, w_min, matcher);
00160 RETURN(ret);
00161 }
00162
00163 bool ldry = false;
00164 if (lhead < did) {
00165 skip_to_handling_prune(l, did, w_min - rmax, matcher);
00166 lvalid = true;
00167 ldry = l->at_end();
00168 }
00169
00170 if (rhead < did) {
00171 skip_to_handling_prune(r, did, w_min - lmax, matcher);
00172 rvalid = true;
00173
00174 if (r->at_end()) {
00175 PostList *ret = l;
00176 l = NULL;
00177 RETURN(ret);
00178 }
00179 rhead = r->get_docid();
00180 }
00181
00182 if (!ldry) {
00183 lhead = l->get_docid();
00184 RETURN(NULL);
00185 }
00186
00187 PostList *ret = r;
00188 r = NULL;
00189 RETURN(ret);
00190 }
00191
00192 PostList *
00193 OrPostList::check(Xapian::docid did, Xapian::weight w_min, bool &valid)
00194 {
00195 LOGCALL(MATCH, PostList *, "OrPostList::check", did | w_min);
00196 if (w_min > minmax) {
00197
00198 PostList *ret;
00199 if (w_min > lmax) {
00200 if (w_min > rmax) {
00201 LOGLINE(MATCH, "OR -> AND (in check)");
00202 ret = new MultiAndPostList(l, r, lmax, rmax, matcher, dbsize);
00203 did = std::max(did, std::max(lhead, rhead));
00204 } else {
00205 LOGLINE(MATCH, "OR -> AND MAYBE (in check) (1)");
00206 AndMaybePostList * ret2 = new AndMaybePostList(r, l, matcher, dbsize, rhead, lhead);
00207 ret = ret2;
00208 handle_prune(ret, ret2->sync_rhs(w_min));
00209 did = std::max(did, rhead);
00210 }
00211 } else {
00212
00213 Assert(w_min > rmax);
00214 LOGLINE(MATCH, "OR -> AND MAYBE (in check) (2)");
00215 AndMaybePostList * ret2 = new AndMaybePostList(l, r, matcher, dbsize, lhead, rhead);
00216 ret = ret2;
00217 handle_prune(ret, ret2->sync_rhs(w_min));
00218 did = std::max(did, lhead);
00219 }
00220
00221 l = r = NULL;
00222 check_handling_prune(ret, did, w_min, matcher, valid);
00223 RETURN(ret);
00224 }
00225
00226 bool ldry = false;
00227 if (!lvalid || lhead < did) {
00228 lvalid = false;
00229 check_handling_prune(l, did, w_min - rmax, matcher, lvalid);
00230 ldry = l->at_end();
00231 }
00232
00233 if (!rvalid || rhead <= did) {
00234 rvalid = false;
00235 check_handling_prune(r, did, w_min - lmax, matcher, rvalid);
00236
00237 if (r->at_end()) {
00238 PostList *ret = l;
00239 l = NULL;
00240 valid = lvalid;
00241 RETURN(ret);
00242 }
00243 if (rvalid) {
00244 rhead = r->get_docid();
00245 } else {
00246 rhead = did + 1;
00247 }
00248 }
00249
00250 if (!ldry) {
00251 if (lvalid) {
00252 lhead = l->get_docid();
00253 } else {
00254 lhead = did + 1;
00255 }
00256
00257 if (lhead < rhead) {
00258 valid = lvalid;
00259 } else if (rhead < lhead) {
00260 valid = rvalid;
00261 } else {
00262 valid = lvalid || rvalid;
00263 }
00264 RETURN(NULL);
00265 }
00266
00267 PostList *ret = r;
00268 r = NULL;
00269 valid = rvalid;
00270 RETURN(ret);
00271 }
00272
00273 Xapian::doccount
00274 OrPostList::get_termfreq_max() const
00275 {
00276 LOGCALL(MATCH, Xapian::doccount, "OrPostList::get_termfreq_max", NO_ARGS);
00277 RETURN(std::min(l->get_termfreq_max() + r->get_termfreq_max(), dbsize));
00278 }
00279
00280 Xapian::doccount
00281 OrPostList::get_termfreq_min() const
00282 {
00283 LOGCALL(MATCH, Xapian::doccount, "OrPostList::get_termfreq_min", NO_ARGS);
00284 RETURN(std::max(l->get_termfreq_min(), r->get_termfreq_min()));
00285 }
00286
00287 Xapian::doccount
00288 OrPostList::get_termfreq_est() const
00289 {
00290 LOGCALL(MATCH, Xapian::doccount, "OrPostList::get_termfreq_est", NO_ARGS);
00291 if (rare(dbsize == 0))
00292 RETURN(0);
00293
00294
00295 double lest = static_cast<double>(l->get_termfreq_est());
00296 double rest = static_cast<double>(r->get_termfreq_est());
00297 double est = lest + rest - (lest * rest / dbsize);
00298 RETURN(static_cast<Xapian::doccount>(est + 0.5));
00299 }
00300
00301 TermFreqs
00302 OrPostList::get_termfreq_est_using_stats(
00303 const Xapian::Weight::Internal & stats) const
00304 {
00305 LOGCALL(MATCH, TermFreqs, "OrPostList::get_termfreq_est_using_stats", stats);
00306
00307
00308 TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats));
00309 TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats));
00310
00311 double freqest, relfreqest;
00312
00313
00314 Assert(stats.collection_size);
00315
00316 freqest = lfreqs.termfreq + rfreqs.termfreq -
00317 (lfreqs.termfreq * rfreqs.termfreq / stats.collection_size);
00318
00319 if (stats.rset_size == 0) {
00320 relfreqest = 0;
00321 } else {
00322 relfreqest = lfreqs.reltermfreq + rfreqs.reltermfreq -
00323 (lfreqs.reltermfreq * rfreqs.reltermfreq / stats.rset_size);
00324 }
00325
00326 RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
00327 static_cast<Xapian::doccount>(relfreqest + 0.5)));
00328 }
00329
00330 Xapian::docid
00331 OrPostList::get_docid() const
00332 {
00333 LOGCALL(MATCH, Xapian::docid, "OrPostList::get_docid", NO_ARGS);
00334 Assert(lhead != 0 && rhead != 0);
00335 RETURN(std::min(lhead, rhead));
00336 }
00337
00338
00339 Xapian::weight
00340 OrPostList::get_weight() const
00341 {
00342 LOGCALL(MATCH, Xapian::weight, "OrPostList::get_weight", NO_ARGS);
00343 Assert(lhead != 0 && rhead != 0);
00344 if (lhead < rhead) RETURN(l->get_weight());
00345 if (lhead > rhead) RETURN(r->get_weight());
00346 RETURN(l->get_weight() + r->get_weight());
00347 }
00348
00349
00350 Xapian::weight
00351 OrPostList::get_maxweight() const
00352 {
00353 LOGCALL(MATCH, Xapian::weight, "OrPostList::get_maxweight", NO_ARGS);
00354 RETURN(lmax + rmax);
00355 }
00356
00357 Xapian::weight
00358 OrPostList::recalc_maxweight()
00359 {
00360 LOGCALL(MATCH, Xapian::weight, "OrPostList::recalc_maxweight", NO_ARGS);
00361
00362
00363
00364 lmax = l->recalc_maxweight();
00365 rmax = r->recalc_maxweight();
00366 minmax = std::min(lmax, rmax);
00367 RETURN(OrPostList::get_maxweight());
00368 }
00369
00370 bool
00371 OrPostList::at_end() const
00372 {
00373 LOGCALL(MATCH, bool, "OrPostList::at_end", NO_ARGS);
00374
00375 AssertParanoid(!(l->at_end()) && !(r->at_end()));
00376 RETURN(false);
00377 }
00378
00379 std::string
00380 OrPostList::get_description() const
00381 {
00382 return "(" + l->get_description() + " Or " + r->get_description() + ")";
00383 }
00384
00385 Xapian::termcount
00386 OrPostList::get_doclength() const
00387 {
00388 LOGCALL(MATCH, Xapian::termcount, "OrPostList::get_doclength", NO_ARGS);
00389 Xapian::termcount doclength;
00390
00391 Assert(lhead != 0 && rhead != 0);
00392 if (lhead > rhead) {
00393 doclength = r->get_doclength();
00394 LOGLINE(MATCH, "OrPostList::get_doclength() [right docid=" << rhead <<
00395 "] = " << doclength);
00396 } else {
00397 doclength = l->get_doclength();
00398 LOGLINE(MATCH, "OrPostList::get_doclength() [left docid=" << lhead <<
00399 "] = " << doclength);
00400 }
00401
00402 RETURN(doclength);
00403 }
00404
00405 Xapian::termcount
00406 OrPostList::get_wdf() const
00407 {
00408 LOGCALL(MATCH, Xapian::termcount, "OrPostList::get_wdf", NO_ARGS);
00409 if (lhead < rhead) RETURN(l->get_wdf());
00410 if (lhead > rhead) RETURN(r->get_wdf());
00411 RETURN(l->get_wdf() + r->get_wdf());
00412 }
00413
00414 Xapian::termcount
00415 OrPostList::count_matching_subqs() const
00416 {
00417 LOGCALL(MATCH, Xapian::termcount, "OrPostList::count_matching_subqs", NO_ARGS);
00418 if (lhead < rhead) RETURN(l->count_matching_subqs());
00419 if (lhead > rhead) RETURN(r->count_matching_subqs());
00420 RETURN(l->count_matching_subqs() + r->count_matching_subqs());
00421 }