00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "multixorpostlist.h"
00025
00026 #include "debuglog.h"
00027 #include "multimatch.h"
00028 #include "omassert.h"
00029
00030 using namespace std;
00031
00032 MultiXorPostList::~MultiXorPostList()
00033 {
00034 if (plist) {
00035 for (size_t i = 0; i < n_kids; ++i) {
00036 delete plist[i];
00037 }
00038 delete [] plist;
00039 }
00040 }
00041
00042 Xapian::doccount
00043 MultiXorPostList::get_termfreq_min() const
00044 {
00045
00046
00047
00048
00049
00050
00051 return 0;
00052 }
00053
00054 Xapian::doccount
00055 MultiXorPostList::get_termfreq_max() const
00056 {
00057
00058 Xapian::doccount result = plist[0]->get_termfreq_max();
00059 bool all_exact = (result == plist[0]->get_termfreq_min());
00060 bool overflow = false;
00061 for (size_t i = 1; i < n_kids; ++i) {
00062 Xapian::doccount tf_max = plist[i]->get_termfreq_max();
00063 Xapian::doccount old_result = result;
00064 result += tf_max;
00065
00066 if (result < old_result)
00067 overflow = true;
00068 if (all_exact)
00069 all_exact = (tf_max == plist[i]->get_termfreq_min());
00070 if (!all_exact && (overflow || result >= db_size))
00071 return db_size;
00072 }
00073 if (all_exact && (overflow || result > db_size)) {
00074
00075
00076
00077 return db_size - ((result & 1) == (db_size & 1));
00078 }
00079 return result;
00080 }
00081
00082 Xapian::doccount
00083 MultiXorPostList::get_termfreq_est() const
00084 {
00085 if (rare(db_size == 0))
00086 RETURN(0);
00087
00088
00089
00090 double scale = 1.0 / db_size;
00091 double P_est = plist[0]->get_termfreq_est() * scale;
00092 for (size_t i = 1; i < n_kids; ++i) {
00093 double P_i = plist[i]->get_termfreq_est() * scale;
00094 P_est += P_i - 2.0 * P_est * P_i;
00095 }
00096 return static_cast<Xapian::doccount>(P_est * db_size + 0.5);
00097 }
00098
00099 TermFreqs
00100 MultiXorPostList::get_termfreq_est_using_stats(
00101 const Xapian::Weight::Internal & stats) const
00102 {
00103 LOGCALL(MATCH, TermFreqs, "MultiXorPostList::get_termfreq_est_using_stats", stats);
00104
00105
00106
00107 TermFreqs freqs(plist[0]->get_termfreq_est_using_stats(stats));
00108
00109
00110 Assert(stats.collection_size);
00111 double scale = 1.0 / stats.collection_size;
00112 double P_est = freqs.termfreq * scale;
00113 double Pr_est = freqs.reltermfreq * scale;
00114
00115 for (size_t i = 1; i < n_kids; ++i) {
00116 double P_i = freqs.termfreq * scale;
00117 P_est += P_i - 2.0 * P_est * P_i;
00118
00119
00120 if (stats.rset_size != 0) {
00121 double Pr_i = freqs.reltermfreq / stats.rset_size;
00122 Pr_est += Pr_i - 2.0 * Pr_est * Pr_i;
00123 }
00124 }
00125 RETURN(TermFreqs(Xapian::doccount(P_est * stats.collection_size + 0.5),
00126 Xapian::doccount(Pr_est * stats.rset_size + 0.5)));
00127 }
00128
00129 Xapian::weight
00130 MultiXorPostList::get_maxweight() const
00131 {
00132 LOGCALL(MATCH, Xapian::weight, "MultiXorPostList::get_maxweight", NO_ARGS);
00133 RETURN(max_total);
00134 }
00135
00136 Xapian::docid
00137 MultiXorPostList::get_docid() const
00138 {
00139 return did;
00140 }
00141
00142 Xapian::termcount
00143 MultiXorPostList::get_doclength() const
00144 {
00145 Assert(did);
00146 Xapian::termcount doclength = 0;
00147 bool doclength_set = false;
00148 for (size_t i = 0; i < n_kids; ++i) {
00149 if (plist[i]->get_docid() == did) {
00150 if (doclength_set) {
00151 AssertEq(doclength, plist[i]->get_doclength());
00152 } else {
00153 doclength = plist[i]->get_doclength();
00154 doclength_set = true;
00155 }
00156 }
00157 }
00158 Assert(doclength_set);
00159 return doclength;
00160 }
00161
00162 Xapian::weight
00163 MultiXorPostList::get_weight() const
00164 {
00165 Assert(did);
00166 Xapian::weight result = 0;
00167 for (size_t i = 0; i < n_kids; ++i) {
00168 if (plist[i]->get_docid() == did)
00169 result += plist[i]->get_weight();
00170 }
00171 return result;
00172 }
00173
00174 bool
00175 MultiXorPostList::at_end() const
00176 {
00177 return (did == 0);
00178 }
00179
00180 Xapian::weight
00181 MultiXorPostList::recalc_maxweight()
00182 {
00183 LOGCALL(MATCH, Xapian::weight, "MultiXorPostList::recalc_maxweight", NO_ARGS);
00184 max_total = plist[0]->recalc_maxweight();
00185 double min_max = max_total;
00186 for (size_t i = 1; i < n_kids; ++i) {
00187 Xapian::weight new_max = plist[i]->recalc_maxweight();
00188 if (new_max < min_max)
00189 min_max = new_max;
00190 max_total += new_max;
00191 }
00192 if ((n_kids & 1) == 0) {
00193
00194 max_total -= min_max;
00195 }
00196 RETURN(max_total);
00197 }
00198
00199 PostList *
00200 MultiXorPostList::next(Xapian::weight w_min)
00201 {
00202 LOGCALL(MATCH, PostList *, "MultiXorPostList::next", w_min);
00203 Xapian::docid old_did = did;
00204 did = 0;
00205 size_t matching_count = 0;
00206 for (size_t i = 0; i < n_kids; ++i) {
00207 if (old_did == 0 || plist[i]->get_docid() <= old_did) {
00208
00209 PostList * res = plist[i]->next(0);
00210 if (res) {
00211 delete plist[i];
00212 plist[i] = res;
00213 matcher->recalc_maxweight();
00214 }
00215
00216 if (plist[i]->at_end()) {
00217 erase_sublist(i--);
00218 continue;
00219 }
00220 }
00221
00222 Xapian::docid new_did = plist[i]->get_docid();
00223 if (did == 0 || new_did < did) {
00224 did = new_did;
00225 matching_count = 1;
00226 } else if (new_did == did) {
00227 ++matching_count;
00228 }
00229 }
00230
00231 if (n_kids == 1) {
00232 n_kids = 0;
00233 RETURN(plist[0]);
00234 }
00235
00236
00237 if (did == 0)
00238 RETURN(NULL);
00239
00240
00241 if (matching_count & 1)
00242 RETURN(NULL);
00243
00244
00245 RETURN(next(w_min));
00246 }
00247
00248 PostList *
00249 MultiXorPostList::skip_to(Xapian::docid did_min, Xapian::weight w_min)
00250 {
00251 LOGCALL(MATCH, PostList *, "MultiXorPostList::skip_to", did_min | w_min);
00252 Xapian::docid old_did = did;
00253 did = 0;
00254 size_t matching_count = 0;
00255 for (size_t i = 0; i < n_kids; ++i) {
00256 if (old_did == 0 || plist[i]->get_docid() < did_min) {
00257
00258 PostList * res = plist[i]->skip_to(did_min, 0);
00259 if (res) {
00260 delete plist[i];
00261 plist[i] = res;
00262 matcher->recalc_maxweight();
00263 }
00264
00265 if (plist[i]->at_end()) {
00266 erase_sublist(i--);
00267 continue;
00268 }
00269 }
00270
00271 Xapian::docid new_did = plist[i]->get_docid();
00272 if (did == 0 || new_did < did) {
00273 did = new_did;
00274 matching_count = 1;
00275 } else if (new_did == did) {
00276 ++matching_count;
00277 }
00278 }
00279
00280 if (n_kids == 1) {
00281 AssertEq(matching_count, 1);
00282 n_kids = 0;
00283 RETURN(plist[0]);
00284 }
00285
00286
00287 if (did == 0)
00288 RETURN(NULL);
00289
00290
00291 if (matching_count & 1)
00292 RETURN(NULL);
00293
00294
00295 RETURN(next(w_min));
00296 }
00297
00298 string
00299 MultiXorPostList::get_description() const
00300 {
00301 string desc("(");
00302 desc += plist[0]->get_description();
00303 for (size_t i = 1; i < n_kids; ++i) {
00304 desc += " XOR ";
00305 desc += plist[i]->get_description();
00306 }
00307 desc += ')';
00308 return desc;
00309 }
00310
00311 Xapian::termcount
00312 MultiXorPostList::get_wdf() const
00313 {
00314 Xapian::termcount totwdf = 0;
00315 for (size_t i = 0; i < n_kids; ++i) {
00316 if (plist[i]->get_docid() == did)
00317 totwdf += plist[i]->get_wdf();
00318 }
00319 return totwdf;
00320 }
00321
00322 Xapian::termcount
00323 MultiXorPostList::count_matching_subqs() const
00324 {
00325 Xapian::termcount total = 0;
00326 for (size_t i = 0; i < n_kids; ++i) {
00327 if (plist[i]->get_docid() == did)
00328 total += plist[i]->count_matching_subqs();
00329 }
00330 return total;
00331 }