00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025 #include "phrasepostlist.h"
00026
00027 #include "debuglog.h"
00028 #include "positionlist.h"
00029 #include "omassert.h"
00030 #include "str.h"
00031
00032 #include <algorithm>
00033
00037 class PositionListCmpLt {
00038 public:
00041 bool operator()(const PositionList *a, const PositionList *b) {
00042 return a->get_size() < b->get_size();
00043 }
00044 };
00045
00046
00049 bool
00050 NearPostList::test_doc()
00051 {
00052 LOGCALL(MATCH, bool, "NearPostList::test_doc", NO_ARGS);
00053 std::vector<PositionList *> plists;
00054
00055 std::vector<PostList *>::const_iterator i;
00056 for (i = terms.begin(); i != terms.end(); i++) {
00057 PositionList * p = (*i)->read_position_list();
00058
00059 if (!p) return false;
00060 plists.push_back(p);
00061 }
00062
00063 std::sort(plists.begin(), plists.end(), PositionListCmpLt());
00064
00065 Xapian::termpos pos;
00066 do {
00067 plists[0]->next();
00068 if (plists[0]->at_end()) RETURN(false);
00069 pos = plists[0]->get_position();
00070 } while (!do_test(plists, 1, pos, pos));
00071
00072 RETURN(true);
00073 }
00074
00075 bool
00076 NearPostList::do_test(std::vector<PositionList *> &plists, Xapian::termcount i,
00077 Xapian::termcount min, Xapian::termcount max)
00078 {
00079 LOGCALL(MATCH, bool, "NearPostList::do_test", plists | i | min | max);
00080 LOGLINE(MATCH, "docid = " << get_docid() << ", window = " << window);
00081 Xapian::termcount tmp = max + 1;
00082
00083 if (window <= tmp) tmp -= window; else tmp = 0;
00084 plists[i]->skip_to(tmp);
00085 while (!plists[i]->at_end()) {
00086 Xapian::termpos pos = plists[i]->get_position();
00087 LOGLINE(MATCH, "[" << i << "]: " << max - window + 1 << " " << min <<
00088 " " << pos << " " << max << " " << min + window - 1);
00089 if (pos > min + window - 1) RETURN(false);
00090 if (i + 1 == plists.size()) RETURN(true);
00091 if (pos < min) min = pos;
00092 else if (pos > max) max = pos;
00093 if (do_test(plists, i + 1, min, max)) RETURN(true);
00094 plists[i]->next();
00095 }
00096 RETURN(false);
00097 }
00098
00099 Xapian::termcount
00100 NearPostList::get_wdf() const
00101 {
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142 std::vector<PostList *>::const_iterator i = terms.begin();
00143 Xapian::termcount wdf = (*i)->get_wdf();
00144 for (; i != terms.end(); i++) {
00145 wdf = std::min(wdf, (*i)->get_wdf());
00146 }
00147
00148
00149
00150 return std::max(wdf, Xapian::termcount(1));
00151 }
00152
00153 TermFreqs
00154 NearPostList::get_termfreq_est_using_stats(
00155 const Xapian::Weight::Internal & stats) const
00156 {
00157 LOGCALL(MATCH, TermFreqs, "NearPostList::get_termfreq_est_using_stats", stats);
00158
00159 TermFreqs result(source->get_termfreq_est_using_stats(stats));
00160 result.termfreq /= 2;
00161 result.reltermfreq /= 2;
00162 RETURN(result);
00163 }
00164
00165 std::string
00166 NearPostList::get_description() const
00167 {
00168 return "(Near " + str(window) + " " + source->get_description() + ")";
00169 }
00170
00171
00172
00175 bool
00176 PhrasePostList::test_doc()
00177 {
00178 LOGCALL(MATCH, bool, "PhrasePostList::test_doc", NO_ARGS);
00179 std::vector<PositionList *> plists;
00180
00181 std::vector<PostList *>::const_iterator i;
00182 for (i = terms.begin(); i != terms.end(); i++) {
00183 PositionList * p = (*i)->read_position_list();
00184
00185 if (!p) return false;
00186 p->index = i - terms.begin();
00187 plists.push_back(p);
00188 }
00189
00190 std::sort(plists.begin(), plists.end(), PositionListCmpLt());
00191
00192 Xapian::termpos pos;
00193 Xapian::termpos idx, min;
00194 do {
00195 plists[0]->next();
00196 if (plists[0]->at_end()) {
00197 LOGLINE(MATCH, "--MISS--");
00198 RETURN(false);
00199 }
00200 pos = plists[0]->get_position();
00201 idx = plists[0]->index;
00202 min = pos + plists.size() - idx;
00203 if (min > window) min -= window; else min = 0;
00204 } while (!do_test(plists, 1, min, pos + window - idx));
00205 LOGLINE(MATCH, "**HIT**");
00206 RETURN(true);
00207 }
00208
00209 bool
00210 PhrasePostList::do_test(std::vector<PositionList *> &plists, Xapian::termcount i,
00211 Xapian::termcount min, Xapian::termcount max)
00212 {
00213 LOGCALL(MATCH, bool, "PhrasePostList::do_test", plists | i | min | max);
00214 LOGLINE(MATCH, "docid = " << get_docid() << ", window = " << window);
00215 Xapian::termpos idxi = plists[i]->index;
00216 LOGLINE(MATCH, "my idx in phrase is " << idxi);
00217
00218 Xapian::termpos mymin = min + idxi;
00219 Xapian::termpos mymax = max - plists.size() + idxi;
00220 LOGLINE(MATCH, "MIN = " << mymin << " MAX = " << mymax);
00221
00222
00223 for (Xapian::termcount j = 0; j < i; j++) {
00224 Xapian::termpos idxj = plists[j]->index;
00225 if (idxj > idxi) {
00226 Xapian::termpos tmp = plists[j]->get_position() + idxj - idxi;
00227 LOGLINE(MATCH, "ABOVE " << tmp);
00228 if (tmp < mymax) mymax = tmp;
00229 } else {
00230 AssertRel(idxi, !=, idxj);
00231 Xapian::termpos tmp = plists[j]->get_position() + idxi - idxj;
00232 LOGLINE(MATCH, "BELOW " << tmp);
00233 if (tmp > mymin) mymin = tmp;
00234 }
00235 LOGLINE(MATCH, "min = " << mymin << " max = " << mymax);
00236 }
00237 plists[i]->skip_to(mymin);
00238
00239 while (!plists[i]->at_end()) {
00240 Xapian::termpos pos = plists[i]->get_position();
00241 LOGLINE(MATCH, " " << mymin << " " << pos << " " << mymax);
00242 if (pos > mymax) RETURN(false);
00243 if (i + 1 == plists.size()) RETURN(true);
00244 Xapian::termpos tmp = pos + window - idxi;
00245 if (tmp < max) max = tmp;
00246 tmp = pos + plists.size() - idxi;
00247 if (tmp > window) {
00248 tmp -= window;
00249 if (tmp > min) min = tmp;
00250 }
00251 if (do_test(plists, i + 1, min, max)) RETURN(true);
00252 plists[i]->next();
00253 }
00254 RETURN(false);
00255 }
00256
00257 Xapian::termcount
00258 PhrasePostList::get_wdf() const
00259 {
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269 std::vector<PostList *>::const_iterator i = terms.begin();
00270 Xapian::termcount wdf = (*i)->get_wdf();
00271 for (; i != terms.end(); i++) {
00272 wdf = std::min(wdf, (*i)->get_wdf());
00273 }
00274
00275
00276
00277 return std::max(wdf / 2, Xapian::termcount(1));
00278 }
00279
00280 TermFreqs
00281 PhrasePostList::get_termfreq_est_using_stats(
00282 const Xapian::Weight::Internal & stats) const
00283 {
00284 LOGCALL(MATCH, TermFreqs, "PhrasePostList::get_termfreq_est_using_stats", stats);
00285
00286 TermFreqs result(source->get_termfreq_est_using_stats(stats));
00287 result.termfreq /= 3;
00288 result.reltermfreq /= 3;
00289 RETURN(result);
00290 }
00291
00292 std::string
00293 PhrasePostList::get_description() const
00294 {
00295 return "(Phrase " + str(window) + " "
00296 + source->get_description() + ")";
00297 }