00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include "exactphrasepostlist.h"
00024
00025 #include "debuglog.h"
00026 #include "positionlist.h"
00027
00028 #include <algorithm>
00029 #include <vector>
00030
00031 using namespace std;
00032
00033 ExactPhrasePostList::ExactPhrasePostList(PostList *source_,
00034 const vector<PostList*>::const_iterator &terms_begin,
00035 const vector<PostList*>::const_iterator &terms_end)
00036 : SelectPostList(source_), terms(terms_begin, terms_end)
00037 {
00038 size_t n = terms.size();
00039 poslists = new PositionList*[n];
00040 try {
00041 order = new unsigned[n];
00042 } catch (...) {
00043 delete [] poslists;
00044 throw;
00045 }
00046 for (size_t i = 0; i < n; ++i) order[i] = unsigned(i);
00047 }
00048
00049 ExactPhrasePostList::~ExactPhrasePostList()
00050 {
00051 delete [] poslists;
00052 delete [] order;
00053 }
00054
00055 void
00056 ExactPhrasePostList::start_position_list(unsigned i)
00057 {
00058 unsigned index = order[i];
00059 poslists[i] = terms[index]->read_position_list();
00060 poslists[i]->index = index;
00061 }
00062
00063 class TermCompare {
00064 vector<PostList *> & terms;
00065
00066 public:
00067 TermCompare(vector<PostList *> & terms_) : terms(terms_) { }
00068
00069 bool operator()(unsigned a, unsigned b) const {
00070 return terms[a]->get_wdf() < terms[b]->get_wdf();
00071 }
00072 };
00073
00074 bool
00075 ExactPhrasePostList::test_doc()
00076 {
00077 LOGCALL(MATCH, bool, "ExactPhrasePostList::test_doc", NO_ARGS);
00078
00079 if (terms.size() <= 1) RETURN(true);
00080
00081
00082
00083
00084
00085 sort(order, order + terms.size(), TermCompare(terms));
00086
00087
00088
00089
00090
00091 start_position_list(0);
00092 poslists[0]->skip_to(poslists[0]->index);
00093 if (poslists[0]->at_end()) RETURN(false);
00094
00095
00096
00097
00098 start_position_list(1);
00099 if (poslists[0]->get_size() < poslists[1]->get_size()) {
00100 poslists[1]->skip_to(poslists[1]->index);
00101 if (poslists[1]->at_end()) RETURN(false);
00102 swap(poslists[0], poslists[1]);
00103 }
00104
00105 unsigned read_hwm = 1;
00106 Xapian::termpos idx0 = poslists[0]->index;
00107 do {
00108 Xapian::termpos base = poslists[0]->get_position() - idx0;
00109 unsigned i = 1;
00110 while (true) {
00111 if (i > read_hwm) {
00112 read_hwm = i;
00113 start_position_list(i);
00114
00115
00116
00117 }
00118 Xapian::termpos required = base + poslists[i]->index;
00119 poslists[i]->skip_to(required);
00120 if (poslists[i]->at_end()) RETURN(false);
00121 if (poslists[i]->get_position() != required) break;
00122 if (++i == terms.size()) RETURN(true);
00123 }
00124 poslists[0]->next();
00125 } while (!poslists[0]->at_end());
00126 RETURN(false);
00127 }
00128
00129 Xapian::termcount
00130 ExactPhrasePostList::get_wdf() const
00131 {
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142 vector<PostList *>::const_iterator i = terms.begin();
00143 Xapian::termcount wdf = (*i)->get_wdf();
00144 for (; i != terms.end(); i++) {
00145 wdf = min(wdf, (*i)->get_wdf());
00146 }
00147 return wdf;
00148 }
00149
00150 Xapian::doccount
00151 ExactPhrasePostList::get_termfreq_est() const
00152 {
00153
00154
00155
00156 return source->get_termfreq_est() / 4;
00157 }
00158
00159 TermFreqs
00160 ExactPhrasePostList::get_termfreq_est_using_stats(
00161 const Xapian::Weight::Internal & stats) const
00162 {
00163 LOGCALL(MATCH, TermFreqs, "ExactPhrasePostList::get_termfreq_est_using_stats", stats);
00164
00165
00166 TermFreqs result(source->get_termfreq_est_using_stats(stats));
00167 result.termfreq /= 4;
00168 result.reltermfreq /= 4;
00169 RETURN(result);
00170 }
00171
00172 string
00173 ExactPhrasePostList::get_description() const
00174 {
00175 return "(ExactPhrase " + source->get_description() + ")";
00176 }