00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "queryoptimiser.h"
00025
00026 #include "andmaybepostlist.h"
00027 #include "andnotpostlist.h"
00028 #include "emptypostlist.h"
00029 #include "exactphrasepostlist.h"
00030 #include "multiandpostlist.h"
00031 #include "multimatch.h"
00032 #include "omassert.h"
00033 #include "omdebug.h"
00034 #include "omqueryinternal.h"
00035 #include "orpostlist.h"
00036 #include "phrasepostlist.h"
00037 #include "postlist.h"
00038 #include "valuegepostlist.h"
00039 #include "valuerangepostlist.h"
00040 #include "xorpostlist.h"
00041
00042 #include <algorithm>
00043 #include <list>
00044 #include <string>
00045 #include <vector>
00046
00047 using namespace std;
00048
00049 PostList *
00050 QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor)
00051 {
00052 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_subquery",
00053 query << ", " << factor);
00054
00055
00056 if (!query) RETURN(new EmptyPostList());
00057
00058 switch (query->op) {
00059 case Xapian::Query::Internal::OP_LEAF:
00060 RETURN(do_leaf(query, factor));
00061
00062 case Xapian::Query::OP_AND:
00063 case Xapian::Query::OP_FILTER:
00064 case Xapian::Query::OP_NEAR:
00065 case Xapian::Query::OP_PHRASE:
00066 RETURN(do_and_like(query, factor));
00067
00068 case Xapian::Query::OP_OR:
00069 case Xapian::Query::OP_XOR:
00070 case Xapian::Query::OP_ELITE_SET:
00071 RETURN(do_or_like(query, factor));
00072
00073 case Xapian::Query::OP_AND_NOT: {
00074 AssertEq(query->subqs.size(), 2);
00075 PostList * l = do_subquery(query->subqs[0], factor);
00076 PostList * r = do_subquery(query->subqs[1], 0.0);
00077 RETURN(new AndNotPostList(l, r, matcher, db_size));
00078 }
00079
00080 case Xapian::Query::OP_AND_MAYBE: {
00081 AssertEq(query->subqs.size(), 2);
00082 PostList * l = do_subquery(query->subqs[0], factor);
00083 PostList * r = do_subquery(query->subqs[1], factor);
00084 RETURN(new AndMaybePostList(l, r, matcher, db_size));
00085 }
00086
00087 case Xapian::Query::OP_VALUE_RANGE: {
00088 Xapian::valueno valno(query->parameter);
00089 const string & range_begin = query->tname;
00090 const string & range_end = query->str_parameter;
00091 RETURN(new ValueRangePostList(&db, valno, range_begin, range_end));
00092 }
00093
00094 case Xapian::Query::OP_VALUE_GE: {
00095 Xapian::valueno valno(query->parameter);
00096 const string & range_begin = query->tname;
00097 RETURN(new ValueGePostList(&db, valno, range_begin));
00098 }
00099
00100 case Xapian::Query::OP_VALUE_LE: {
00101 Xapian::valueno valno(query->parameter);
00102 const string & range_end = query->tname;
00103 RETURN(new ValueRangePostList(&db, valno, "", range_end));
00104 }
00105
00106 case Xapian::Query::OP_SCALE_WEIGHT: {
00107 AssertEq(query->subqs.size(), 1);
00108 double sub_factor = factor;
00109 if (sub_factor != 0.0) sub_factor *= query->get_dbl_parameter();
00110 RETURN(do_subquery(query->subqs[0], sub_factor));
00111 }
00112
00113 default:
00114 Assert(false);
00115 RETURN(NULL);
00116 }
00117 }
00118
00119 struct PosFilter {
00120 PosFilter(Xapian::Query::Internal::op_t op_, size_t begin_, size_t end_,
00121 Xapian::termcount window_)
00122 : op(op_), begin(begin_), end(end_), window(window_) { }
00123
00124 Xapian::Query::Internal::op_t op;
00125
00127 size_t begin, end;
00128
00129 Xapian::termcount window;
00130 };
00131
00132 PostList *
00133 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor)
00134 {
00135 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_and_like",
00136 query << ", " << factor);
00137
00138 list<PosFilter> pos_filters;
00139 vector<PostList *> plists;
00140 do_and_like(query, factor, plists, pos_filters);
00141 AssertRel(plists.size(), >=, 2);
00142
00143 PostList * pl;
00144 pl = new MultiAndPostList(plists.begin(), plists.end(), matcher, db_size);
00145
00146
00147
00148
00149
00150
00151 list<PosFilter>::iterator i;
00152 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
00153 const PosFilter & filter = *i;
00154
00155
00156
00157 vector<PostList *> terms(plists.begin() + filter.begin,
00158 plists.begin() + filter.end);
00159
00160 Xapian::termcount window = filter.window;
00161 if (filter.op == Xapian::Query::OP_NEAR) {
00162 pl = new NearPostList(pl, window, terms);
00163 } else if (window == filter.end - filter.begin) {
00164 AssertEq(filter.op, Xapian::Query::OP_PHRASE);
00165 pl = new ExactPhrasePostList(pl, terms);
00166 } else {
00167 AssertEq(filter.op, Xapian::Query::OP_PHRASE);
00168 pl = new PhrasePostList(pl, window, terms);
00169 }
00170 }
00171
00172 RETURN(pl);
00173 }
00174
00175 inline bool is_and_like(Xapian::Query::Internal::op_t op) {
00176 return op == Xapian::Query::OP_AND || op == Xapian::Query::OP_FILTER ||
00177 op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE;
00178 }
00179
00180 void
00181 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor,
00182 vector<PostList *> & and_plists,
00183 list<PosFilter> & pos_filters)
00184 {
00185 DEBUGCALL(MATCH, void, "QueryOptimiser::do_and_like",
00186 query << ", " << factor << ", [and_plists], [pos_filters]");
00187
00188 Xapian::Query::Internal::op_t op = query->op;
00189 Assert(is_and_like(op));
00190
00191 bool positional = false;
00192 if (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR) {
00193
00194
00195
00196 if (!db.has_positions()) {
00197 op = Xapian::Query::OP_AND;
00198 } else {
00199 positional = true;
00200 }
00201 }
00202
00203 const Xapian::Query::Internal::subquery_list &queries = query->subqs;
00204 AssertRel(queries.size(), >=, 2);
00205
00206 for (size_t i = 0; i != queries.size(); ++i) {
00207
00208 if (i == 1 && op == Xapian::Query::OP_FILTER) factor = 0.0;
00209
00210 const Xapian::Query::Internal * subq = queries[i];
00211 if (is_and_like(subq->op)) {
00212 do_and_like(subq, factor, and_plists, pos_filters);
00213 } else {
00214 PostList * pl = do_subquery(subq, factor);
00215 and_plists.push_back(pl);
00216 }
00217 }
00218
00219 if (positional) {
00220
00221 size_t end = and_plists.size();
00222 size_t begin = end - queries.size();
00223 Xapian::termcount window = query->parameter;
00224
00225 pos_filters.push_back(PosFilter(op, begin, end, window));
00226 }
00227 }
00228
00234 struct CmpMaxOrTerms {
00243 bool operator()(const PostList *a, const PostList *b) {
00244 if (a->get_termfreq_max() == 0) return false;
00245 if (b->get_termfreq_max() == 0) return true;
00246
00247 #if defined(__i386__) || defined(__mc68000__)
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264 volatile double a_max_wt = a->get_maxweight();
00265 volatile double b_max_wt = b->get_maxweight();
00266 return a_max_wt > b_max_wt;
00267 #else
00268 return a->get_maxweight() > b->get_maxweight();
00269 #endif
00270 }
00271 };
00272
00273 template<class CLASS> struct delete_ptr {
00274 void operator()(CLASS *p) { delete p; }
00275 };
00276
00278 struct ComparePostListTermFreqAscending {
00280 bool operator()(const PostList *a, const PostList *b) {
00281 return a->get_termfreq_est() > b->get_termfreq_est();
00282 }
00283 };
00284
00285 PostList *
00286 QueryOptimiser::do_or_like(const Xapian::Query::Internal *query, double factor)
00287 {
00288 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_or_like",
00289 query << ", " << factor);
00290
00291
00292
00293 Xapian::Query::Internal::op_t op = query->op;
00294 Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR ||
00295 op == Xapian::Query::OP_XOR);
00296
00297 const Xapian::Query::Internal::subquery_list &queries = query->subqs;
00298 AssertRel(queries.size(), >=, 2);
00299
00300 vector<PostList *> postlists;
00301 postlists.reserve(queries.size());
00302
00303 Xapian::Query::Internal::subquery_list::const_iterator q;
00304 for (q = queries.begin(); q != queries.end(); ++q) {
00305 postlists.push_back(do_subquery(*q, factor));
00306 }
00307
00308 if (op == Xapian::Query::OP_ELITE_SET) {
00309
00310 Xapian::termcount elite_set_size = query->parameter;
00311 Assert(elite_set_size > 0);
00312
00313 if (postlists.size() > elite_set_size) {
00314
00315
00316 for_each(postlists.begin(), postlists.end(),
00317 mem_fun(&PostList::recalc_maxweight));
00318
00319 nth_element(postlists.begin(),
00320 postlists.begin() + elite_set_size - 1,
00321 postlists.end(), CmpMaxOrTerms());
00322
00323 for_each(postlists.begin() + elite_set_size, postlists.end(),
00324 delete_ptr<PostList>());
00325
00326 if (elite_set_size == 1) RETURN(postlists[0]);
00327
00328 postlists.resize(elite_set_size);
00329 }
00330 }
00331
00332
00333
00334 make_heap(postlists.begin(), postlists.end(),
00335 ComparePostListTermFreqAscending());
00336
00337
00338
00339
00340
00341
00342
00343 AssertRel(postlists.size(), >=, 2);
00344 while (true) {
00345
00346
00347
00348
00349
00350
00351 PostList * r = postlists.front();
00352 pop_heap(postlists.begin(), postlists.end(),
00353 ComparePostListTermFreqAscending());
00354 postlists.pop_back();
00355 PostList * l = postlists.front();
00356
00357 PostList * pl;
00358 if (op == Xapian::Query::OP_XOR) {
00359 pl = new XorPostList(l, r, matcher, db_size);
00360 } else {
00361 pl = new OrPostList(l, r, matcher, db_size);
00362 }
00363
00364 if (postlists.size() == 1) RETURN(pl);
00365
00366 pop_heap(postlists.begin(), postlists.end(),
00367 ComparePostListTermFreqAscending());
00368 postlists.back() = pl;
00369 push_heap(postlists.begin(), postlists.end(),
00370 ComparePostListTermFreqAscending());
00371 }
00372 }