00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "queryoptimiser.h"
00025
00026 #include "andmaybepostlist.h"
00027 #include "andnotpostlist.h"
00028 #include "const_database_wrapper.h"
00029 #include "debuglog.h"
00030 #include "emptypostlist.h"
00031 #include "exactphrasepostlist.h"
00032 #include "externalpostlist.h"
00033 #include "multiandpostlist.h"
00034 #include "multimatch.h"
00035 #include "multixorpostlist.h"
00036 #include "omassert.h"
00037 #include "omqueryinternal.h"
00038 #include "orpostlist.h"
00039 #include "phrasepostlist.h"
00040 #include "postlist.h"
00041 #include "valuegepostlist.h"
00042 #include "valuerangepostlist.h"
00043
00044 #include <algorithm>
00045 #include <list>
00046 #include <string>
00047 #include <vector>
00048
00049 using namespace std;
00050
00051 PostList *
00052 QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor)
00053 {
00054 LOGCALL(MATCH, PostList *, "QueryOptimiser::do_subquery", query | factor);
00055
00056
00057 if (!query) RETURN(new EmptyPostList);
00058
00059 switch (query->op) {
00060 case Xapian::Query::Internal::OP_LEAF:
00061 if (factor != 0.0) {
00062 ++total_subqs;
00063 if (query->tname.empty()) factor = 0.0;
00064 }
00065 RETURN(localsubmatch.postlist_from_op_leaf_query(query, factor));
00066
00067 case Xapian::Query::Internal::OP_EXTERNAL_SOURCE: {
00068 if (factor != 0.0)
00069 ++total_subqs;
00070 Assert(query->external_source);
00071 Xapian::Database wrappeddb(new ConstDatabaseWrapper(&db));
00072 RETURN(new ExternalPostList(wrappeddb,
00073 query->external_source, factor,
00074 matcher));
00075 }
00076
00077 case Xapian::Query::OP_AND:
00078 case Xapian::Query::OP_FILTER:
00079 case Xapian::Query::OP_NEAR:
00080 case Xapian::Query::OP_PHRASE:
00081 RETURN(do_and_like(query, factor));
00082
00083 case Xapian::Query::OP_OR:
00084 case Xapian::Query::OP_XOR:
00085 case Xapian::Query::OP_ELITE_SET:
00086 RETURN(do_or_like(query, factor));
00087
00088 case Xapian::Query::OP_SYNONYM: {
00089
00090
00091 Xapian::termcount save_total_subqs = total_subqs;
00092 PostList * pl = do_synonym(query, factor);
00093 total_subqs = save_total_subqs;
00094 if (factor != 0.0)
00095 ++total_subqs;
00096 RETURN(pl);
00097 }
00098
00099 case Xapian::Query::OP_AND_NOT: {
00100 AssertEq(query->subqs.size(), 2);
00101 PostList * l = do_subquery(query->subqs[0], factor);
00102 PostList * r = do_subquery(query->subqs[1], 0.0);
00103 RETURN(new AndNotPostList(l, r, matcher, db_size));
00104 }
00105
00106 case Xapian::Query::OP_AND_MAYBE: {
00107 AssertEq(query->subqs.size(), 2);
00108 PostList * l = do_subquery(query->subqs[0], factor);
00109 PostList * r = do_subquery(query->subqs[1], factor);
00110 RETURN(new AndMaybePostList(l, r, matcher, db_size));
00111 }
00112
00113 case Xapian::Query::OP_VALUE_RANGE: {
00114 if (factor != 0.0)
00115 ++total_subqs;
00116 Xapian::valueno slot(query->parameter);
00117 const string & range_begin = query->tname;
00118 const string & range_end = query->str_parameter;
00119 const string & lb = db.get_value_lower_bound(slot);
00120 if (!lb.empty() &&
00121 (range_end < lb ||
00122 range_begin > db.get_value_upper_bound(slot))) {
00123 RETURN(new EmptyPostList);
00124 }
00125 RETURN(new ValueRangePostList(&db, slot, range_begin, range_end));
00126 }
00127
00128 case Xapian::Query::OP_VALUE_GE: {
00129 if (factor != 0.0)
00130 ++total_subqs;
00131 Xapian::valueno slot(query->parameter);
00132 const string & range_begin = query->tname;
00133 const string & lb = db.get_value_lower_bound(slot);
00134 if (!lb.empty() &&
00135 range_begin > db.get_value_upper_bound(slot)) {
00136 RETURN(new EmptyPostList);
00137 }
00138 RETURN(new ValueGePostList(&db, slot, range_begin));
00139 }
00140
00141 case Xapian::Query::OP_VALUE_LE: {
00142 if (factor != 0.0)
00143 ++total_subqs;
00144 Xapian::valueno slot(query->parameter);
00145 const string & range_end = query->tname;
00146 if (range_end < db.get_value_lower_bound(slot)) {
00147 RETURN(new EmptyPostList);
00148 }
00149 RETURN(new ValueRangePostList(&db, slot, string(), range_end));
00150 }
00151
00152 case Xapian::Query::OP_SCALE_WEIGHT: {
00153 AssertEq(query->subqs.size(), 1);
00154 double sub_factor = factor;
00155 if (sub_factor != 0.0) sub_factor *= query->get_dbl_parameter();
00156 RETURN(do_subquery(query->subqs[0], sub_factor));
00157 }
00158
00159 default:
00160 Assert(false);
00161 RETURN(NULL);
00162 }
00163 }
00164
00165 struct PosFilter {
00166 PosFilter(Xapian::Query::Internal::op_t op_, size_t begin_, size_t end_,
00167 Xapian::termcount window_)
00168 : op(op_), begin(begin_), end(end_), window(window_) { }
00169
00170 Xapian::Query::Internal::op_t op;
00171
00173 size_t begin, end;
00174
00175 Xapian::termcount window;
00176 };
00177
00178 PostList *
00179 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor)
00180 {
00181 LOGCALL(MATCH, PostList *, "QueryOptimiser::do_and_like", query | factor);
00182
00183 list<PosFilter> pos_filters;
00184 vector<PostList *> plists;
00185 do_and_like(query, factor, plists, pos_filters);
00186 AssertRel(plists.size(), >=, 2);
00187
00188 PostList * pl;
00189 pl = new MultiAndPostList(plists.begin(), plists.end(), matcher, db_size);
00190
00191
00192
00193
00194
00195
00196 list<PosFilter>::iterator i;
00197 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
00198 const PosFilter & filter = *i;
00199
00200 vector<PostList *>::const_iterator terms_begin = plists.begin() + filter.begin;
00201 vector<PostList *>::const_iterator terms_end = plists.begin() + filter.end;
00202
00203 Xapian::termcount window = filter.window;
00204 if (filter.op == Xapian::Query::OP_NEAR) {
00205 pl = new NearPostList(pl, window, terms_begin, terms_end);
00206 } else if (window == filter.end - filter.begin) {
00207 AssertEq(filter.op, Xapian::Query::OP_PHRASE);
00208 pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
00209 } else {
00210 AssertEq(filter.op, Xapian::Query::OP_PHRASE);
00211 pl = new PhrasePostList(pl, window, terms_begin, terms_end);
00212 }
00213 }
00214
00215 RETURN(pl);
00216 }
00217
00218 inline bool is_and_like(Xapian::Query::Internal::op_t op) {
00219 return op == Xapian::Query::OP_AND || op == Xapian::Query::OP_FILTER ||
00220 op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE;
00221 }
00222
00223 void
00224 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor,
00225 vector<PostList *> & and_plists,
00226 list<PosFilter> & pos_filters)
00227 {
00228 LOGCALL_VOID(MATCH, "QueryOptimiser::do_and_like", query | factor | and_plists | pos_filters);
00229
00230 Xapian::Query::Internal::op_t op = query->op;
00231 Assert(is_and_like(op));
00232
00233 bool positional = false;
00234 if (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR) {
00235
00236
00237
00238 if (!db.has_positions()) {
00239 op = Xapian::Query::OP_AND;
00240 } else {
00241 positional = true;
00242 }
00243 }
00244
00245 const Xapian::Query::Internal::subquery_list &queries = query->subqs;
00246 AssertRel(queries.size(), >=, 2);
00247
00248 for (size_t i = 0; i != queries.size(); ++i) {
00249
00250 if (i == 1 && op == Xapian::Query::OP_FILTER) factor = 0.0;
00251
00252 const Xapian::Query::Internal * subq = queries[i];
00253 if (is_and_like(subq->op)) {
00254 do_and_like(subq, factor, and_plists, pos_filters);
00255 } else {
00256 PostList * pl = do_subquery(subq, factor);
00257 and_plists.push_back(pl);
00258 }
00259 }
00260
00261 if (positional) {
00262
00263 size_t end = and_plists.size();
00264 size_t begin = end - queries.size();
00265 Xapian::termcount window = query->parameter;
00266
00267 pos_filters.push_back(PosFilter(op, begin, end, window));
00268 }
00269 }
00270
00276 struct CmpMaxOrTerms {
00285 bool operator()(const PostList *a, const PostList *b) {
00286 if (a->get_termfreq_max() == 0) return false;
00287 if (b->get_termfreq_max() == 0) return true;
00288
00289 #if (defined(__i386__) && !defined(__SSE2_MATH__)) || defined(__mc68000__) || defined(__mc68010__) || defined(__mc68020__) || defined(__mc68030__)
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310 volatile double a_max_wt = a->get_maxweight();
00311 volatile double b_max_wt = b->get_maxweight();
00312 return a_max_wt > b_max_wt;
00313 #else
00314 return a->get_maxweight() > b->get_maxweight();
00315 #endif
00316 }
00317 };
00318
00319 template<class CLASS> struct delete_ptr {
00320 void operator()(CLASS *p) { delete p; }
00321 };
00322
00324 struct ComparePostListTermFreqAscending {
00326 bool operator()(const PostList *a, const PostList *b) {
00327 return a->get_termfreq_est() > b->get_termfreq_est();
00328 }
00329 };
00330
00331 PostList *
00332 QueryOptimiser::do_or_like(const Xapian::Query::Internal *query, double factor)
00333 {
00334 LOGCALL(MATCH, PostList *, "QueryOptimiser::do_or_like", query | factor);
00335
00336
00337
00338 Xapian::Query::Internal::op_t op = query->op;
00339 Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR ||
00340 op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM);
00341
00342 const Xapian::Query::Internal::subquery_list &queries = query->subqs;
00343 AssertRel(queries.size(), >=, 2);
00344
00345 vector<PostList *> postlists;
00346 postlists.reserve(queries.size());
00347
00348 Xapian::Query::Internal::subquery_list::const_iterator q;
00349 for (q = queries.begin(); q != queries.end(); ++q) {
00350 postlists.push_back(do_subquery(*q, factor));
00351 }
00352
00353 if (op == Xapian::Query::OP_XOR) {
00354 RETURN(new MultiXorPostList(postlists.begin(), postlists.end(),
00355 matcher, db_size));
00356 }
00357
00358 if (op == Xapian::Query::OP_ELITE_SET) {
00359
00360 Xapian::termcount elite_set_size = query->parameter;
00361 Assert(elite_set_size > 0);
00362
00363 if (postlists.size() > elite_set_size) {
00364
00365
00366 for_each(postlists.begin(), postlists.end(),
00367 mem_fun(&PostList::recalc_maxweight));
00368
00369 nth_element(postlists.begin(),
00370 postlists.begin() + elite_set_size - 1,
00371 postlists.end(), CmpMaxOrTerms());
00372
00373 for_each(postlists.begin() + elite_set_size, postlists.end(),
00374 delete_ptr<PostList>());
00375
00376 if (elite_set_size == 1) RETURN(postlists[0]);
00377
00378 postlists.resize(elite_set_size);
00379 }
00380 }
00381
00382
00383
00384 make_heap(postlists.begin(), postlists.end(),
00385 ComparePostListTermFreqAscending());
00386
00387
00388
00389
00390
00391
00392
00393
00394 AssertRel(postlists.size(), >=, 2);
00395 while (true) {
00396
00397
00398
00399
00400
00401
00402 PostList * r = postlists.front();
00403 pop_heap(postlists.begin(), postlists.end(),
00404 ComparePostListTermFreqAscending());
00405 postlists.pop_back();
00406 PostList * pl = new OrPostList(postlists.front(), r, matcher, db_size);
00407
00408 if (postlists.size() == 1) RETURN(pl);
00409
00410 pop_heap(postlists.begin(), postlists.end(),
00411 ComparePostListTermFreqAscending());
00412 postlists.back() = pl;
00413 push_heap(postlists.begin(), postlists.end(),
00414 ComparePostListTermFreqAscending());
00415 }
00416 }
00417
00418 PostList *
00419 QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor)
00420 {
00421 LOGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym", query | factor);
00422 if (factor == 0.0) {
00423
00424
00425 RETURN(do_or_like(query, 0.0));
00426 }
00427
00428
00429
00430
00431
00432 AssertEq(query->get_wqf(), 0);
00433
00434
00435
00436 RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0),
00437 matcher, factor));
00438 }