00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025
00026 #include "autoptr.h"
00027 #include "omdebug.h"
00028 #include "omqueryinternal.h"
00029 #include "utils.h"
00030 #include "serialise.h"
00031 #include "serialise-double.h"
00032
00033 #include <xapian/error.h>
00034 #include <xapian/termiterator.h>
00035 #include <xapian/version.h>
00036 #include "vectortermlist.h"
00037
00038 #include <vector>
00039 #include <set>
00040 #include <algorithm>
00041 #include <math.h>
00042 #include <limits.h>
00043 #include <cfloat>
00044
00045 using namespace std;
00046
00047
00048
00049 static unsigned int
00050 get_min_subqs(Xapian::Query::Internal::op_t op)
00051 {
00052 switch (op) {
00053 case Xapian::Query::Internal::OP_LEAF:
00054 case Xapian::Query::OP_AND:
00055 case Xapian::Query::OP_OR:
00056 case Xapian::Query::OP_XOR:
00057 case Xapian::Query::OP_NEAR:
00058 case Xapian::Query::OP_PHRASE:
00059 case Xapian::Query::OP_ELITE_SET:
00060 case Xapian::Query::OP_VALUE_RANGE:
00061 case Xapian::Query::OP_VALUE_GE:
00062 case Xapian::Query::OP_VALUE_LE:
00063 return 0;
00064 case Xapian::Query::OP_SCALE_WEIGHT:
00065 return 1;
00066 case Xapian::Query::OP_FILTER:
00067 case Xapian::Query::OP_AND_MAYBE:
00068 case Xapian::Query::OP_AND_NOT:
00069 return 2;
00070 default:
00071 Assert(false);
00072 throw Xapian::InvalidOperationError("get_min_subqs called with invalid operator type");
00073 }
00074 }
00075
00076 static unsigned int
00077 get_max_subqs(Xapian::Query::Internal::op_t op)
00078 {
00079 switch (op) {
00080 case Xapian::Query::Internal::OP_LEAF:
00081 case Xapian::Query::OP_VALUE_RANGE:
00082 case Xapian::Query::OP_VALUE_GE:
00083 case Xapian::Query::OP_VALUE_LE:
00084 return 0;
00085 case Xapian::Query::OP_SCALE_WEIGHT:
00086 return 1;
00087 case Xapian::Query::OP_FILTER:
00088 case Xapian::Query::OP_AND_MAYBE:
00089 case Xapian::Query::OP_AND_NOT:
00090 return 2;
00091 case Xapian::Query::OP_AND:
00092 case Xapian::Query::OP_OR:
00093 case Xapian::Query::OP_XOR:
00094 case Xapian::Query::OP_NEAR:
00095 case Xapian::Query::OP_PHRASE:
00096 case Xapian::Query::OP_ELITE_SET:
00097 return UINT_MAX;
00098 default:
00099 Assert(false);
00100 throw Xapian::InvalidOperationError("get_max_subqs called with invalid operator type");
00101 }
00102 }
00103
00104 static inline bool
00105 is_leaf(Xapian::Query::Internal::op_t op)
00106 {
00107 return (op == Xapian::Query::Internal::OP_LEAF);
00108 }
00109
00110
00111
00130 string
00131 Xapian::Query::Internal::serialise(Xapian::termpos & curpos) const
00132 {
00133 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00134 string result;
00135
00136 if (op == Xapian::Query::Internal::OP_LEAF) {
00137 result += '[';
00138 result += encode_length(tname.length());
00139 result += tname;
00140 if (term_pos != curpos) result += '@' + om_tostring(term_pos);
00141 if (wqf != 1) result += '#' + om_tostring(wqf);
00142 ++curpos;
00143 } else {
00144 result += "(";
00145 for (subquery_list::const_iterator i = subqs.begin();
00146 i != subqs.end();
00147 ++i) {
00148 result += (*i)->serialise(curpos);
00149 }
00150 switch (op) {
00151 case Xapian::Query::Internal::OP_LEAF:
00152 Assert(false);
00153 break;
00154 case Xapian::Query::OP_AND:
00155 result += "&";
00156 break;
00157 case Xapian::Query::OP_OR:
00158 result += "|";
00159 break;
00160 case Xapian::Query::OP_FILTER:
00161 result += "%";
00162 break;
00163 case Xapian::Query::OP_AND_MAYBE:
00164 result += "+";
00165 break;
00166 case Xapian::Query::OP_AND_NOT:
00167 result += "-";
00168 break;
00169 case Xapian::Query::OP_XOR:
00170 result += "^";
00171 break;
00172 case Xapian::Query::OP_NEAR:
00173 result += "~" + om_tostring(parameter);
00174 break;
00175 case Xapian::Query::OP_PHRASE:
00176 result += "\"" + om_tostring(parameter);
00177 break;
00178 case Xapian::Query::OP_ELITE_SET:
00179 result += "*" + om_tostring(parameter);
00180 break;
00181 case Xapian::Query::OP_VALUE_RANGE:
00182 result += "]";
00183 result += encode_length(tname.length());
00184 result += tname;
00185 result += encode_length(str_parameter.length());
00186 result += str_parameter;
00187 result += om_tostring(parameter);
00188 break;
00189 case Xapian::Query::OP_VALUE_GE:
00190 result += "}";
00191 result += encode_length(tname.length());
00192 result += tname;
00193 result += om_tostring(parameter);
00194 break;
00195 case Xapian::Query::OP_VALUE_LE:
00196 result += "{";
00197 result += encode_length(tname.length());
00198 result += tname;
00199 result += om_tostring(parameter);
00200 break;
00201 case Xapian::Query::OP_SCALE_WEIGHT:
00202 result += ".";
00203 result += str_parameter;
00204 break;
00205 }
00206 }
00207 return result;
00208 #else
00209 (void)curpos;
00210 throw Xapian::InternalError("query serialisation not compiled in");
00211 #endif
00212 }
00213
00214 string
00215 Xapian::Query::Internal::get_op_name(Xapian::Query::Internal::op_t op)
00216 {
00217 string name;
00218 switch (op) {
00219 case Xapian::Query::Internal::OP_LEAF: name = "LEAF"; break;
00220 case Xapian::Query::OP_AND: name = "AND"; break;
00221 case Xapian::Query::OP_OR: name = "OR"; break;
00222 case Xapian::Query::OP_FILTER: name = "FILTER"; break;
00223 case Xapian::Query::OP_AND_MAYBE: name = "AND_MAYBE"; break;
00224 case Xapian::Query::OP_AND_NOT: name = "AND_NOT"; break;
00225 case Xapian::Query::OP_XOR: name = "XOR"; break;
00226 case Xapian::Query::OP_NEAR: name = "NEAR"; break;
00227 case Xapian::Query::OP_PHRASE: name = "PHRASE"; break;
00228 case Xapian::Query::OP_ELITE_SET: name = "ELITE_SET"; break;
00229 case Xapian::Query::OP_VALUE_RANGE: name = "VALUE_RANGE"; break;
00230 case Xapian::Query::OP_VALUE_GE: name = "VALUE_GE"; break;
00231 case Xapian::Query::OP_VALUE_LE: name = "VALUE_LE"; break;
00232 case Xapian::Query::OP_SCALE_WEIGHT: name = "SCALE_WEIGHT"; break;
00233 }
00234 return name;
00235 }
00236
00237 string
00238 Xapian::Query::Internal::get_description() const
00239 {
00240 string opstr;
00241
00242 if (is_leaf(op)) {
00243 if (term_pos != 0) {
00244 opstr += "pos=" + om_tostring(term_pos);
00245 }
00246 if (wqf != 1) {
00247 if (!opstr.empty()) opstr += ",";
00248 opstr += "wqf=" + om_tostring(wqf);
00249 }
00250 if (!opstr.empty()) opstr = ":(" + opstr + ")";
00251 if (tname.empty()) return "<alldocuments>" + opstr;
00252 return tname + opstr;
00253 }
00254
00255 switch (op) {
00256 case Xapian::Query::OP_VALUE_RANGE: {
00257 opstr = get_op_name(op);
00258 opstr += ' ';
00259 opstr += om_tostring(parameter);
00260 opstr += ' ';
00261 opstr += tname;
00262 opstr += ' ';
00263 opstr += str_parameter;
00264 return opstr;
00265 }
00266 case Xapian::Query::OP_VALUE_GE:
00267 case Xapian::Query::OP_VALUE_LE: {
00268 opstr = get_op_name(op);
00269 opstr += ' ';
00270 opstr += om_tostring(parameter);
00271 opstr += ' ';
00272 opstr += tname;
00273 return opstr;
00274 }
00275 case Xapian::Query::OP_SCALE_WEIGHT: {
00276 opstr += om_tostring(get_dbl_parameter());
00277 opstr += " * ";
00278 opstr += subqs[0]->get_description();
00279 return opstr;
00280 }
00281 }
00282
00283 opstr = " " + get_op_name(op) + " ";
00284 if (op == Xapian::Query::OP_NEAR ||
00285 op == Xapian::Query::OP_PHRASE ||
00286 op == Xapian::Query::OP_ELITE_SET)
00287 opstr += om_tostring(parameter) + " ";
00288
00289 string description;
00290 subquery_list::const_iterator i;
00291 for (i = subqs.begin(); i != subqs.end(); i++) {
00292 if (!description.empty()) description += opstr;
00293 description += (**i).get_description();
00294 }
00295
00296 return "(" + description + ")";
00297 }
00298
00299 Xapian::termcount
00300 Xapian::Query::Internal::get_length() const
00301 {
00302 if (is_leaf(op)) return wqf;
00303 Xapian::termcount len = 0;
00304 subquery_list::const_iterator i;
00305 for (i = subqs.begin(); i != subqs.end(); ++i) {
00306 len += (**i).get_length();
00307 }
00308 return len;
00309 }
00310
00312 void
00313 Xapian::Query::Internal::accumulate_terms(
00314 vector<pair<string, Xapian::termpos> > &terms) const
00315 {
00316 if (is_leaf(op)) {
00317
00318
00319 if (!tname.empty())
00320 terms.push_back(make_pair(tname, term_pos));
00321 } else {
00322 subquery_list::const_iterator end = subqs.end();
00323
00324 for (subquery_list::const_iterator i = subqs.begin(); i != end; ++i) {
00325 (*i)->accumulate_terms(terms);
00326 }
00327 }
00328 }
00329
00330 struct LessByTermpos {
00331 typedef const pair<string, Xapian::termpos> argtype;
00332 bool operator()(argtype &left, argtype &right) {
00333 if (left.second != right.second) {
00334 return left.second < right.second;
00335 } else {
00336 return left.first < right.first;
00337 }
00338 }
00339 };
00340
00341 Xapian::TermIterator
00342 Xapian::Query::Internal::get_terms() const
00343 {
00344 vector<pair<string, Xapian::termpos> > terms;
00345 accumulate_terms(terms);
00346
00347 sort(terms.begin(), terms.end(), LessByTermpos());
00348
00349
00350
00351 vector<pair<string, Xapian::termpos> >::iterator newlast =
00352 unique(terms.begin(), terms.end());
00353
00354 terms.erase(newlast, terms.end());
00355
00356 vector<string> result;
00357 vector<pair<string, Xapian::termpos> >::const_iterator i;
00358 for (i = terms.begin(); i != terms.end(); ++i) {
00359 result.push_back(i->first);
00360 }
00361
00362 return Xapian::TermIterator(new VectorTermList(result.begin(),
00363 result.end()));
00364 }
00365
00366 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00367
00368
00369 class QUnserial {
00370 private:
00371 const char *p;
00372 const char *end;
00373 Xapian::termpos curpos;
00374
00375 Xapian::Query::Internal * readquery();
00376 Xapian::Query::Internal * readcompound();
00377
00378 public:
00379 QUnserial(const string & s) : p(s.c_str()), end(p + s.size()), curpos(1) { }
00380 Xapian::Query::Internal * decode();
00381 };
00382
00383 Xapian::Query::Internal *
00384 QUnserial::decode() {
00385 DEBUGLINE(UNKNOWN, "QUnserial::decode(" << p << ")");
00386 AutoPtr<Xapian::Query::Internal> qint(readquery());
00387 if (p != end)
00388 throw Xapian::InvalidArgumentError("Bad serialised query");
00389 return qint.release();
00390 }
00391
00392 Xapian::Query::Internal *
00393 QUnserial::readquery() {
00394 if (p == end)
00395 throw Xapian::InvalidArgumentError("Bad serialised query");
00396 switch (*p++) {
00397 case '[': {
00398 size_t length = decode_length(&p, end, true);
00399 string tname(p, length);
00400 p += length;
00401 Xapian::termpos term_pos = curpos;
00402 Xapian::termcount wqf = 1;
00403 if (p != end) {
00404 if (*p == '@') {
00405 char *tmp;
00406 term_pos = strtol(p + 1, &tmp, 10);
00407 p = tmp;
00408 }
00409 if (*p == '#') {
00410 char *tmp;
00411 wqf = strtol(p + 1, &tmp, 10);
00412 p = tmp;
00413 }
00414 }
00415 ++curpos;
00416 return new Xapian::Query::Internal(tname, wqf, term_pos);
00417 }
00418 case '(':
00419 return readcompound();
00420 default:
00421 DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00422 throw Xapian::InvalidArgumentError("Invalid query string");
00423 }
00424 }
00425
00426 static Xapian::Query::Internal *
00427 qint_from_vector(Xapian::Query::op op,
00428 const vector<Xapian::Query::Internal *> & vec,
00429 Xapian::termcount parameter = 0)
00430 {
00431 Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00432 vector<Xapian::Query::Internal *>::const_iterator i;
00433 for (i = vec.begin(); i != vec.end(); i++) {
00434 qint->add_subquery_nocopy(*i);
00435 }
00436 Xapian::Query::Internal * r = qint->end_construction();
00437
00438
00439 AssertEq(r, qint);
00440 return r;
00441 }
00442
00443 static Xapian::Query::Internal *
00444 qint_from_vector(Xapian::Query::op op,
00445 const vector<Xapian::Query::Internal *> & vec,
00446 Xapian::termcount parameter,
00447 double dbl_parameter)
00448 {
00449 Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00450 qint->set_dbl_parameter(dbl_parameter);
00451 vector<Xapian::Query::Internal *>::const_iterator i;
00452 for (i = vec.begin(); i != vec.end(); i++) {
00453 qint->add_subquery_nocopy(*i);
00454 }
00455 Xapian::Query::Internal * r = qint->end_construction();
00456
00457
00458 AssertEq(r, qint);
00459 return r;
00460 }
00461
00462 Xapian::Query::Internal *
00463 QUnserial::readcompound() {
00464 vector<Xapian::Query::Internal *> subqs;
00465 try {
00466 while (true) {
00467 if (p == end)
00468 throw Xapian::InvalidArgumentError("Bad serialised query");
00469 switch (*p++) {
00470 case '[':
00471 --p;
00472 subqs.push_back(readquery());
00473 break;
00474 case '(': {
00475 subqs.push_back(readcompound());
00476 break;
00477 }
00478 case '&':
00479 return qint_from_vector(Xapian::Query::OP_AND, subqs);
00480 case '|':
00481 return qint_from_vector(Xapian::Query::OP_OR, subqs);
00482 case '%':
00483 return qint_from_vector(Xapian::Query::OP_FILTER, subqs);
00484 case '^':
00485 return qint_from_vector(Xapian::Query::OP_XOR, subqs);
00486 case '+':
00487 return qint_from_vector(Xapian::Query::OP_AND_MAYBE, subqs);
00488 case '-':
00489 return qint_from_vector(Xapian::Query::OP_AND_NOT, subqs);
00490 case '~': {
00491 char *tmp;
00492 Xapian::termcount window(strtol(p, &tmp, 10));
00493 p = tmp;
00494 return qint_from_vector(Xapian::Query::OP_NEAR, subqs, window);
00495 }
00496 case '"': {
00497 char *tmp;
00498 Xapian::termcount window(strtol(p, &tmp, 10));
00499 p = tmp;
00500 return qint_from_vector(Xapian::Query::OP_PHRASE, subqs, window);
00501 }
00502 case '*': {
00503 char *tmp;
00504 Xapian::termcount elite_set_size(strtol(p, &tmp, 10));
00505 p = tmp;
00506 return qint_from_vector(Xapian::Query::OP_ELITE_SET, subqs,
00507 elite_set_size);
00508 }
00509 case ']': {
00510 size_t len = decode_length(&p, end, true);
00511 string start(p, len);
00512 p += len;
00513 len = decode_length(&p, end, true);
00514 string stop(p, len);
00515 p += len;
00516 char *tmp;
00517 Xapian::valueno valno = strtoul(p, &tmp, 10);
00518 p = tmp;
00519 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_RANGE, valno,
00520 start, stop);
00521 }
00522 case '}': {
00523 size_t len = decode_length(&p, end, true);
00524 string start(p, len);
00525 p += len;
00526 char *tmp;
00527 Xapian::valueno valno = strtoul(p, &tmp, 10);
00528 p = tmp;
00529 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_GE, valno,
00530 start);
00531 }
00532 case '{': {
00533 size_t len = decode_length(&p, end, true);
00534 string start(p, len);
00535 p += len;
00536 char *tmp;
00537 Xapian::valueno valno = strtoul(p, &tmp, 10);
00538 p = tmp;
00539 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_LE, valno,
00540 start);
00541 }
00542 case '.': {
00543 double param = unserialise_double(&p, end);
00544 return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
00545 subqs, 0, param);
00546 }
00547 default:
00548 DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00549 throw Xapian::InvalidArgumentError("Invalid query string");
00550 }
00551 }
00552 } catch (...) {
00553 vector<Xapian::Query::Internal *>::iterator i;
00554 for (i = subqs.begin(); i != subqs.end(); i++)
00555 delete *i;
00556 throw;
00557 }
00558 }
00559
00560 Xapian::Query::Internal *
00561 Xapian::Query::Internal::unserialise(const string &s)
00562 {
00563 Assert(s.length() > 1);
00564 QUnserial u(s);
00565 Xapian::Query::Internal * qint = u.decode();
00566 AssertEq(s, qint->serialise());
00567 return qint;
00568 }
00569 #else
00570 Xapian::Query::Internal *
00571 Xapian::Query::Internal::unserialise(const string &)
00572 {
00573 throw Xapian::InternalError("query serialisation not compiled in");
00574 }
00575 #endif
00576
00577 Xapian::Query::Internal::Internal(const Xapian::Query::Internal ©me)
00578 : Xapian::Internal::RefCntBase(),
00579 op(copyme.op),
00580 subqs(),
00581 parameter(copyme.parameter),
00582 tname(copyme.tname),
00583 str_parameter(copyme.str_parameter),
00584 term_pos(copyme.term_pos),
00585 wqf(copyme.wqf)
00586 {
00587 for (subquery_list::const_iterator i = copyme.subqs.begin();
00588 i != copyme.subqs.end();
00589 ++i) {
00590 subqs.push_back(new Xapian::Query::Internal(**i));
00591 }
00592 }
00593
00595
00596
00597 Xapian::Query::Internal::Internal(const string & tname_, Xapian::termcount wqf_,
00598 Xapian::termpos term_pos_)
00599 : op(Xapian::Query::Internal::OP_LEAF),
00600 subqs(),
00601 parameter(0),
00602 tname(tname_),
00603 term_pos(term_pos_),
00604 wqf(wqf_)
00605 {
00606 validate_query();
00607 }
00608
00609 Xapian::Query::Internal::Internal(op_t op_, Xapian::termcount parameter_)
00610 : op(op_),
00611 subqs(),
00612 parameter(parameter_),
00613 tname(),
00614 term_pos(0),
00615 wqf(0)
00616 {
00617 if (parameter != 0 && op != OP_PHRASE && op != OP_NEAR && op != OP_ELITE_SET)
00618 throw Xapian::InvalidArgumentError("parameter is only meaningful for OP_NEAR, OP_PHRASE, or OP_ELITE_SET");
00619 }
00620
00621 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00622 const string &begin, const string &end)
00623 : op(op_),
00624 parameter(Xapian::termcount(valno)),
00625 tname(begin),
00626 str_parameter(end)
00627 {
00628 if (op != OP_VALUE_RANGE)
00629 throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_RANGE");
00630 validate_query();
00631 }
00632
00633 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00634 const std::string &value)
00635 : op(op_),
00636 parameter(Xapian::termcount(valno)),
00637 tname(value)
00638 {
00639 if (op != OP_VALUE_GE && op != OP_VALUE_LE)
00640 throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_GE or OP_VALUE_LE");
00641 if (op == OP_VALUE_GE && value.empty()) {
00642
00643 op = OP_LEAF;
00644 parameter = 0;
00645 term_pos = 0;
00646 wqf = 1;
00647 }
00648 validate_query();
00649 }
00650
00651 Xapian::Query::Internal::~Internal()
00652 {
00653 subquery_list::iterator i;
00654 for (i = subqs.begin(); i != subqs.end(); i++) {
00655 delete *i;
00656 }
00657 }
00658
00659 Xapian::Query::Internal *
00660 Xapian::Query::Internal::end_construction()
00661 {
00662 DEBUGCALL(API, void, "Xapian::Query::Internal::end_construction", "");
00663 validate_query();
00664 Xapian::Query::Internal * qint = simplify_query();
00665 if (qint) qint->validate_query();
00666 return qint;
00667 }
00668
00669 void
00670 Xapian::Query::Internal::validate_query() const
00671 {
00672 DEBUGCALL(API, void, "Xapian::Query::Internal::validate_query", "");
00673
00674
00675 if (subqs.size() < get_min_subqs(op) ||
00676 subqs.size() > get_max_subqs(op)) {
00677 throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) +
00678 " requires a minimum of " + om_tostring(get_min_subqs(op)) +
00679 " and a maximum of " + om_tostring(get_max_subqs(op)) +
00680 " sub queries, had " +
00681 om_tostring(subqs.size()) + ".");
00682 }
00683
00684 if (op == OP_SCALE_WEIGHT && get_dbl_parameter() < 0) {
00685 throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) + " requires a non-negative parameter.");
00686 }
00687
00688
00689
00690 Assert(is_leaf(op) ||
00691 op == OP_VALUE_RANGE ||
00692 op == OP_VALUE_GE ||
00693 op == OP_VALUE_LE ||
00694 tname.empty());
00695 }
00696
00697 bool
00698 Xapian::Query::Internal::simplify_matchnothing()
00699 {
00700 subquery_list::iterator sq;
00701 switch (op) {
00702 case OP_PHRASE:
00703 case OP_NEAR:
00704 case OP_AND:
00705 case OP_FILTER:
00706
00707
00708 for (sq = subqs.begin(); sq != subqs.end(); sq++) {
00709 if (*sq == 0) {
00710 for (sq = subqs.begin(); sq != subqs.end(); sq++)
00711 delete *sq;
00712 subqs.clear();
00713 return true;
00714 }
00715 }
00716 break;
00717 case OP_ELITE_SET:
00718 case OP_OR:
00719 case OP_XOR:
00720
00721
00722
00723
00724 sq = subqs.begin();
00725 while (sq != subqs.end() && subqs.size() > 1) {
00726 if (*sq == 0) {
00727 sq = subqs.erase(sq);
00728 } else {
00729 ++sq;
00730 }
00731 }
00732 break;
00733 case OP_AND_MAYBE:
00734 case OP_AND_NOT:
00735
00736
00737
00738
00739
00740
00741 Assert(subqs.size() == 2);
00742 if (subqs[0] == 0 || subqs[1] == 0) {
00743 sq = subqs.begin();
00744 ++sq;
00745 delete *sq;
00746 subqs.erase(sq);
00747 }
00748 break;
00749 case OP_SCALE_WEIGHT:
00750 Assert(subqs.size() == 1);
00751
00752
00753 Assert(subqs[0]);
00754 break;
00755 case OP_LEAF:
00756
00757 break;
00758 }
00759 return false;
00760 }
00761
00762 Xapian::Query::Internal *
00763 Xapian::Query::Internal::simplify_query()
00764 {
00765 DEBUGCALL(API, Xapian::Query::Internal *, "Xapian::Query::Internal::simplify_query", "");
00766
00767
00768 if (simplify_matchnothing()) {
00769 return 0;
00770 }
00771
00772
00773 switch (op) {
00774 case OP_LEAF:
00775 return this;
00776 case OP_VALUE_RANGE:
00777
00778
00779 if (tname > str_parameter) return 0;
00780 return this;
00781 case OP_VALUE_GE:
00782 case OP_VALUE_LE:
00783 return this;
00784 case OP_SCALE_WEIGHT:
00785 if (fabs(get_dbl_parameter() - 1.0) > DBL_EPSILON) return this;
00786
00787
00788 break;
00789 case OP_PHRASE: case OP_NEAR:
00790
00791
00792 if (subqs.size() <= 1) break;
00793
00794
00795 if (!parameter) parameter = subqs.size();
00796
00797
00798 return flatten_subqs();
00799 case OP_ELITE_SET:
00800 if (!parameter) {
00801
00802
00803 if (subqs.size() <= 100) {
00804 parameter = 10;
00805 } else {
00806 parameter = Xapian::termcount(ceil(sqrt(double(subqs.size()))));
00807 Assert(parameter > 10);
00808 }
00809 }
00810 break;
00811 case OP_OR: case OP_AND: case OP_XOR:
00812
00813 if (subqs.size() > 1) collapse_subqs();
00814 break;
00815 default:
00816 break;
00817 }
00818
00819
00820 if (subqs.empty())
00821 return 0;
00822
00823
00824
00825 if (subqs.size() == 1) {
00826 Xapian::Query::Internal * qint = subqs[0];
00827 subqs[0] = 0;
00828 return qint;
00829 }
00830
00831 return this;
00832 }
00833
00834 struct SortPosName {
00835 bool operator()(const Xapian::Query::Internal * left,
00836 const Xapian::Query::Internal * right) const {
00837 Assert(left != 0);
00838 Assert(right != 0);
00839 Assert(is_leaf(left->op));
00840 Assert(is_leaf(right->op));
00841 if (left->term_pos != right->term_pos) {
00842 return left->term_pos < right->term_pos;
00843 } else {
00844 return left->tname < right->tname;
00845 }
00846 }
00847 };
00848
00852 void
00853 Xapian::Query::Internal::collapse_subqs()
00854 {
00855 Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
00856 typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
00857 subqtable sqtab;
00858
00859 subquery_list::iterator sq = subqs.begin();
00860 while (sq != subqs.end()) {
00861 Assert(*sq != 0);
00862 if (is_leaf((*sq)->op)) {
00863 Assert((*sq)->subqs.empty());
00864 subqtable::iterator s = sqtab.find(*sq);
00865 if (s == sqtab.end()) {
00866 sqtab.insert(*sq);
00867 ++sq;
00868 } else {
00869 AssertEq((*s)->tname, (*sq)->tname);
00870 AssertEq((*s)->term_pos, (*sq)->term_pos);
00871 (*s)->wqf += (*sq)->wqf;
00872
00873
00874
00875 delete *sq;
00876 sq = subqs.erase(sq);
00877 }
00878 } else {
00879 ++sq;
00880 }
00881 }
00882 }
00883
00885 Xapian::Query::Internal *
00886 Xapian::Query::Internal::flatten_subqs()
00887 {
00888 Assert(op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE);
00889
00890 subquery_list::iterator sq;
00891 for (sq = subqs.begin(); sq != subqs.end(); ++sq) {
00892 if (!is_leaf((*sq)->op)) break;
00893 }
00894
00895 if (sq == subqs.end()) return this;
00896
00897 if ((*sq)->op == Xapian::Query::OP_NEAR ||
00898 (*sq)->op == Xapian::Query::OP_PHRASE) {
00899
00900 throw Xapian::UnimplementedError("Can't use NEAR/PHRASE with a subexpression containing NEAR or PHRASE");
00901 }
00902
00903 AutoPtr<Xapian::Query::Internal> flattenme(*sq);
00904 *sq = 0;
00905
00906 subquery_list::iterator j;
00907 for (j = flattenme->subqs.begin(); j != flattenme->subqs.end(); ++j) {
00908 *sq = *j;
00909 *j = 0;
00910 AutoPtr<Xapian::Query::Internal> newq(new Xapian::Query::Internal(*this));
00911 delete *sq;
00912 *sq = 0;
00913 newq = newq->flatten_subqs();
00914 *j = newq.release();
00915 }
00916
00917 if (flattenme->op == OP_AND ||
00918 flattenme->op == OP_OR ||
00919 flattenme->op == OP_XOR) {
00920 size_t i = flattenme->subqs.size();
00921 do {
00922 --i;
00923 Xapian::Query::Internal * q = flattenme->subqs[i];
00924 if (flattenme->op == q->op) {
00925 subquery_list::iterator k;
00926 for (k = q->subqs.begin(), ++k;
00927 k != q->subqs.end();
00928 ++k) {
00929 flattenme->subqs.push_back(0);
00930 flattenme->subqs.back() = *k;
00931 *k = 0;
00932 }
00933 flattenme->subqs[i] = q->subqs[0];
00934 q->subqs.clear();
00935 delete q;
00936 }
00937 } while (i != 0);
00938 }
00939
00940 return flattenme.release();
00941 }
00942
00943 void
00944 Xapian::Query::Internal::add_subquery(const Xapian::Query::Internal * subq)
00945 {
00946 Assert(!is_leaf(op));
00947 if (subq == 0) {
00948 subqs.push_back(0);
00949 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
00950
00951 for (subquery_list::const_iterator i = subq->subqs.begin();
00952 i != subq->subqs.end(); i++) {
00953 add_subquery(*i);
00954 }
00955 } else {
00956 subqs.push_back(new Xapian::Query::Internal(*subq));
00957 }
00958 }
00959
00960 void
00961 Xapian::Query::Internal::add_subquery_nocopy(Xapian::Query::Internal * subq)
00962 {
00963 Assert(!is_leaf(op));
00964 if (subq == 0) {
00965 subqs.push_back(0);
00966 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
00967
00968 for (subquery_list::const_iterator i = subq->subqs.begin();
00969 i != subq->subqs.end(); i++) {
00970 add_subquery(*i);
00971 }
00972 delete subq;
00973 } else {
00974 subqs.push_back(subq);
00975 }
00976 }
00977
00978 void
00979 Xapian::Query::Internal::set_dbl_parameter(double dbl_parameter_)
00980 {
00981
00982
00983
00984
00985 str_parameter = serialise_double(dbl_parameter_);
00986 }
00987
00988 double
00989 Xapian::Query::Internal::get_dbl_parameter() const
00990 {
00991
00992
00993
00994
00995 const char * p = str_parameter.data();
00996 const char * end = p + str_parameter.size();
00997 return unserialise_double(&p, end);
00998 }