00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025
00026 #include "omqueryinternal.h"
00027
00028 #include "debuglog.h"
00029 #include "registryinternal.h"
00030 #include "serialise.h"
00031 #include "serialise-double.h"
00032 #include "str.h"
00033
00034 #include <xapian/error.h>
00035 #include <xapian/postingsource.h>
00036 #include <xapian/termiterator.h>
00037 #include <xapian/version.h>
00038 #include "vectortermlist.h"
00039
00040 #include <algorithm>
00041 #include "autoptr.h"
00042 #include <cfloat>
00043 #include <climits>
00044 #include <cmath>
00045 #include <set>
00046 #include <vector>
00047
00048 using namespace std;
00049
00050
00051
00052 static unsigned int
00053 get_min_subqs(Xapian::Query::Internal::op_t op)
00054 {
00055 switch (op) {
00056 case Xapian::Query::Internal::OP_EXTERNAL_SOURCE:
00057 case Xapian::Query::Internal::OP_LEAF:
00058 case Xapian::Query::OP_AND:
00059 case Xapian::Query::OP_OR:
00060 case Xapian::Query::OP_XOR:
00061 case Xapian::Query::OP_NEAR:
00062 case Xapian::Query::OP_PHRASE:
00063 case Xapian::Query::OP_ELITE_SET:
00064 case Xapian::Query::OP_VALUE_RANGE:
00065 case Xapian::Query::OP_VALUE_GE:
00066 case Xapian::Query::OP_VALUE_LE:
00067 case Xapian::Query::OP_SYNONYM:
00068 return 0;
00069 case Xapian::Query::OP_SCALE_WEIGHT:
00070 return 1;
00071 case Xapian::Query::OP_FILTER:
00072 case Xapian::Query::OP_AND_MAYBE:
00073 case Xapian::Query::OP_AND_NOT:
00074 return 2;
00075 default:
00076 Assert(false);
00077 throw Xapian::InvalidOperationError("get_min_subqs called with invalid operator type");
00078 }
00079 }
00080
00081 static unsigned int
00082 get_max_subqs(Xapian::Query::Internal::op_t op)
00083 {
00084 switch (op) {
00085 case Xapian::Query::Internal::OP_EXTERNAL_SOURCE:
00086 case Xapian::Query::Internal::OP_LEAF:
00087 case Xapian::Query::OP_VALUE_RANGE:
00088 case Xapian::Query::OP_VALUE_GE:
00089 case Xapian::Query::OP_VALUE_LE:
00090 return 0;
00091 case Xapian::Query::OP_SCALE_WEIGHT:
00092 return 1;
00093 case Xapian::Query::OP_FILTER:
00094 case Xapian::Query::OP_AND_MAYBE:
00095 case Xapian::Query::OP_AND_NOT:
00096 return 2;
00097 case Xapian::Query::OP_AND:
00098 case Xapian::Query::OP_OR:
00099 case Xapian::Query::OP_XOR:
00100 case Xapian::Query::OP_NEAR:
00101 case Xapian::Query::OP_PHRASE:
00102 case Xapian::Query::OP_ELITE_SET:
00103 case Xapian::Query::OP_SYNONYM:
00104 return UINT_MAX;
00105 default:
00106 Assert(false);
00107 throw Xapian::InvalidOperationError("get_max_subqs called with invalid operator type");
00108 }
00109 }
00110
00111 static inline bool
00112 is_leaf(Xapian::Query::Internal::op_t op)
00113 {
00114 return (op == Xapian::Query::Internal::OP_LEAF);
00115 }
00116
00117 inline bool
00118 is_distributable(Xapian::Query::Internal::op_t op)
00119 {
00120 switch (op) {
00121 case Xapian::Query::OP_AND:
00122 case Xapian::Query::OP_OR:
00123 case Xapian::Query::OP_XOR:
00124 case Xapian::Query::OP_SYNONYM:
00125 return true;
00126 default:
00127 return false;
00128 }
00129 }
00130
00131
00132
00151 string
00152 Xapian::Query::Internal::serialise(Xapian::termpos & curpos) const
00153 {
00154 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00155 string result;
00156
00157 if (op == Xapian::Query::Internal::OP_LEAF) {
00158 result += '[';
00159 result += encode_length(tname.length());
00160 result += tname;
00161 if (term_pos != curpos) result += '@' + encode_length(term_pos);
00162
00163 if (parameter != 1) result += '#' + encode_length(parameter);
00164 ++curpos;
00165 } else if (op == Xapian::Query::Internal::OP_EXTERNAL_SOURCE) {
00166 string sourcename = external_source->name();
00167 if (sourcename.empty())
00168 throw Xapian::UnimplementedError("This PostingSource doesn't support remote use.");
00169 result += '!';
00170 result += encode_length(sourcename.length());
00171 result += sourcename;
00172 string sourcedata = external_source->serialise();
00173 result += encode_length(sourcedata.length());
00174 result += sourcedata;
00175 } else {
00176 result += "(";
00177 for (subquery_list::const_iterator i = subqs.begin();
00178 i != subqs.end();
00179 ++i) {
00180 result += (*i)->serialise(curpos);
00181 }
00182 switch (op) {
00183 case Xapian::Query::Internal::OP_LEAF:
00184 Assert(false);
00185 break;
00186 case Xapian::Query::Internal::OP_EXTERNAL_SOURCE:
00187 Assert(false);
00188 break;
00189 case Xapian::Query::OP_AND:
00190 result += "&";
00191 break;
00192 case Xapian::Query::OP_OR:
00193 result += "|";
00194 break;
00195 case Xapian::Query::OP_FILTER:
00196 result += "%";
00197 break;
00198 case Xapian::Query::OP_AND_MAYBE:
00199 result += "+";
00200 break;
00201 case Xapian::Query::OP_AND_NOT:
00202 result += "-";
00203 break;
00204 case Xapian::Query::OP_XOR:
00205 result += "^";
00206 break;
00207 case Xapian::Query::OP_NEAR:
00208 result += "~" + encode_length(parameter);
00209 break;
00210 case Xapian::Query::OP_PHRASE:
00211 result += "\"" + encode_length(parameter);
00212 break;
00213 case Xapian::Query::OP_ELITE_SET:
00214 result += "*" + encode_length(parameter);
00215 break;
00216 case Xapian::Query::OP_VALUE_RANGE:
00217 result += "]";
00218 result += encode_length(tname.length());
00219 result += tname;
00220 result += encode_length(str_parameter.length());
00221 result += str_parameter;
00222 result += encode_length(parameter);
00223 break;
00224 case Xapian::Query::OP_VALUE_GE:
00225 result += "}";
00226 result += encode_length(tname.length());
00227 result += tname;
00228 result += encode_length(parameter);
00229 break;
00230 case Xapian::Query::OP_VALUE_LE:
00231 result += "{";
00232 result += encode_length(tname.length());
00233 result += tname;
00234 result += encode_length(parameter);
00235 break;
00236 case Xapian::Query::OP_SCALE_WEIGHT:
00237 result += ".";
00238 result += str_parameter;
00239 break;
00240 case Xapian::Query::OP_SYNONYM:
00241 result += "=";
00242 break;
00243 }
00244 }
00245 return result;
00246 #else
00247 (void)curpos;
00248 throw Xapian::InternalError("query serialisation not compiled in");
00249 #endif
00250 }
00251
00252 string
00253 Xapian::Query::Internal::get_op_name(Xapian::Query::Internal::op_t op)
00254 {
00255 string name;
00256 switch (op) {
00257 case Xapian::Query::Internal::OP_EXTERNAL_SOURCE:
00258 name = "EXTERNAL_SOURCE"; break;
00259 case Xapian::Query::Internal::OP_LEAF: name = "LEAF"; break;
00260 case Xapian::Query::OP_AND: name = "AND"; break;
00261 case Xapian::Query::OP_OR: name = "OR"; break;
00262 case Xapian::Query::OP_FILTER: name = "FILTER"; break;
00263 case Xapian::Query::OP_AND_MAYBE: name = "AND_MAYBE"; break;
00264 case Xapian::Query::OP_AND_NOT: name = "AND_NOT"; break;
00265 case Xapian::Query::OP_XOR: name = "XOR"; break;
00266 case Xapian::Query::OP_NEAR: name = "NEAR"; break;
00267 case Xapian::Query::OP_PHRASE: name = "PHRASE"; break;
00268 case Xapian::Query::OP_ELITE_SET: name = "ELITE_SET"; break;
00269 case Xapian::Query::OP_VALUE_RANGE: name = "VALUE_RANGE"; break;
00270 case Xapian::Query::OP_VALUE_GE: name = "VALUE_GE"; break;
00271 case Xapian::Query::OP_VALUE_LE: name = "VALUE_LE"; break;
00272 case Xapian::Query::OP_SCALE_WEIGHT: name = "SCALE_WEIGHT"; break;
00273 case Xapian::Query::OP_SYNONYM: name = "SYNONYM"; break;
00274 }
00275 return name;
00276 }
00277
00278 string
00279 Xapian::Query::Internal::get_description() const
00280 {
00281 string opstr;
00282
00283 if (is_leaf(op)) {
00284 if (term_pos != 0) {
00285 opstr += "pos=" + str(term_pos);
00286 }
00287
00288 if (parameter != 1) {
00289 if (!opstr.empty()) opstr += ",";
00290 opstr += "wqf=" + str(parameter);
00291 }
00292 if (!opstr.empty()) opstr = ":(" + opstr + ")";
00293 if (tname.empty()) return "<alldocuments>" + opstr;
00294 return tname + opstr;
00295 }
00296
00297 switch (op) {
00298 case Xapian::Query::OP_VALUE_RANGE:
00299 opstr = get_op_name(op);
00300 opstr += ' ';
00301 opstr += str(parameter);
00302 opstr += ' ';
00303 opstr += tname;
00304 opstr += ' ';
00305 opstr += str_parameter;
00306 return opstr;
00307 case Xapian::Query::OP_VALUE_GE:
00308 case Xapian::Query::OP_VALUE_LE:
00309 opstr = get_op_name(op);
00310 opstr += ' ';
00311 opstr += str(parameter);
00312 opstr += ' ';
00313 opstr += tname;
00314 return opstr;
00315 case Xapian::Query::OP_SCALE_WEIGHT:
00316 opstr += str(get_dbl_parameter());
00317 opstr += " * ";
00318 opstr += subqs[0]->get_description();
00319 return opstr;
00320 case Xapian::Query::Internal::OP_EXTERNAL_SOURCE:
00321 opstr = "PostingSource(";
00322 opstr += external_source->get_description();
00323 opstr += ')';
00324 return opstr;
00325 }
00326
00327 opstr = " " + get_op_name(op) + " ";
00328 if (op == Xapian::Query::OP_NEAR ||
00329 op == Xapian::Query::OP_PHRASE ||
00330 op == Xapian::Query::OP_ELITE_SET)
00331 opstr += str(parameter) + " ";
00332
00333 string description;
00334 subquery_list::const_iterator i;
00335 for (i = subqs.begin(); i != subqs.end(); i++) {
00336 if (!description.empty()) description += opstr;
00337 description += (**i).get_description();
00338 }
00339
00340 return "(" + description + ")";
00341 }
00342
00343 Xapian::termcount
00344 Xapian::Query::Internal::get_length() const
00345 {
00346 if (is_leaf(op)) {
00347
00348 return parameter;
00349 }
00350 Xapian::termcount len = 0;
00351 subquery_list::const_iterator i;
00352 for (i = subqs.begin(); i != subqs.end(); ++i) {
00353 len += (**i).get_length();
00354 }
00355 return len;
00356 }
00357
00359 void
00360 Xapian::Query::Internal::accumulate_terms(
00361 vector<pair<string, Xapian::termpos> > &terms) const
00362 {
00363 if (is_leaf(op)) {
00364
00365
00366 if (!tname.empty())
00367 terms.push_back(make_pair(tname, term_pos));
00368 } else {
00369 subquery_list::const_iterator end = subqs.end();
00370
00371 for (subquery_list::const_iterator i = subqs.begin(); i != end; ++i) {
00372 (*i)->accumulate_terms(terms);
00373 }
00374 }
00375 }
00376
00377 struct LessByTermpos {
00378 typedef const pair<string, Xapian::termpos> argtype;
00379 bool operator()(argtype &left, argtype &right) {
00380 if (left.second != right.second) {
00381 return left.second < right.second;
00382 } else {
00383 return left.first < right.first;
00384 }
00385 }
00386 };
00387
00388 Xapian::TermIterator
00389 Xapian::Query::Internal::get_terms() const
00390 {
00391 vector<pair<string, Xapian::termpos> > terms;
00392 accumulate_terms(terms);
00393
00394 sort(terms.begin(), terms.end(), LessByTermpos());
00395
00396
00397
00398 vector<pair<string, Xapian::termpos> >::iterator newlast =
00399 unique(terms.begin(), terms.end());
00400
00401 terms.erase(newlast, terms.end());
00402
00403 vector<string> result;
00404 vector<pair<string, Xapian::termpos> >::const_iterator i;
00405 for (i = terms.begin(); i != terms.end(); ++i) {
00406 result.push_back(i->first);
00407 }
00408
00409 return Xapian::TermIterator(new VectorTermList(result.begin(),
00410 result.end()));
00411 }
00412
00413 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00414
00415
00416 class QUnserial {
00417 private:
00418 const char *p;
00419 const char *end;
00420 Xapian::termpos curpos;
00421 const Xapian::Registry & reg;
00422
00423 Xapian::Query::Internal * readquery();
00424 Xapian::Query::Internal * readexternal();
00425 Xapian::Query::Internal * readcompound();
00426
00427 public:
00428 QUnserial(const string & s, const Xapian::Registry & reg_)
00429 : p(s.c_str()), end(p + s.size()), curpos(1), reg(reg_) { }
00430 Xapian::Query::Internal * decode();
00431 };
00432
00433 Xapian::Query::Internal *
00434 QUnserial::decode() {
00435 LOGLINE(UNKNOWN, "QUnserial::decode(" << p << ")");
00436 AutoPtr<Xapian::Query::Internal> qint(readquery());
00437 if (p != end)
00438 throw Xapian::InvalidArgumentError("Bad serialised query");
00439 return qint.release();
00440 }
00441
00442 Xapian::Query::Internal *
00443 QUnserial::readquery() {
00444 if (p == end)
00445 throw Xapian::InvalidArgumentError("Bad serialised query");
00446 switch (*p++) {
00447 case '[': {
00448 size_t length = decode_length(&p, end, true);
00449 string tname(p, length);
00450 p += length;
00451 Xapian::termpos term_pos = curpos;
00452 Xapian::termcount wqf = 1;
00453 if (p != end) {
00454 if (*p == '@') {
00455 ++p;
00456 term_pos = decode_length(&p, end, false);
00457 }
00458 if (*p == '#') {
00459 ++p;
00460 wqf = decode_length(&p, end, false);
00461 }
00462 }
00463 ++curpos;
00464 return new Xapian::Query::Internal(tname, wqf, term_pos);
00465 }
00466 case '!':
00467 return readexternal();
00468 case '(':
00469 return readcompound();
00470 default:
00471 LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00472 throw Xapian::InvalidArgumentError("Invalid query string");
00473 }
00474 }
00475
00476 Xapian::Query::Internal *
00477 QUnserial::readexternal()
00478 {
00479 if (p == end)
00480 throw Xapian::InvalidArgumentError("Bad serialised query");
00481
00482 size_t length = decode_length(&p, end, true);
00483 string sourcename(p, length);
00484 const Xapian::PostingSource * source = reg.get_posting_source(sourcename);
00485 if (source == NULL) {
00486 throw Xapian::InvalidArgumentError("PostingSource " + sourcename +
00487 " not registered");
00488 }
00489
00490 p += length;
00491 length = decode_length(&p, end, true);
00492 string sourcedata(p, length);
00493 p += length;
00494
00495 return new Xapian::Query::Internal(source->unserialise(sourcedata), true);
00496 }
00497
00498 static Xapian::Query::Internal *
00499 qint_from_vector(Xapian::Query::op op,
00500 const vector<Xapian::Query::Internal *> & vec,
00501 Xapian::termcount parameter = 0)
00502 {
00503 Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00504 vector<Xapian::Query::Internal *>::const_iterator i;
00505 for (i = vec.begin(); i != vec.end(); i++) {
00506 qint->add_subquery_nocopy(*i);
00507 }
00508 Xapian::Query::Internal * r = qint->end_construction();
00509
00510
00511 AssertEq(r, qint);
00512 return r;
00513 }
00514
00515 static Xapian::Query::Internal *
00516 qint_from_vector(Xapian::Query::op op,
00517 const vector<Xapian::Query::Internal *> & vec,
00518 Xapian::termcount parameter,
00519 double dbl_parameter)
00520 {
00521 Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00522 qint->set_dbl_parameter(dbl_parameter);
00523 vector<Xapian::Query::Internal *>::const_iterator i;
00524 for (i = vec.begin(); i != vec.end(); i++) {
00525 qint->add_subquery_nocopy(*i);
00526 }
00527 Xapian::Query::Internal * r = qint->end_construction();
00528
00529
00530 AssertEq(r, qint);
00531 return r;
00532 }
00533
00534 Xapian::Query::Internal *
00535 QUnserial::readcompound() {
00536 vector<Xapian::Query::Internal *> subqs;
00537 try {
00538 while (true) {
00539 if (p == end)
00540 throw Xapian::InvalidArgumentError("Bad serialised query");
00541 switch (*p++) {
00542 case '[':
00543 --p;
00544 subqs.push_back(readquery());
00545 break;
00546 case '!':
00547 subqs.push_back(readexternal());
00548 break;
00549 case '(': {
00550 subqs.push_back(readcompound());
00551 break;
00552 }
00553 case '&':
00554 return qint_from_vector(Xapian::Query::OP_AND, subqs);
00555 case '|':
00556 return qint_from_vector(Xapian::Query::OP_OR, subqs);
00557 case '%':
00558 return qint_from_vector(Xapian::Query::OP_FILTER, subqs);
00559 case '^':
00560 return qint_from_vector(Xapian::Query::OP_XOR, subqs);
00561 case '+':
00562 return qint_from_vector(Xapian::Query::OP_AND_MAYBE, subqs);
00563 case '-':
00564 return qint_from_vector(Xapian::Query::OP_AND_NOT, subqs);
00565 case '~': {
00566 Xapian::termcount window(decode_length(&p, end, false));
00567 return qint_from_vector(Xapian::Query::OP_NEAR, subqs, window);
00568 }
00569 case '"': {
00570 Xapian::termcount window(decode_length(&p, end, false));
00571 return qint_from_vector(Xapian::Query::OP_PHRASE, subqs, window);
00572 }
00573 case '*': {
00574 Xapian::termcount elite_set_size(decode_length(&p, end, false));
00575 return qint_from_vector(Xapian::Query::OP_ELITE_SET, subqs,
00576 elite_set_size);
00577 }
00578 case ']': {
00579 size_t len = decode_length(&p, end, true);
00580 string start(p, len);
00581 p += len;
00582 len = decode_length(&p, end, true);
00583 string stop(p, len);
00584 p += len;
00585 Xapian::valueno slot(decode_length(&p, end, false));
00586 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_RANGE, slot,
00587 start, stop);
00588 }
00589 case '}': {
00590 size_t len = decode_length(&p, end, true);
00591 string start(p, len);
00592 p += len;
00593 Xapian::valueno slot(decode_length(&p, end, false));
00594 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_GE, slot,
00595 start);
00596 }
00597 case '{': {
00598 size_t len = decode_length(&p, end, true);
00599 string start(p, len);
00600 p += len;
00601 Xapian::valueno slot(decode_length(&p, end, false));
00602 return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_LE, slot,
00603 start);
00604 }
00605 case '.': {
00606 double param = unserialise_double(&p, end);
00607 return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
00608 subqs, 0, param);
00609 }
00610 case '=': {
00611 return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs);
00612 }
00613 default:
00614 LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00615 throw Xapian::InvalidArgumentError("Invalid query string");
00616 }
00617 }
00618 } catch (...) {
00619 vector<Xapian::Query::Internal *>::iterator i;
00620 for (i = subqs.begin(); i != subqs.end(); i++)
00621 delete *i;
00622 throw;
00623 }
00624 }
00625
00626 Xapian::Query::Internal *
00627 Xapian::Query::Internal::unserialise(const string &s,
00628 const Xapian::Registry & reg)
00629 {
00630 Assert(s.length() > 1);
00631 QUnserial u(s, reg);
00632 Xapian::Query::Internal * qint = u.decode();
00633 AssertEq(s, qint->serialise());
00634 return qint;
00635 }
00636 #else
00637 Xapian::Query::Internal *
00638 Xapian::Query::Internal::unserialise(const string &, const Xapian::Registry &)
00639 {
00640 throw Xapian::InternalError("query serialisation not compiled in");
00641 }
00642 #endif
00643
00644 Xapian::Query::Internal::Internal(const Xapian::Query::Internal ©me)
00645 : Xapian::Internal::RefCntBase(),
00646 op(copyme.op),
00647 subqs(),
00648 parameter(copyme.parameter),
00649 tname(copyme.tname),
00650 str_parameter(copyme.str_parameter),
00651 term_pos(copyme.term_pos),
00652 external_source(NULL),
00653 external_source_owned(false)
00654 {
00655 for (subquery_list::const_iterator i = copyme.subqs.begin();
00656 i != copyme.subqs.end();
00657 ++i) {
00658 subqs.push_back(new Xapian::Query::Internal(**i));
00659 }
00660 if (copyme.external_source) {
00661 external_source = copyme.external_source->clone();
00662 if (external_source == NULL) {
00663 external_source = copyme.external_source;
00664 external_source_owned = false;
00665 } else {
00666 external_source_owned = true;
00667 }
00668 }
00669 }
00670
00672
00673
00674 Xapian::Query::Internal::Internal(const string & tname_, Xapian::termcount wqf_,
00675 Xapian::termpos term_pos_)
00676 : op(Xapian::Query::Internal::OP_LEAF),
00677 subqs(),
00678 parameter(wqf_),
00679 tname(tname_),
00680 term_pos(term_pos_),
00681 external_source(NULL),
00682 external_source_owned(false)
00683 {
00684 validate_query();
00685 }
00686
00687 Xapian::Query::Internal::Internal(op_t op_, Xapian::termcount parameter_)
00688 : op(op_),
00689 subqs(),
00690 parameter(parameter_),
00691 tname(),
00692 term_pos(0),
00693 external_source(NULL),
00694 external_source_owned(false)
00695 {
00696 if (parameter != 0 && op != OP_PHRASE && op != OP_NEAR && op != OP_ELITE_SET)
00697 throw Xapian::InvalidArgumentError("parameter is only meaningful for OP_NEAR, OP_PHRASE, or OP_ELITE_SET");
00698 }
00699
00700 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno slot,
00701 const string &begin, const string &end)
00702 : op(op_),
00703 parameter(Xapian::termcount(slot)),
00704 tname(begin),
00705 str_parameter(end),
00706 external_source(NULL),
00707 external_source_owned(false)
00708 {
00709 if (op != OP_VALUE_RANGE)
00710 throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_RANGE");
00711 validate_query();
00712 }
00713
00714 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno slot,
00715 const std::string &value)
00716 : op(op_),
00717 parameter(Xapian::termcount(slot)),
00718 tname(value),
00719 external_source(NULL),
00720 external_source_owned(false)
00721 {
00722 if (op != OP_VALUE_GE && op != OP_VALUE_LE)
00723 throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_GE or OP_VALUE_LE");
00724 if (op == OP_VALUE_GE && value.empty()) {
00725
00726 op = OP_LEAF;
00727 parameter = 1;
00728 term_pos = 0;
00729 }
00730 validate_query();
00731 }
00732
00733 Xapian::Query::Internal::Internal(PostingSource * external_source_, bool owned)
00734 : op(OP_EXTERNAL_SOURCE), external_source(external_source_),
00735 external_source_owned(owned)
00736 {
00737 Assert(external_source);
00738 }
00739
00740 Xapian::Query::Internal::~Internal()
00741 {
00742 subquery_list::iterator i;
00743 for (i = subqs.begin(); i != subqs.end(); i++) {
00744 delete *i;
00745 }
00746 if (external_source_owned) {
00747 delete external_source;
00748 }
00749 }
00750
00751 Xapian::Query::Internal *
00752 Xapian::Query::Internal::end_construction()
00753 {
00754 LOGCALL_VOID(MATCH, "Xapian::Query::Internal::end_construction", NO_ARGS);
00755 validate_query();
00756 Xapian::Query::Internal * qint = simplify_query();
00757 if (qint) qint->validate_query();
00758 return qint;
00759 }
00760
00761 void
00762 Xapian::Query::Internal::validate_query() const
00763 {
00764 LOGCALL_VOID(MATCH, "Xapian::Query::Internal::validate_query", NO_ARGS);
00765
00766
00767 if (subqs.size() < get_min_subqs(op) ||
00768 subqs.size() > get_max_subqs(op)) {
00769 throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) +
00770 " requires a minimum of " + str(get_min_subqs(op)) +
00771 " and a maximum of " + str(get_max_subqs(op)) +
00772 " sub queries, had " +
00773 str(subqs.size()) + ".");
00774 }
00775
00776 if (op == OP_SCALE_WEIGHT && get_dbl_parameter() < 0) {
00777 throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) + " requires a non-negative parameter.");
00778 }
00779
00780
00781
00782 Assert(is_leaf(op) ||
00783 op == OP_VALUE_RANGE ||
00784 op == OP_VALUE_GE ||
00785 op == OP_VALUE_LE ||
00786 tname.empty());
00787 }
00788
00789 bool
00790 Xapian::Query::Internal::simplify_matchnothing()
00791 {
00792 subquery_list::iterator sq;
00793 switch (op) {
00794 case OP_PHRASE:
00795 case OP_NEAR:
00796 case OP_AND:
00797 case OP_FILTER:
00798
00799
00800 for (sq = subqs.begin(); sq != subqs.end(); sq++) {
00801 if (*sq == 0) {
00802 for (sq = subqs.begin(); sq != subqs.end(); sq++)
00803 delete *sq;
00804 subqs.clear();
00805 return true;
00806 }
00807 }
00808 break;
00809 case OP_ELITE_SET:
00810 case OP_OR:
00811 case OP_XOR:
00812 case OP_SYNONYM:
00813
00814
00815
00816
00817 sq = subqs.begin();
00818 while (sq != subqs.end() && subqs.size() > 1) {
00819 if (*sq == 0) {
00820 sq = subqs.erase(sq);
00821 } else {
00822 ++sq;
00823 }
00824 }
00825 break;
00826 case OP_AND_MAYBE:
00827 case OP_AND_NOT:
00828
00829
00830
00831
00832
00833
00834 Assert(subqs.size() == 2);
00835 if (subqs[0] == 0 || subqs[1] == 0) {
00836 sq = subqs.begin();
00837 ++sq;
00838 delete *sq;
00839 subqs.erase(sq);
00840 }
00841 break;
00842 case OP_SCALE_WEIGHT:
00843 Assert(subqs.size() == 1);
00844
00845
00846 Assert(subqs[0]);
00847 break;
00848 case OP_LEAF:
00849
00850 break;
00851 }
00852 return false;
00853 }
00854
00855 Xapian::Query::Internal *
00856 Xapian::Query::Internal::simplify_query()
00857 {
00858 LOGCALL(MATCH, Xapian::Query::Internal *, "Xapian::Query::Internal::simplify_query", NO_ARGS);
00859
00860
00861 if (simplify_matchnothing()) {
00862 return 0;
00863 }
00864
00865
00866 switch (op) {
00867 case OP_LEAF:
00868 return this;
00869 case OP_VALUE_RANGE:
00870
00871
00872 if (tname > str_parameter) return 0;
00873 return this;
00874 case OP_VALUE_GE:
00875 case OP_VALUE_LE:
00876 return this;
00877 case OP_SCALE_WEIGHT:
00878 if (fabs(get_dbl_parameter() - 1.0) > DBL_EPSILON) return this;
00879
00880
00881 break;
00882 case OP_PHRASE: case OP_NEAR:
00883
00884
00885 if (subqs.size() <= 1) break;
00886
00887
00888 if (!parameter) parameter = subqs.size();
00889
00890
00891 return flatten_subqs();
00892 case OP_ELITE_SET:
00893 if (!parameter) {
00894
00895
00896 if (subqs.size() <= 100) {
00897 parameter = 10;
00898 } else {
00899 parameter = Xapian::termcount(ceil(sqrt(double(subqs.size()))));
00900 Assert(parameter > 10);
00901 }
00902 }
00903 break;
00904 case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM:
00905
00906 if (subqs.size() > 1) collapse_subqs();
00907 break;
00908 default:
00909 break;
00910 }
00911
00912
00913 if (subqs.empty())
00914 return 0;
00915
00916
00917
00918 if (subqs.size() == 1) {
00919 Xapian::Query::Internal * qint = subqs[0];
00920 subqs[0] = 0;
00921 return qint;
00922 }
00923
00924 return this;
00925 }
00926
00927 struct SortPosName {
00928 bool operator()(const Xapian::Query::Internal * left,
00929 const Xapian::Query::Internal * right) const {
00930 Assert(left != 0);
00931 Assert(right != 0);
00932 Assert(is_leaf(left->op));
00933 Assert(is_leaf(right->op));
00934 if (left->term_pos != right->term_pos) {
00935 return left->term_pos < right->term_pos;
00936 } else {
00937 return left->tname < right->tname;
00938 }
00939 }
00940 };
00941
00945 void
00946 Xapian::Query::Internal::collapse_subqs()
00947 {
00948 Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM);
00949 typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
00950 subqtable sqtab;
00951
00952 subquery_list::iterator sq = subqs.begin();
00953 while (sq != subqs.end()) {
00954 Assert(*sq != 0);
00955 if (is_leaf((*sq)->op)) {
00956 Assert((*sq)->subqs.empty());
00957 subqtable::iterator s = sqtab.find(*sq);
00958 if (s == sqtab.end()) {
00959 sqtab.insert(*sq);
00960 ++sq;
00961 } else {
00962 AssertEq((*s)->tname, (*sq)->tname);
00963 AssertEq((*s)->term_pos, (*sq)->term_pos);
00964
00965 (*s)->parameter += (*sq)->parameter;
00966
00967
00968
00969 delete *sq;
00970 sq = subqs.erase(sq);
00971 }
00972 } else {
00973 ++sq;
00974 }
00975 }
00976 }
00977
00979 Xapian::Query::Internal *
00980 Xapian::Query::Internal::flatten_subqs()
00981 {
00982 Assert(op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE);
00983
00984 subquery_list::iterator sq;
00985 for (sq = subqs.begin(); sq != subqs.end(); ++sq) {
00986 if (!is_leaf((*sq)->op)) break;
00987 }
00988
00989 if (sq == subqs.end()) return this;
00990
00991 if ((*sq)->op == Xapian::Query::OP_NEAR ||
00992 (*sq)->op == Xapian::Query::OP_PHRASE) {
00993
00994 throw Xapian::UnimplementedError("Can't use NEAR/PHRASE with a subexpression containing NEAR or PHRASE");
00995 }
00996
00997 AutoPtr<Xapian::Query::Internal> flattenme(*sq);
00998 *sq = 0;
00999
01000 subquery_list::iterator j;
01001 for (j = flattenme->subqs.begin(); j != flattenme->subqs.end(); ++j) {
01002 *sq = *j;
01003 *j = 0;
01004 AutoPtr<Xapian::Query::Internal> newq(new Xapian::Query::Internal(*this));
01005 delete *sq;
01006 *sq = 0;
01007 newq.reset(newq->flatten_subqs());
01008 *j = newq.release();
01009 }
01010
01011 if (flattenme->op == OP_AND ||
01012 flattenme->op == OP_OR ||
01013 flattenme->op == OP_XOR) {
01014 size_t i = flattenme->subqs.size();
01015 do {
01016 --i;
01017 Xapian::Query::Internal * q = flattenme->subqs[i];
01018 if (flattenme->op == q->op) {
01019 subquery_list::iterator k;
01020 for (k = q->subqs.begin(), ++k;
01021 k != q->subqs.end();
01022 ++k) {
01023 flattenme->subqs.push_back(0);
01024 flattenme->subqs.back() = *k;
01025 *k = 0;
01026 }
01027 flattenme->subqs[i] = q->subqs[0];
01028 q->subqs.clear();
01029 delete q;
01030 }
01031 } while (i != 0);
01032 }
01033
01034 return flattenme.release();
01035 }
01036
01037 void
01038 Xapian::Query::Internal::add_subquery(const Xapian::Query::Internal * subq)
01039 {
01040 Assert(!is_leaf(op));
01041 if (subq == 0) {
01042 subqs.push_back(0);
01043 } else if (op == subq->op && is_distributable(op)) {
01044
01045 for (subquery_list::const_iterator i = subq->subqs.begin();
01046 i != subq->subqs.end(); i++) {
01047 add_subquery(*i);
01048 }
01049 } else {
01050 subqs.push_back(new Xapian::Query::Internal(*subq));
01051 }
01052 }
01053
01054 void
01055 Xapian::Query::Internal::add_subquery_nocopy(Xapian::Query::Internal * subq)
01056 {
01057 Assert(!is_leaf(op));
01058 if (subq == 0) {
01059 subqs.push_back(0);
01060 } else if (op == subq->op && is_distributable(op)) {
01061
01062 for (subquery_list::const_iterator i = subq->subqs.begin();
01063 i != subq->subqs.end(); i++) {
01064 add_subquery(*i);
01065 }
01066 delete subq;
01067 } else {
01068 subqs.push_back(subq);
01069 }
01070 }
01071
01072 void
01073 Xapian::Query::Internal::set_dbl_parameter(double dbl_parameter_)
01074 {
01075
01076
01077
01078
01079 str_parameter = serialise_double(dbl_parameter_);
01080 }
01081
01082 double
01083 Xapian::Query::Internal::get_dbl_parameter() const
01084 {
01085
01086
01087
01088
01089 const char * p = str_parameter.data();
01090 const char * end = p + str_parameter.size();
01091 return unserialise_double(&p, end);
01092 }