api/omqueryinternal.cc

Go to the documentation of this file.
00001 /* omqueryinternal.cc: Internals of query interface
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
00006  * Copyright 2006,2007,2008 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 
00026 #include "autoptr.h"
00027 #include "omdebug.h"
00028 #include "omqueryinternal.h"
00029 #include "utils.h"
00030 #include "serialise.h"
00031 #include "serialise-double.h"
00032 
00033 #include <xapian/error.h>
00034 #include <xapian/termiterator.h>
00035 #include <xapian/version.h>
00036 #include "vectortermlist.h"
00037 
00038 #include <vector>
00039 #include <set>
00040 #include <algorithm>
00041 #include <math.h>
00042 #include <limits.h>
00043 #include <cfloat>
00044 
00045 using namespace std;
00046 
00047 // Properties for query operations.
00048 
00049 static unsigned int
00050 get_min_subqs(Xapian::Query::Internal::op_t op)
00051 {
00052     switch (op) {
00053         case Xapian::Query::Internal::OP_LEAF:
00054         case Xapian::Query::OP_AND:
00055         case Xapian::Query::OP_OR:
00056         case Xapian::Query::OP_XOR:
00057         case Xapian::Query::OP_NEAR:
00058         case Xapian::Query::OP_PHRASE:
00059         case Xapian::Query::OP_ELITE_SET:
00060         case Xapian::Query::OP_VALUE_RANGE:
00061         case Xapian::Query::OP_VALUE_GE:
00062         case Xapian::Query::OP_VALUE_LE:
00063             return 0;
00064         case Xapian::Query::OP_SCALE_WEIGHT:
00065             return 1;
00066         case Xapian::Query::OP_FILTER:
00067         case Xapian::Query::OP_AND_MAYBE:
00068         case Xapian::Query::OP_AND_NOT:
00069             return 2;
00070         default:
00071             Assert(false);
00072             throw Xapian::InvalidOperationError("get_min_subqs called with invalid operator type");
00073     }
00074 }
00075 
00076 static unsigned int
00077 get_max_subqs(Xapian::Query::Internal::op_t op)
00078 {
00079     switch (op) {
00080         case Xapian::Query::Internal::OP_LEAF:
00081         case Xapian::Query::OP_VALUE_RANGE:
00082         case Xapian::Query::OP_VALUE_GE:
00083         case Xapian::Query::OP_VALUE_LE:
00084             return 0;
00085         case Xapian::Query::OP_SCALE_WEIGHT:
00086             return 1;
00087         case Xapian::Query::OP_FILTER:
00088         case Xapian::Query::OP_AND_MAYBE:
00089         case Xapian::Query::OP_AND_NOT:
00090             return 2;
00091         case Xapian::Query::OP_AND:
00092         case Xapian::Query::OP_OR:
00093         case Xapian::Query::OP_XOR:
00094         case Xapian::Query::OP_NEAR:
00095         case Xapian::Query::OP_PHRASE:
00096         case Xapian::Query::OP_ELITE_SET:
00097             return UINT_MAX;
00098         default:
00099             Assert(false);
00100             throw Xapian::InvalidOperationError("get_max_subqs called with invalid operator type");
00101     }
00102 }
00103 
00104 static inline bool
00105 is_leaf(Xapian::Query::Internal::op_t op)
00106 {
00107     return (op == Xapian::Query::Internal::OP_LEAF);
00108 }
00109 
00110 // Methods for Xapian::Query::Internal
00111 
00130 string
00131 Xapian::Query::Internal::serialise(Xapian::termpos & curpos) const
00132 {
00133 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00134     string result;
00135 
00136     if (op == Xapian::Query::Internal::OP_LEAF) {
00137         result += '[';
00138         result += encode_length(tname.length());
00139         result += tname;
00140         if (term_pos != curpos) result += '@' + om_tostring(term_pos);
00141         if (wqf != 1) result += '#' + om_tostring(wqf);
00142         ++curpos;
00143     } else {
00144         result += "(";
00145         for (subquery_list::const_iterator i = subqs.begin();
00146              i != subqs.end();
00147              ++i) {
00148             result += (*i)->serialise(curpos);
00149         }
00150         switch (op) {
00151             case Xapian::Query::Internal::OP_LEAF:
00152                 Assert(false);
00153                 break;
00154             case Xapian::Query::OP_AND:
00155                 result += "&";
00156                 break;
00157             case Xapian::Query::OP_OR:
00158                 result += "|";
00159                 break;
00160             case Xapian::Query::OP_FILTER:
00161                 result += "%";
00162                 break;
00163             case Xapian::Query::OP_AND_MAYBE:
00164                 result += "+";
00165                 break;
00166             case Xapian::Query::OP_AND_NOT:
00167                 result += "-";
00168                 break;
00169             case Xapian::Query::OP_XOR:
00170                 result += "^";
00171                 break;
00172             case Xapian::Query::OP_NEAR:
00173                 result += "~" + om_tostring(parameter);
00174                 break;
00175             case Xapian::Query::OP_PHRASE:
00176                 result += "\"" + om_tostring(parameter);
00177                 break;
00178             case Xapian::Query::OP_ELITE_SET:
00179                 result += "*" + om_tostring(parameter);
00180                 break;
00181             case Xapian::Query::OP_VALUE_RANGE:
00182                 result += "]";
00183                 result += encode_length(tname.length());
00184                 result += tname;
00185                 result += encode_length(str_parameter.length());
00186                 result += str_parameter;
00187                 result += om_tostring(parameter);
00188                 break;
00189             case Xapian::Query::OP_VALUE_GE:
00190                 result += "}";
00191                 result += encode_length(tname.length());
00192                 result += tname;
00193                 result += om_tostring(parameter);
00194                 break;
00195             case Xapian::Query::OP_VALUE_LE:
00196                 result += "{";
00197                 result += encode_length(tname.length());
00198                 result += tname;
00199                 result += om_tostring(parameter);
00200                 break;
00201             case Xapian::Query::OP_SCALE_WEIGHT:
00202                 result += ".";
00203                 result += str_parameter; // serialise_double(get_dbl_parameter());
00204                 break;
00205         }
00206     }
00207     return result;
00208 #else
00209     (void)curpos;
00210     throw Xapian::InternalError("query serialisation not compiled in");
00211 #endif
00212 }
00213 
00214 string
00215 Xapian::Query::Internal::get_op_name(Xapian::Query::Internal::op_t op)
00216 {
00217     string name;
00218     switch (op) {
00219         case Xapian::Query::Internal::OP_LEAF:  name = "LEAF"; break;
00220         case Xapian::Query::OP_AND:             name = "AND"; break;
00221         case Xapian::Query::OP_OR:              name = "OR"; break;
00222         case Xapian::Query::OP_FILTER:          name = "FILTER"; break;
00223         case Xapian::Query::OP_AND_MAYBE:       name = "AND_MAYBE"; break;
00224         case Xapian::Query::OP_AND_NOT:         name = "AND_NOT"; break;
00225         case Xapian::Query::OP_XOR:             name = "XOR"; break;
00226         case Xapian::Query::OP_NEAR:            name = "NEAR"; break;
00227         case Xapian::Query::OP_PHRASE:          name = "PHRASE"; break;
00228         case Xapian::Query::OP_ELITE_SET:       name = "ELITE_SET"; break;
00229         case Xapian::Query::OP_VALUE_RANGE:     name = "VALUE_RANGE"; break;
00230         case Xapian::Query::OP_VALUE_GE:        name = "VALUE_GE"; break;
00231         case Xapian::Query::OP_VALUE_LE:        name = "VALUE_LE"; break;
00232         case Xapian::Query::OP_SCALE_WEIGHT:    name = "SCALE_WEIGHT"; break;
00233     }
00234     return name;
00235 }
00236 
00237 string
00238 Xapian::Query::Internal::get_description() const
00239 {
00240     string opstr;
00241 
00242     if (is_leaf(op)) {
00243         if (term_pos != 0) {
00244             opstr += "pos=" + om_tostring(term_pos);
00245         }
00246         if (wqf != 1) {
00247             if (!opstr.empty()) opstr += ",";
00248             opstr += "wqf=" + om_tostring(wqf);
00249         }
00250         if (!opstr.empty()) opstr = ":(" + opstr + ")";
00251         if (tname.empty()) return "<alldocuments>" + opstr;
00252         return tname + opstr;
00253     }
00254 
00255     switch (op) {
00256         case Xapian::Query::OP_VALUE_RANGE: {
00257             opstr = get_op_name(op);
00258             opstr += ' ';
00259             opstr += om_tostring(parameter);
00260             opstr += ' ';
00261             opstr += tname;
00262             opstr += ' ';
00263             opstr += str_parameter;
00264             return opstr;
00265         }
00266         case Xapian::Query::OP_VALUE_GE:
00267         case Xapian::Query::OP_VALUE_LE: {
00268             opstr = get_op_name(op);
00269             opstr += ' ';
00270             opstr += om_tostring(parameter);
00271             opstr += ' ';
00272             opstr += tname;
00273             return opstr;
00274         }
00275         case Xapian::Query::OP_SCALE_WEIGHT: {
00276             opstr += om_tostring(get_dbl_parameter());
00277             opstr += " * ";
00278             opstr += subqs[0]->get_description();
00279             return opstr;
00280         }
00281     }
00282 
00283     opstr = " " + get_op_name(op) + " ";
00284     if (op == Xapian::Query::OP_NEAR ||
00285         op == Xapian::Query::OP_PHRASE ||
00286         op == Xapian::Query::OP_ELITE_SET)
00287         opstr += om_tostring(parameter) + " ";
00288 
00289     string description;
00290     subquery_list::const_iterator i;
00291     for (i = subqs.begin(); i != subqs.end(); i++) {
00292         if (!description.empty()) description += opstr;
00293         description += (**i).get_description();
00294     }
00295 
00296     return "(" + description + ")";
00297 }
00298 
00299 Xapian::termcount
00300 Xapian::Query::Internal::get_length() const
00301 {
00302     if (is_leaf(op)) return wqf;
00303     Xapian::termcount len = 0;
00304     subquery_list::const_iterator i;
00305     for (i = subqs.begin(); i != subqs.end(); ++i) {
00306         len += (**i).get_length();
00307     }
00308     return len;
00309 }
00310 
00312 void
00313 Xapian::Query::Internal::accumulate_terms(
00314                         vector<pair<string, Xapian::termpos> > &terms) const
00315 {
00316     if (is_leaf(op)) {
00317         // We're a leaf, so just return our term, but skip Query::MatchAllTerms
00318         // (which is Query("")).
00319         if (!tname.empty())
00320             terms.push_back(make_pair(tname, term_pos));
00321     } else {
00322         subquery_list::const_iterator end = subqs.end();
00323         // Not a leaf, concatenate results from all subqueries.
00324         for (subquery_list::const_iterator i = subqs.begin(); i != end; ++i) {
00325             (*i)->accumulate_terms(terms);
00326         }
00327     }
00328 }
00329 
00330 struct LessByTermpos {
00331     typedef const pair<string, Xapian::termpos> argtype;
00332     bool operator()(argtype &left, argtype &right) {
00333         if (left.second != right.second) {
00334             return left.second < right.second;
00335         } else {
00336             return left.first < right.first;
00337         }
00338     }
00339 };
00340 
00341 Xapian::TermIterator
00342 Xapian::Query::Internal::get_terms() const
00343 {
00344     vector<pair<string, Xapian::termpos> > terms;
00345     accumulate_terms(terms);
00346 
00347     sort(terms.begin(), terms.end(), LessByTermpos());
00348 
00349     // remove adjacent duplicates, and return an iterator pointing
00350     // to just after the last unique element
00351     vector<pair<string, Xapian::termpos> >::iterator newlast =
00352                 unique(terms.begin(), terms.end());
00353     // and remove the rest...  (See Stroustrup 18.6.3)
00354     terms.erase(newlast, terms.end());
00355 
00356     vector<string> result;
00357     vector<pair<string, Xapian::termpos> >::const_iterator i;
00358     for (i = terms.begin(); i != terms.end(); ++i) {
00359         result.push_back(i->first);
00360     }
00361 
00362     return Xapian::TermIterator(new VectorTermList(result.begin(),
00363                                                    result.end()));
00364 }
00365 
00366 #ifdef XAPIAN_HAS_REMOTE_BACKEND
00367 // Methods.
00368 
00369 class QUnserial {
00370   private:
00371     const char *p;
00372     const char *end;
00373     Xapian::termpos curpos;
00374 
00375     Xapian::Query::Internal * readquery();
00376     Xapian::Query::Internal * readcompound();
00377 
00378   public:
00379     QUnserial(const string & s) : p(s.c_str()), end(p + s.size()), curpos(1) { }
00380     Xapian::Query::Internal * decode();
00381 };
00382 
00383 Xapian::Query::Internal *
00384 QUnserial::decode() {
00385     DEBUGLINE(UNKNOWN, "QUnserial::decode(" << p << ")");
00386     AutoPtr<Xapian::Query::Internal> qint(readquery());
00387     if (p != end)
00388         throw Xapian::InvalidArgumentError("Bad serialised query");
00389     return qint.release();
00390 }
00391 
00392 Xapian::Query::Internal *
00393 QUnserial::readquery() {
00394     if (p == end)
00395         throw Xapian::InvalidArgumentError("Bad serialised query");
00396     switch (*p++) {
00397         case '[': {
00398             size_t length = decode_length(&p, end, true);
00399             string tname(p, length);
00400             p += length;
00401             Xapian::termpos term_pos = curpos;
00402             Xapian::termcount wqf = 1;
00403             if (p != end) {
00404                 if (*p == '@') {
00405                     char *tmp; // avoid compiler warning
00406                     term_pos = strtol(p + 1, &tmp, 10);
00407                     p = tmp;
00408                 }
00409                 if (*p == '#') {
00410                     char *tmp; // avoid compiler warning
00411                     wqf = strtol(p + 1, &tmp, 10);
00412                     p = tmp;
00413                 }
00414             }
00415             ++curpos;
00416             return new Xapian::Query::Internal(tname, wqf, term_pos);
00417         }
00418         case '(':
00419             return readcompound();
00420         default:
00421             DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00422             throw Xapian::InvalidArgumentError("Invalid query string");
00423     }
00424 }
00425 
00426 static Xapian::Query::Internal *
00427 qint_from_vector(Xapian::Query::op op,
00428                  const vector<Xapian::Query::Internal *> & vec,
00429                  Xapian::termcount parameter = 0)
00430 {
00431     Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00432     vector<Xapian::Query::Internal *>::const_iterator i;
00433     for (i = vec.begin(); i != vec.end(); i++) {
00434         qint->add_subquery_nocopy(*i);
00435     }
00436     Xapian::Query::Internal * r = qint->end_construction();
00437     // We're only called during unserialisation, so no simplification should
00438     // happen.
00439     AssertEq(r, qint);
00440     return r;
00441 }
00442 
00443 static Xapian::Query::Internal *
00444 qint_from_vector(Xapian::Query::op op,
00445                  const vector<Xapian::Query::Internal *> & vec,
00446                  Xapian::termcount parameter,
00447                  double dbl_parameter)
00448 {
00449     Xapian::Query::Internal * qint = new Xapian::Query::Internal(op, parameter);
00450     qint->set_dbl_parameter(dbl_parameter);
00451     vector<Xapian::Query::Internal *>::const_iterator i;
00452     for (i = vec.begin(); i != vec.end(); i++) {
00453         qint->add_subquery_nocopy(*i);
00454     }
00455     Xapian::Query::Internal * r = qint->end_construction();
00456     // We're only called during unserialisation, so no simplification should
00457     // happen.
00458     AssertEq(r, qint);
00459     return r;
00460 }
00461 
00462 Xapian::Query::Internal *
00463 QUnserial::readcompound() {
00464     vector<Xapian::Query::Internal *> subqs;
00465     try {
00466         while (true) {
00467             if (p == end)
00468                 throw Xapian::InvalidArgumentError("Bad serialised query");
00469             switch (*p++) {
00470                 case '[':
00471                     --p;
00472                     subqs.push_back(readquery());
00473                     break;
00474                 case '(': {
00475                     subqs.push_back(readcompound());
00476                     break;
00477                 }
00478                 case '&':
00479                     return qint_from_vector(Xapian::Query::OP_AND, subqs);
00480                 case '|':
00481                     return qint_from_vector(Xapian::Query::OP_OR, subqs);
00482                 case '%':
00483                     return qint_from_vector(Xapian::Query::OP_FILTER, subqs);
00484                 case '^':
00485                     return qint_from_vector(Xapian::Query::OP_XOR, subqs);
00486                 case '+':
00487                     return qint_from_vector(Xapian::Query::OP_AND_MAYBE, subqs);
00488                 case '-':
00489                     return qint_from_vector(Xapian::Query::OP_AND_NOT, subqs);
00490                 case '~': {
00491                     char *tmp; // avoid compiler warning
00492                     Xapian::termcount window(strtol(p, &tmp, 10));
00493                     p = tmp;
00494                     return qint_from_vector(Xapian::Query::OP_NEAR, subqs, window);
00495                 }
00496                 case '"': {
00497                     char *tmp; // avoid compiler warning
00498                     Xapian::termcount window(strtol(p, &tmp, 10));
00499                     p = tmp;
00500                     return qint_from_vector(Xapian::Query::OP_PHRASE, subqs, window);
00501                 }
00502                 case '*': {
00503                     char *tmp; // avoid compiler warning
00504                     Xapian::termcount elite_set_size(strtol(p, &tmp, 10));
00505                     p = tmp;
00506                     return qint_from_vector(Xapian::Query::OP_ELITE_SET, subqs,
00507                                             elite_set_size);
00508                 }
00509                 case ']': {
00510                     size_t len = decode_length(&p, end, true);
00511                     string start(p, len);
00512                     p += len;
00513                     len = decode_length(&p, end, true);
00514                     string stop(p, len);
00515                     p += len;
00516                     char *tmp; // avoid compiler warning
00517                     Xapian::valueno valno = strtoul(p, &tmp, 10);
00518                     p = tmp;
00519                     return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_RANGE, valno,
00520                                                        start, stop);
00521                 }
00522                 case '}': {
00523                     size_t len = decode_length(&p, end, true);
00524                     string start(p, len);
00525                     p += len;
00526                     char *tmp; // avoid compiler warning
00527                     Xapian::valueno valno = strtoul(p, &tmp, 10);
00528                     p = tmp;
00529                     return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_GE, valno,
00530                                                        start);
00531                 }
00532                 case '{': {
00533                     size_t len = decode_length(&p, end, true);
00534                     string start(p, len);
00535                     p += len;
00536                     char *tmp; // avoid compiler warning
00537                     Xapian::valueno valno = strtoul(p, &tmp, 10);
00538                     p = tmp;
00539                     return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_LE, valno,
00540                                                        start);
00541                 }
00542                 case '.': {
00543                     double param = unserialise_double(&p, end);
00544                     return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
00545                                             subqs, 0, param);
00546                 }
00547                 default:
00548                     DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
00549                     throw Xapian::InvalidArgumentError("Invalid query string");
00550             }
00551         }
00552     } catch (...) {
00553         vector<Xapian::Query::Internal *>::iterator i;
00554         for (i = subqs.begin(); i != subqs.end(); i++)
00555             delete *i;
00556         throw;
00557     }
00558 }
00559 
00560 Xapian::Query::Internal *
00561 Xapian::Query::Internal::unserialise(const string &s)
00562 {
00563     Assert(s.length() > 1);
00564     QUnserial u(s);
00565     Xapian::Query::Internal * qint = u.decode();
00566     AssertEq(s, qint->serialise());
00567     return qint;
00568 }
00569 #else
00570 Xapian::Query::Internal *
00571 Xapian::Query::Internal::unserialise(const string &)
00572 {
00573     throw Xapian::InternalError("query serialisation not compiled in");
00574 }
00575 #endif
00576 
00577 Xapian::Query::Internal::Internal(const Xapian::Query::Internal &copyme)
00578         : Xapian::Internal::RefCntBase(),
00579           op(copyme.op),
00580           subqs(),
00581           parameter(copyme.parameter),
00582           tname(copyme.tname),
00583           str_parameter(copyme.str_parameter),
00584           term_pos(copyme.term_pos),
00585           wqf(copyme.wqf)
00586 {
00587     for (subquery_list::const_iterator i = copyme.subqs.begin();
00588          i != copyme.subqs.end();
00589          ++i) {
00590         subqs.push_back(new Xapian::Query::Internal(**i));
00591     }
00592 }
00593 
00595 // Methods for making new query objects
00596 
00597 Xapian::Query::Internal::Internal(const string & tname_, Xapian::termcount wqf_,
00598                  Xapian::termpos term_pos_)
00599         : op(Xapian::Query::Internal::OP_LEAF),
00600           subqs(),
00601           parameter(0),
00602           tname(tname_),
00603           term_pos(term_pos_),
00604           wqf(wqf_)
00605 {
00606     validate_query();
00607 }
00608 
00609 Xapian::Query::Internal::Internal(op_t op_, Xapian::termcount parameter_)
00610         : op(op_),
00611           subqs(),
00612           parameter(parameter_),
00613           tname(),
00614           term_pos(0),
00615           wqf(0)
00616 {
00617     if (parameter != 0 && op != OP_PHRASE && op != OP_NEAR && op != OP_ELITE_SET)
00618         throw Xapian::InvalidArgumentError("parameter is only meaningful for OP_NEAR, OP_PHRASE, or OP_ELITE_SET");
00619 }
00620 
00621 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00622                                   const string &begin, const string &end)
00623         : op(op_),
00624           parameter(Xapian::termcount(valno)),
00625           tname(begin),
00626           str_parameter(end)
00627 {
00628     if (op != OP_VALUE_RANGE)
00629         throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_RANGE");
00630     validate_query();
00631 }
00632 
00633 Xapian::Query::Internal::Internal(op_t op_, Xapian::valueno valno,
00634                                   const std::string &value)
00635         : op(op_),
00636           parameter(Xapian::termcount(valno)),
00637           tname(value)
00638 {
00639     if (op != OP_VALUE_GE && op != OP_VALUE_LE)
00640         throw Xapian::InvalidArgumentError("This constructor is only meaningful for OP_VALUE_GE or OP_VALUE_LE");
00641     if (op == OP_VALUE_GE && value.empty()) {
00642         // Map '<value> >= ""' to MatchAll.
00643         op = OP_LEAF;
00644         parameter = 0;
00645         term_pos = 0;
00646         wqf = 1;
00647     }
00648     validate_query();
00649 }
00650 
00651 Xapian::Query::Internal::~Internal()
00652 {
00653     subquery_list::iterator i;
00654     for (i = subqs.begin(); i != subqs.end(); i++) {
00655         delete *i;
00656     }
00657 }
00658 
00659 Xapian::Query::Internal *
00660 Xapian::Query::Internal::end_construction()
00661 {
00662     DEBUGCALL(API, void, "Xapian::Query::Internal::end_construction", "");
00663     validate_query();
00664     Xapian::Query::Internal * qint = simplify_query();
00665     if (qint) qint->validate_query();
00666     return qint;
00667 }
00668 
00669 void
00670 Xapian::Query::Internal::validate_query() const
00671 {
00672     DEBUGCALL(API, void, "Xapian::Query::Internal::validate_query", "");
00673 
00674     // Check that the number of subqueries is in acceptable limits for this op
00675     if (subqs.size() < get_min_subqs(op) ||
00676         subqs.size() > get_max_subqs(op)) {
00677         throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) +
00678                 " requires a minimum of " + om_tostring(get_min_subqs(op)) +
00679                 " and a maximum of " + om_tostring(get_max_subqs(op)) +
00680                 " sub queries, had " +
00681                 om_tostring(subqs.size()) + ".");
00682     }
00683 
00684     if (op == OP_SCALE_WEIGHT && get_dbl_parameter() < 0) {
00685         throw Xapian::InvalidArgumentError("Xapian::Query: " + get_op_name(op) + " requires a non-negative parameter.");
00686     }
00687 
00688     // Check that the termname is null in a branch query, unless the op
00689     // is OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE.
00690     Assert(is_leaf(op) ||
00691            op == OP_VALUE_RANGE ||
00692            op == OP_VALUE_GE ||
00693            op == OP_VALUE_LE ||
00694            tname.empty());
00695 }
00696 
00697 bool
00698 Xapian::Query::Internal::simplify_matchnothing()
00699 {
00700     subquery_list::iterator sq;
00701     switch (op) {
00702         case OP_PHRASE:
00703         case OP_NEAR:
00704         case OP_AND:
00705         case OP_FILTER:
00706             // Doing an "AND" type operation - if we've got any MatchNothing
00707             // nodes, we match nothing.
00708             for (sq = subqs.begin(); sq != subqs.end(); sq++) {
00709                 if (*sq == 0) {
00710                     for (sq = subqs.begin(); sq != subqs.end(); sq++)
00711                         delete *sq;
00712                     subqs.clear();
00713                     return true;
00714                 }
00715             }
00716             break;
00717         case OP_ELITE_SET:
00718         case OP_OR:
00719         case OP_XOR:
00720             // Doing an "OR" type operation - if we've got any MatchNothing
00721             // subnodes, drop them; except that we mustn't become an empty
00722             // node due to this, so we never drop a MatchNothing subnode
00723             // if it's the only subnode.
00724             sq = subqs.begin();
00725             while (sq != subqs.end() && subqs.size() > 1) {
00726                 if (*sq == 0) {
00727                     sq = subqs.erase(sq);
00728                 } else {
00729                     ++sq;
00730                 }
00731             }
00732             break;
00733         case OP_AND_MAYBE:
00734         case OP_AND_NOT:
00735             // If left hand side is MatchNothing, we match nothing.
00736             // If right hand side is MatchNothing, replace node with LHS.
00737             // So, if either node is MatchNothing, replace node with LHS.
00738             // Easiest way to do this is to remove the right hand node,
00739             // and let simplify_query() perform the replacement of
00740             // the unary operator with its one remaining child.
00741             Assert(subqs.size() == 2);
00742             if (subqs[0] == 0 || subqs[1] == 0) {
00743                 sq = subqs.begin();
00744                 ++sq;
00745                 delete *sq;
00746                 subqs.erase(sq);
00747             }
00748             break;
00749         case OP_SCALE_WEIGHT:
00750             Assert(subqs.size() == 1);
00751             // We should have already handled OP_SCALE_WEIGHT applied to
00752             // MatchNothing in the relevant constructor.
00753             Assert(subqs[0]);
00754             break;
00755         case OP_LEAF:
00756             // Do nothing.
00757             break;
00758     }
00759     return false;
00760 }
00761 
00762 Xapian::Query::Internal *
00763 Xapian::Query::Internal::simplify_query()
00764 {
00765     DEBUGCALL(API, Xapian::Query::Internal *, "Xapian::Query::Internal::simplify_query", "");
00766 
00767     // Simplify any MatchNothing nodes.
00768     if (simplify_matchnothing()) {
00769         return 0;
00770     }
00771 
00772     // General simplifications, dependent on operator.
00773     switch (op) {
00774         case OP_LEAF:
00775             return this;
00776         case OP_VALUE_RANGE:
00777             // If the start of the range is greater than the end then we won't
00778             // match anything.
00779             if (tname > str_parameter) return 0;
00780             return this;
00781         case OP_VALUE_GE:
00782         case OP_VALUE_LE:
00783             return this;
00784         case OP_SCALE_WEIGHT:
00785             if (fabs(get_dbl_parameter() - 1.0) > DBL_EPSILON) return this;
00786             // If the multiplier is 1, this node doesn't actually do anything,
00787             // so we leave it to be removed.
00788             break;
00789         case OP_PHRASE: case OP_NEAR:
00790             // Empty and single subquery OP_PHRASE and OP_NEAR get handled
00791             // below.
00792             if (subqs.size() <= 1) break;
00793 
00794             // Default to the number of subqueries.
00795             if (!parameter) parameter = subqs.size();
00796 
00797             // Flatten out sub queries of a phrase or near operation.
00798             return flatten_subqs();
00799         case OP_ELITE_SET:
00800             if (!parameter) {
00801                 // Default to sqrt(number of subqueries), or a minimum of 10.
00802                 // Gives a reasonable default.
00803                 if (subqs.size() <= 100) {
00804                     parameter = 10;
00805                 } else {
00806                     parameter = Xapian::termcount(ceil(sqrt(double(subqs.size()))));
00807                     Assert(parameter > 10);
00808                 }
00809             }
00810             break;
00811         case OP_OR: case OP_AND: case OP_XOR:
00812             // Remove duplicates if we can.
00813             if (subqs.size() > 1) collapse_subqs();
00814             break;
00815         default:
00816             break;
00817     }
00818 
00819     // If we have no subqueries, then we're an empty query.
00820     if (subqs.empty())
00821         return 0;
00822 
00823     // Any nodes which are valid with only one subquery can be replaced by
00824     // that solitary subquery.
00825     if (subqs.size() == 1) {
00826         Xapian::Query::Internal * qint = subqs[0];
00827         subqs[0] = 0;
00828         return qint;
00829     }
00830 
00831     return this;
00832 }
00833 
00834 struct SortPosName {
00835     bool operator()(const Xapian::Query::Internal * left,
00836                     const Xapian::Query::Internal * right) const {
00837         Assert(left != 0);
00838         Assert(right != 0);
00839         Assert(is_leaf(left->op));
00840         Assert(is_leaf(right->op));
00841         if (left->term_pos != right->term_pos) {
00842             return left->term_pos < right->term_pos;
00843         } else {
00844             return left->tname < right->tname;
00845         }
00846     }
00847 };
00848 
00852 void
00853 Xapian::Query::Internal::collapse_subqs()
00854 {
00855     Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
00856     typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
00857     subqtable sqtab;
00858 
00859     subquery_list::iterator sq = subqs.begin();
00860     while (sq != subqs.end()) {
00861         Assert(*sq != 0);
00862         if (is_leaf((*sq)->op)) {
00863             Assert((*sq)->subqs.empty());
00864             subqtable::iterator s = sqtab.find(*sq);
00865             if (s == sqtab.end()) {
00866                 sqtab.insert(*sq);
00867                 ++sq;
00868             } else {
00869                 AssertEq((*s)->tname, (*sq)->tname);
00870                 AssertEq((*s)->term_pos, (*sq)->term_pos);
00871                 (*s)->wqf += (*sq)->wqf;
00872                 // Rather than incrementing sq, delete the current
00873                 // element, as it has been merged into the other
00874                 // equivalent term.
00875                 delete *sq;
00876                 sq = subqs.erase(sq);
00877             }
00878         } else {
00879             ++sq;
00880         }
00881     }
00882 }
00883 
00885 Xapian::Query::Internal *
00886 Xapian::Query::Internal::flatten_subqs()
00887 {
00888     Assert(op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE);
00889 
00890     subquery_list::iterator sq;
00891     for (sq = subqs.begin(); sq != subqs.end(); ++sq) {
00892         if (!is_leaf((*sq)->op)) break;
00893     }
00894 
00895     if (sq == subqs.end()) return this;
00896 
00897     if ((*sq)->op == Xapian::Query::OP_NEAR ||
00898         (*sq)->op == Xapian::Query::OP_PHRASE) {
00899         // FIXME: A PHRASE (B PHRASE C) -> (A PHRASE B) AND (B PHRASE C)?
00900         throw Xapian::UnimplementedError("Can't use NEAR/PHRASE with a subexpression containing NEAR or PHRASE");
00901     }
00902 
00903     AutoPtr<Xapian::Query::Internal> flattenme(*sq);
00904     *sq = 0;
00905 
00906     subquery_list::iterator j;
00907     for (j = flattenme->subqs.begin(); j != flattenme->subqs.end(); ++j) {
00908         *sq = *j;
00909         *j = 0;
00910         AutoPtr<Xapian::Query::Internal> newq(new Xapian::Query::Internal(*this));
00911         delete *sq;
00912         *sq = 0;
00913         newq = newq->flatten_subqs();
00914         *j = newq.release();
00915     }
00916 
00917     if (flattenme->op == OP_AND ||
00918         flattenme->op == OP_OR ||
00919         flattenme->op == OP_XOR) {
00920         size_t i = flattenme->subqs.size();
00921         do {
00922             --i;
00923             Xapian::Query::Internal * q = flattenme->subqs[i];
00924             if (flattenme->op == q->op) {
00925                 subquery_list::iterator k;
00926                 for (k = q->subqs.begin(), ++k;
00927                      k != q->subqs.end();
00928                      ++k) {
00929                     flattenme->subqs.push_back(0);
00930                     flattenme->subqs.back() = *k;
00931                     *k = 0;
00932                 }
00933                 flattenme->subqs[i] = q->subqs[0];
00934                 q->subqs.clear();
00935                 delete q;
00936             }
00937         } while (i != 0);
00938     }
00939 
00940     return flattenme.release();
00941 }
00942 
00943 void
00944 Xapian::Query::Internal::add_subquery(const Xapian::Query::Internal * subq)
00945 {
00946     Assert(!is_leaf(op));
00947     if (subq == 0) {
00948         subqs.push_back(0);
00949     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
00950         // Distribute the subquery.
00951         for (subquery_list::const_iterator i = subq->subqs.begin();
00952              i != subq->subqs.end(); i++) {
00953             add_subquery(*i);
00954         }
00955     } else {
00956         subqs.push_back(new Xapian::Query::Internal(*subq));
00957     }
00958 }
00959 
00960 void
00961 Xapian::Query::Internal::add_subquery_nocopy(Xapian::Query::Internal * subq)
00962 {
00963     Assert(!is_leaf(op));
00964     if (subq == 0) {
00965         subqs.push_back(0);
00966     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
00967         // Distribute the subquery.
00968         for (subquery_list::const_iterator i = subq->subqs.begin();
00969              i != subq->subqs.end(); i++) {
00970             add_subquery(*i);
00971         }
00972         delete subq;
00973     } else {
00974         subqs.push_back(subq);
00975     }
00976 }
00977 
00978 void
00979 Xapian::Query::Internal::set_dbl_parameter(double dbl_parameter_)
00980 {
00981     // We store the double parameter encoded as a string because
00982     // Xapian::Query::Internal is defined in an external API header and we want
00983     // to avoid any risk of ABI breakage (we suspect it would be OK, but it's
00984     // not risking).  FIXME: rework for 1.1.0
00985     str_parameter = serialise_double(dbl_parameter_);
00986 }
00987 
00988 double
00989 Xapian::Query::Internal::get_dbl_parameter() const
00990 {
00991     // We store the double parameter encoded as a string because
00992     // Xapian::Query::Internal is defined in an external API header and we want
00993     // to avoid any risk of ABI breakage (we suspect it would be OK, but it's
00994     // not risking).  FIXME: rework for 1.1.0
00995     const char * p = str_parameter.data();
00996     const char * end = p + str_parameter.size();
00997     return unserialise_double(&p, end);
00998 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.