queryparser/queryparser_internal.cc

Go to the documentation of this file.
00001 /* Driver template for the LEMON parser generator.
00002 ** The author disclaims copyright to this source code.
00003 */
00004 /* First off, code is included which follows the "include" declaration
00005 ** in the input file. */
00006 #line 1 "queryparser/queryparser.lemony"
00007 
00008 /* queryparser.lemony: build a Xapian::Query object from a user query string.
00009  *
00010  * Copyright (C) 2004,2005,2006,2007,2008,2009,2010 Olly Betts
00011  *
00012  * This program is free software; you can redistribute it and/or
00013  * modify it under the terms of the GNU General Public License as
00014  * published by the Free Software Foundation; either version 2 of the
00015  * License, or (at your option) any later version.
00016  *
00017  * This program is distributed in the hope that it will be useful,
00018  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00019  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020  * GNU General Public License for more details.
00021  *
00022  * You should have received a copy of the GNU General Public License
00023  * along with this program; if not, write to the Free Software
00024  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00025  * USA
00026  */
00027 
00028 #include <config.h>
00029 
00030 #include "omassert.h"
00031 #include "queryparser_internal.h"
00032 #include <xapian/error.h>
00033 #include <xapian/unicode.h>
00034 #include "stringutils.h"
00035 
00036 // Include the list of token values lemon generates.
00037 #include "queryparser_token.h"
00038 
00039 #include <algorithm>
00040 #include <list>
00041 #include <string>
00042 
00043 #include <string.h>
00044 
00045 using namespace std;
00046 
00047 using namespace Xapian;
00048 
00049 inline bool
00050 U_isupper(unsigned ch) {
00051     return (ch < 128 && C_isupper((unsigned char)ch));
00052 }
00053 
00054 inline bool
00055 U_isdigit(unsigned ch) {
00056     return (ch < 128 && C_isdigit((unsigned char)ch));
00057 }
00058 
00059 inline bool
00060 U_isalpha(unsigned ch) {
00061     return (ch < 128 && C_isalpha((unsigned char)ch));
00062 }
00063 
00064 using Xapian::Unicode::is_whitespace;
00065 
00066 inline bool
00067 is_not_whitespace(unsigned ch) {
00068     return !is_whitespace(ch);
00069 }
00070 
00071 using Xapian::Unicode::is_wordchar;
00072 
00073 inline bool
00074 is_not_wordchar(unsigned ch) {
00075     return !is_wordchar(ch);
00076 }
00077 
00078 inline bool
00079 is_digit(unsigned ch) {
00080     return (Unicode::get_category(ch) == Unicode::DECIMAL_DIGIT_NUMBER);
00081 }
00082 
00083 // FIXME: we used to keep trailing "-" (e.g. Cl-) but it's of dubious utility
00084 // and there's the risk of hyphens getting stuck onto the end of terms...
/// True for the characters which may be kept as a trailing suffix of a term
/// (currently '+' and '#').
inline bool
is_suffix(unsigned ch) {
    switch (ch) {
        case '+':
        case '#':
            return true;
        default:
            return false;
    }
}
00089 
00090 inline bool
00091 prefix_needs_colon(const string & prefix, unsigned ch)
00092 {
00093     if (!U_isupper(ch)) return false;
00094     string::size_type len = prefix.length();
00095     return (len > 1 && prefix[len - 1] != ':');
00096 }
00097 
00098 using Unicode::is_currency;
00099 
00101 struct filter_group_id {
00105     list<string> prefixes;
00106 
00110     Xapian::valueno valno;
00111 
00113     explicit filter_group_id(const list<string> & prefixes_)
00114         : prefixes(prefixes_), valno(Xapian::BAD_VALUENO) {}
00115 
00117     explicit filter_group_id(Xapian::valueno valno_)
00118         : prefixes(), valno(valno_) {}
00119 
00121     bool operator<(const filter_group_id & other) const {
00122         if (prefixes != other.prefixes) {
00123             return prefixes < other.prefixes;
00124         }
00125         return valno < other.valno;
00126     }
00127 };
00128 
00135 class Term {
00136     State * state;
00137 
00138   public:
00139     string name;
00140     list<string> prefixes;
00141     string unstemmed;
00142     QueryParser::stem_strategy stem;
00143     termpos pos;
00144 
00145     Term(const string &name_, termpos pos_) : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { }
00146     Term(const string &name_) : name(name_), stem(QueryParser::STEM_NONE), pos(0) { }
00147     Term(const string &name_, const list<string> &prefixes_)
00148         : name(name_), prefixes(prefixes_), stem(QueryParser::STEM_NONE), pos(0) { }
00149     Term(termpos pos_) : stem(QueryParser::STEM_NONE), pos(pos_) { }
00150     Term(State * state_, const string &name_, const list<string> &prefixes_,
00151          const string &unstemmed_,
00152          QueryParser::stem_strategy stem_ = QueryParser::STEM_NONE,
00153          termpos pos_ = 0)
00154         : state(state_), name(name_), prefixes(prefixes_), unstemmed(unstemmed_),
00155           stem(stem_), pos(pos_) { }
00156 
00157     string make_term(const string & prefix) const;
00158 
00159     void need_positions() {
00160         if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
00161     }
00162 
00163     termpos get_termpos() const { return pos; }
00164 
00165     filter_group_id get_filter_group_id() const { return filter_group_id(prefixes); }
00166 
00167     Query * as_wildcarded_query(State * state) const;
00168 
00177     Query * as_partial_query(State * state_) const;
00178 
00179     Query get_query() const;
00180 
00181     Query get_query_with_synonyms() const;
00182 
00183     Query get_query_with_auto_synonyms() const;
00184 };
00185 
00187 class State {
00188     QueryParser::Internal * qpi;
00189 
00190   public:
00191     Query query;
00192     const char * error;
00193     unsigned flags;
00194 
00195     State(QueryParser::Internal * qpi_, unsigned flags_)
00196         : qpi(qpi_), error(NULL), flags(flags_) { }
00197 
00198     string stem_term(const string &term) {
00199         return qpi->stemmer(term);
00200     }
00201 
00202     void add_to_stoplist(const Term * term) {
00203         qpi->stoplist.push_back(term->name);
00204     }
00205 
00206     void add_to_unstem(const string & term, const string & unstemmed) {
00207         qpi->unstem.insert(make_pair(term, unstemmed));
00208     }
00209 
00210     valueno value_range(Query & q, Term *a, Term *b) {
00211         list<ValueRangeProcessor *>::const_iterator i;
00212         for (i = qpi->valrangeprocs.begin(); i != qpi->valrangeprocs.end(); ++i) {
00213             string start = a->name;
00214             string end = b->name;
00215             Xapian::valueno valno = (**i)(start, end);
00216             if (valno != Xapian::BAD_VALUENO) {
00217                 delete a;
00218                 delete b;
00219                 q = Query(Query::OP_VALUE_RANGE, valno, start, end);
00220                 return valno;
00221             }
00222         }
00223         // FIXME: Do we want to report an error for this?  If not we need
00224         // to perform the above check in the tokeniser and if none of the
00225         // ValueRangeProcessor classes like the range, we rollback to
00226         // parsing the query without treating this as a range.  Needs
00227         // more thought and probably a look at queries users actually
00228         // enter.
00229         error = "Unknown range operation";
00230         return Xapian::BAD_VALUENO;
00231     }
00232 
00233     Query::op default_op() const { return qpi->default_op; }
00234 
00235     bool is_stopword(const Term *term) const {
00236         return qpi->stopper && (*qpi->stopper)(term->name);
00237     }
00238 
00239     Database get_database() const {
00240         return qpi->db;
00241     }
00242 };
00243 
00244 string
00245 Term::make_term(const string & prefix) const
00246 {
00247     string term;
00248     if (stem == QueryParser::STEM_SOME) term += 'Z';
00249     if (!prefix.empty()) {
00250         term += prefix;
00251         if (prefix_needs_colon(prefix, name[0])) term += ':';
00252     }
00253     if (stem != QueryParser::STEM_NONE) {
00254         term += state->stem_term(name);
00255     } else {
00256         term += name;
00257     }
00258 
00259     if (!unstemmed.empty())
00260         state->add_to_unstem(term, unstemmed);
00261     return term;
00262 }
00263 
00264 Query
00265 Term::get_query_with_synonyms() const
00266 {
00267     Query q = get_query();
00268 
00269     // Handle single-word synonyms with each prefix.
00270     list<string>::const_iterator piter;
00271     for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
00272         // First try the unstemmed term:
00273         string term;
00274         if (!piter->empty()) {
00275             term += *piter;
00276             if (prefix_needs_colon(*piter, name[0])) term += ':';
00277         }
00278         term += name;
00279 
00280         Xapian::Database db = state->get_database();
00281         Xapian::TermIterator syn = db.synonyms_begin(term);
00282         Xapian::TermIterator end = db.synonyms_end(term);
00283         if (syn == end && stem != QueryParser::STEM_NONE) {
00284             // If that has no synonyms, try the stemmed form:
00285             term = 'Z';
00286             if (!piter->empty()) {
00287                 term += *piter;
00288                 if (prefix_needs_colon(*piter, name[0])) term += ':';
00289             }
00290             term += state->stem_term(name);
00291             syn = db.synonyms_begin(term);
00292             end = db.synonyms_end(term);
00293         }
00294         while (syn != end) {
00295             q = Query(Query::OP_OR, q, Query(*syn, 1, pos));
00296             ++syn;
00297         }
00298     }
00299     return q;
00300 }
00301 
00302 Query
00303 Term::get_query_with_auto_synonyms() const
00304 {
00305     if (state->flags & QueryParser::FLAG_AUTO_SYNONYMS)
00306         return get_query_with_synonyms();
00307 
00308     return get_query();
00309 }
00310 
00311 static void
00312 add_to_query(Query *& q, Query::op op, Query * term)
00313 {
00314     Assert(term);
00315     if (q) {
00316         *q = Query(op, *q, *term);
00317         delete term;
00318     } else {
00319         q = term;
00320     }
00321 }
00322 
00323 static void
00324 add_to_query(Query *& q, Query::op op, const Query & term)
00325 {
00326     if (q) {
00327         *q = Query(op, *q, term);
00328     } else {
00329         q = new Query(term);
00330     }
00331 }
00332 
00333 Query
00334 Term::get_query() const
00335 {
00336     Assert(prefixes.size() >= 1);
00337     list<string>::const_iterator piter = prefixes.begin();
00338     Query q(make_term(*piter), 1, pos);
00339     while (++piter != prefixes.end()) {
00340         q = Query(Query::OP_OR, q, Query(make_term(*piter), 1, pos));
00341     }
00342     return q;
00343 }
00344 
00345 Query *
00346 Term::as_wildcarded_query(State * state_) const
00347 {
00348     Database db = state_->get_database();
00349     vector<Query> subqs;
00350     list<string>::const_iterator piter;
00351     for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
00352         string root = *piter;
00353         root += name;
00354         TermIterator t = db.allterms_begin(root);
00355         while (t != db.allterms_end(root)) {
00356             subqs.push_back(Query(*t, 1, pos));
00357             ++t;
00358         }
00359     }
00360     delete this;
00361     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
00362 }
00363 
00364 Query *
00365 Term::as_partial_query(State * state_) const
00366 {
00367     Database db = state_->get_database();
00368     vector<Query> subqs;
00369     list<string>::const_iterator piter;
00370     for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
00371         string root = *piter;
00372         root += name;
00373         TermIterator t = db.allterms_begin(root);
00374         while (t != db.allterms_end(root)) {
00375             subqs.push_back(Query(*t, 1, pos));
00376             ++t;
00377         }
00378         // Add the term, as it would normally be handled, as an alternative.
00379         subqs.push_back(Query(make_term(*piter), 1, pos));
00380     }
00381     delete this;
00382     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
00383 }
00384 
00385 inline bool
00386 is_phrase_generator(unsigned ch)
00387 {
00388     // These characters generate a phrase search.
00389     if (rare(ch == 0)) return false;
00390     if (ch < 128) {
00391         // Ordered mostly by frequency of calls to this function done when
00392         // running queryparsertest.
00393         return (strchr(".-/:\\@", ch) != NULL);
00394     }
00395     switch (Unicode::get_category(ch)) {
00396         case Xapian::Unicode::NON_SPACING_MARK:
00397         case Xapian::Unicode::ENCLOSING_MARK:
00398         case Xapian::Unicode::COMBINING_SPACING_MARK:
00399             return true;
00400         case Xapian::Unicode::FORMAT:
00401             return (ch >= 0x200b &&
00402                     (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff));
00403         default:
00404             return false;
00405     }
00406 }
00407 
/// True if ch is one of the ASCII characters ( / \ @ < > = * [ { " which
/// prevent stemming.
inline bool
is_stem_preventer(unsigned ch)
{
    switch (ch) {
        case '(':
        case '/':
        case '\\':
        case '@':
        case '<':
        case '>':
        case '=':
        case '*':
        case '[':
        case '{':
        case '"':
            return true;
        default:
            return false;
    }
}
00413 
00414 inline bool
00415 should_stem(const string & term)
00416 {
00417     const unsigned int SHOULD_STEM_MASK =
00418         (1 << Unicode::LOWERCASE_LETTER) |
00419         (1 << Unicode::TITLECASE_LETTER) |
00420         (1 << Unicode::MODIFIER_LETTER) |
00421         (1 << Unicode::OTHER_LETTER);
00422     Utf8Iterator u(term);
00423     return ((SHOULD_STEM_MASK >> Unicode::get_category(*u)) & 1);
00424 }
00425 
/** Check whether ch may appear as a single embedded character inside a word.
 *
 *  @return the character to add to the term (U+2019 and U+201B are
 *          normalised to ASCII apostrophe), or 0 if ch isn't an infix.
 */
inline unsigned check_infix(unsigned ch) {
    switch (ch) {
        // Unicode includes all these except '&' in its word boundary rules,
        // as well as 0x2019 (which we handle below) and ':' (for Swedish
        // apparently, but we ignore this for now as it's problematic in
        // real world cases).
        case '\'':
        case '&':
        case 0xb7:
        case 0x5f4:
        case 0x2027:
            return ch;
        // 0x2019 is Unicode apostrophe and single closing quote.
        // 0x201b is Unicode single opening quote with the tail rising.
        case 0x2019:
        case 0x201b:
            return '\'';
        default:
            return 0;
    }
}
00439 
/** Check whether ch may appear as a single embedded character between two
 *  digits.
 *
 *  @return ch itself if it is such an infix, otherwise 0.
 */
inline unsigned check_infix_digit(unsigned ch) {
    // This list of characters comes from Unicode's word identifying algorithm.
    static const unsigned digit_infixes[] = {
        ',',
        '.',
        ';',
        0x037e, // GREEK QUESTION MARK
        0x0589, // ARMENIAN FULL STOP
        0x060D, // ARABIC DATE SEPARATOR
        0x07F8, // NKO COMMA
        0x2044, // FRACTION SLASH
        0xFE10, // PRESENTATION FORM FOR VERTICAL COMMA
        0xFE13, // PRESENTATION FORM FOR VERTICAL COLON
        0xFE14  // PRESENTATION FORM FOR VERTICAL SEMICOLON
    };
    const unsigned * const stop =
        digit_infixes + sizeof(digit_infixes) / sizeof(digit_infixes[0]);
    for (const unsigned * candidate = digit_infixes; candidate != stop; ++candidate) {
        if (*candidate == ch) return ch;
    }
    return 0;
}
00458 
00459 struct yyParser;
00460 
00461 // Prototype the functions lemon generates.
00462 static yyParser *ParseAlloc();
00463 static void ParseFree(yyParser *);
00464 static void Parse(yyParser *, int, Term *, State *);
00465 
00466 void
00467 QueryParser::Internal::add_prefix(const string &field, const string &prefix,
00468                                   bool filter)
00469 {
00470     map<string, PrefixInfo>::iterator p = prefixmap.find(field);
00471     if (p == prefixmap.end()) {
00472        prefixmap.insert(make_pair(field, PrefixInfo(filter, prefix)));
00473     } else {
00474        // Check that this is the same type of filter as the existing one(s).
00475        if (p->second.filter != filter) {
00476            throw Xapian::InvalidOperationError("Can't use add_prefix() and add_bool_prefix() on the same field name");
00477        }
00478        p->second.prefixes.push_back(prefix);
00479     }
00480 }
00481 
00482 string
00483 QueryParser::Internal::parse_term(Utf8Iterator &it, const Utf8Iterator &end,
00484                                   bool &was_acronym)
00485 {
00486     string term;
00487     // Look for initials separated by '.' (e.g. P.T.O., U.N.C.L.E).
00488     // Don't worry if there's a trailing '.' or not.
00489     if (U_isupper(*it)) {
00490         string t;
00491         Utf8Iterator p = it;
00492         do {
00493             Unicode::append_utf8(t, *p++);
00494         } while (p != end && *p == '.' && ++p != end && U_isupper(*p));
00495         // One letter does not make an acronym!  If we handled a single
00496         // uppercase letter here, we wouldn't catch M&S below.
00497         if (t.length() > 1) {
00498             // Check there's not a (lower case) letter or digit
00499             // immediately after it.
00500             // FIXME: should I.B.M..P.T.O be a range search?
00501             if (p == end || !is_wordchar(*p)) {
00502                 it = p;
00503                 swap(term, t);
00504             }
00505         }
00506     }
00507     was_acronym = !term.empty();
00508 
00509     if (term.empty()) {
00510         unsigned prevch = *it;
00511         Unicode::append_utf8(term, prevch);
00512         while (++it != end) {
00513             unsigned ch = *it;
00514             if (!is_wordchar(ch)) {
00515                 // Treat a single embedded '&' or "'" or similar as a word
00516                 // character (e.g. AT&T, Fred's).  Also, normalise
00517                 // apostrophes to ASCII apostrophe.
00518                 Utf8Iterator p = it;
00519                 ++p;
00520                 if (p == end || !is_wordchar(*p)) break;
00521                 unsigned nextch = *p;
00522                 if (is_digit(prevch) &&
00523                     is_digit(nextch)) {
00524                     ch = check_infix_digit(ch);
00525                 } else {
00526                     ch = check_infix(ch);
00527                 }
00528                 if (!ch) break;
00529             }
00530             Unicode::append_utf8(term, ch);
00531             prevch = ch;
00532         }
00533         if (it != end && is_suffix(*it)) {
00534             string suff_term = term;
00535             Utf8Iterator p = it;
00536             // Keep trailing + (e.g. C++, Na+) or # (e.g. C#).
00537             do {
00538                 if (suff_term.size() - term.size() == 3) {
00539                     suff_term.resize(0);
00540                     break;
00541                 }
00542                 suff_term += *p;
00543             } while (is_suffix(*++p));
00544             if (!suff_term.empty() && (p == end || !is_wordchar(*p))) {
00545                 // If the suffixed term doesn't exist, check that the
00546                 // non-suffixed term does.  This also takes care of
00547                 // the case when QueryParser::set_database() hasn't
00548                 // been called.
00549                 bool use_suff_term = false;
00550                 string lc = Unicode::tolower(suff_term);
00551                 if (db.term_exists(lc)) {
00552                     use_suff_term = true;
00553                 } else {
00554                     lc = Unicode::tolower(term);
00555                     if (!db.term_exists(lc)) use_suff_term = true;
00556                 }
00557                 if (use_suff_term) {
00558                     term = suff_term;
00559                     it = p;
00560                 }
00561             }
00562         }
00563     }
00564     return term;
00565 }
00566 
00567 class ParserHandler {
00568     yyParser * parser;
00569 
00570   public:
00571     explicit ParserHandler(yyParser * parser_) : parser(parser_) { }
00572     operator yyParser*() { return parser; }
00573     ~ParserHandler() { ParseFree(parser); }
00574 };
00575 
00576 Query
00577 QueryParser::Internal::parse_query(const string &qs, unsigned flags,
00578                                    const string &default_prefix)
00579 {
00580     // Set value_ranges if we may have to handle value ranges in the query.
00581     bool value_ranges;
00582     value_ranges = !valrangeprocs.empty() && (qs.find("..") != string::npos);
00583 
00584     termpos term_pos = 1;
00585     Utf8Iterator it(qs), end;
00586 
00587     State state(this, flags);
00588 
00589     // To successfully apply more than one spelling correction to a query
00590     // string, we must keep track of the offset due to previous corrections.
00591     int correction_offset = 0;
00592     corrected_query.resize(0);
00593 
00594     // Stack of prefixes, used for phrases and subexpressions.
00595     list<const PrefixInfo *> prefix_stack;
00596 
00597     // If default_prefix is specified, use it.  Otherwise, use any list
00598     // that has been set for the empty prefix.
00599     const PrefixInfo def_pfx(false, default_prefix);
00600     {
00601         const PrefixInfo * default_prefixinfo = &def_pfx;
00602         if (default_prefix.empty()) {
00603             map<string, PrefixInfo>::const_iterator f = prefixmap.find("");
00604             if (f != prefixmap.end()) default_prefixinfo = &(f->second);
00605         }
00606 
00607         // We always have the current prefix on the top of the stack.
00608         prefix_stack.push_back(default_prefixinfo);
00609     }
00610 
00611     ParserHandler pParser(ParseAlloc());
00612 
00613     unsigned newprev = ' ';
00614 main_lex_loop:
00615     enum {
00616         DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP,
00617         EXPLICIT_SYNONYM
00618     } mode = DEFAULT;
00619     while (it != end && !state.error) {
00620         bool last_was_operator = false;
00621         if (mode == EXPLICIT_SYNONYM) mode = DEFAULT;
00622         if (false) {
00623 just_had_operator:
00624             if (it == end) break;
00625             mode = DEFAULT;
00626 just_had_synonym_operator:
00627             last_was_operator = true;
00628         }
00629         if (mode == IN_PHRASED_TERM) mode = DEFAULT;
00630         if (is_whitespace(*it)) {
00631             newprev = ' ';
00632             ++it;
00633             it = find_if(it, end, is_not_whitespace);
00634             if (it == end) break;
00635         }
00636 
00637         if ((mode == DEFAULT || mode == IN_GROUP) && value_ranges) {
00638             // Scan forward to see if this could be the "start of range"
00639             // token.  Sadly this has O(n^2) tendencies, though at least
00640             // "n" is the number of words in a query which is likely to
00641             // remain fairly small.  FIXME: can we tokenise more elegantly?
00642             Utf8Iterator p = it;
00643             unsigned ch = 0;
00644             while (p != end) {
00645                 if (ch == '.' && *p == '.') {
00646                     ++p;
00647                     if (p == end || *p <= ' ' || *p == ')') break;
00648 
00649                     string r;
00650                     do {
00651                         Unicode::append_utf8(r, *it++);
00652                     } while (it != p);
00653                     // Trim off the trailing "..".
00654                     r.resize(r.size() - 2);
00655                     Parse(pParser, RANGE_START, new Term(r), &state);
00656                     r.resize(0);
00657                     // Allow any character except whitespace and ')' in a
00658                     // RANGE_END.  Or should we be consistent with RANGE_START?
00659                     do {
00660                         Unicode::append_utf8(r, *p++);
00661                     } while (p != end && *p > ' ' && *p != ')');
00662                     Parse(pParser, RANGE_END, new Term(r), &state);
00663                     it = p;
00664                     goto main_lex_loop;
00665                 }
00666                 ch = *p;
00667                 if (!(is_wordchar(ch) || is_currency(ch) ||
00668                       (ch < 128 && strchr("%,-./:@", ch)))) break;
00669                 ++p;
00670             }
00671         }
00672 
00673         if (!is_wordchar(*it)) {
00674             unsigned prev = newprev;
00675             unsigned ch = *it++;
00676             newprev = ch;
00677             // Drop out of IN_GROUP mode.
00678             if (mode == IN_GROUP) mode = DEFAULT;
00679             switch (ch) {
00680               case '"': // Quoted phrase.
00681                 if (mode == DEFAULT) {
00682                     // Skip whitespace.
00683                     it = find_if(it, end, is_not_whitespace);
00684                     if (it == end) {
00685                         // Ignore an unmatched " at the end of the query to
00686                         // avoid generating an empty pair of QUOTEs which will
00687                         // cause a parse error.
00688                         goto done;
00689                     }
00690                     if (*it == '"') {
00691                         // Ignore empty "" (but only if we're not already
00692                         // IN_QUOTES as we don't merge two adjacent quoted
00693                         // phrases!)
00694                         newprev = *it++;
00695                         break;
00696                     }
00697                 }
00698                 if (flags & QueryParser::FLAG_PHRASE) {
00699                     Parse(pParser, QUOTE, NULL, &state);
00700                     if (mode == DEFAULT) {
00701                         mode = IN_QUOTES;
00702                     } else {
00703                         // Remove the prefix we pushed for this phrase.
00704                         if (mode == IN_PREFIXED_QUOTES)
00705                             prefix_stack.pop_back();
00706                         mode = DEFAULT;
00707                     }
00708                 }
00709                 break;
00710 
00711               case '+': case '-': // Loved or hated term/phrase/subexpression.
00712                 // Ignore + or - at the end of the query string.
00713                 if (it == end) goto done;
00714                 if (prev > ' ' && prev != '(') {
00715                     // Or if not after whitespace or an open bracket.
00716                     break;
00717                 }
00718                 if (is_whitespace(*it) || *it == '+' || *it == '-') {
00719                     // Ignore + or - followed by a space, or further + or -.
00720                     // Postfix + (such as in C++ and H+) is handled as part of
00721                     // the term lexing code in parse_term().
00722                     newprev = *it++;
00723                     break;
00724                 }
00725                 if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
00726                     int token;
00727                     if (ch == '+') {
00728                         token = LOVE;
00729                     } else if (last_was_operator) {
00730                         token = HATE_AFTER_AND;
00731                     } else {
00732                         token = HATE;
00733                     }
00734                     Parse(pParser, token, NULL, &state);
00735                     goto just_had_operator;
00736                 }
00737                 // Need to prevent the term after a LOVE or HATE starting a
00738                 // term group...
00739                 break;
00740 
00741               case '(': // Bracketed subexpression.
00742                 // Skip whitespace.
00743                 it = find_if(it, end, is_not_whitespace);
00744                 // Ignore ( at the end of the query string.
00745                 if (it == end) goto done;
00746                 if (prev > ' ' && strchr("()+-", prev) == NULL) {
00747                     // Or if not after whitespace or a bracket or '+' or '-'.
00748                     break;
00749                 }
00750                 if (*it == ')') {
00751                     // Ignore empty ().
00752                     newprev = *it++;
00753                     break;
00754                 }
00755                 if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
00756                     prefix_stack.push_back(prefix_stack.back());
00757                     Parse(pParser, BRA, NULL, &state);
00758                 }
00759                 break;
00760 
00761               case ')': // End of bracketed subexpression.
00762                 if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
00763                     // Remove the prefix we pushed for the corresponding BRA.
00764                     // If brackets are unmatched, it's a syntax error, but
00765                     // that's no excuse to SEGV!
00766                     if (prefix_stack.size() > 1) prefix_stack.pop_back();
00767                     Parse(pParser, KET, NULL, &state);
00768                 }
00769                 break;
00770 
00771               case '~': // Synonym expansion.
00772                 // Ignore at the end of the query string.
00773                 if (it == end) goto done;
00774                 if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
00775                     if (prev > ' ' && strchr("+-(", prev) == NULL) {
00776                         // Or if not after whitespace, +, -, or an open bracket.
00777                         break;
00778                     }
00779                     if (!is_wordchar(*it)) {
00780                         // Ignore if not followed by a word character.
00781                         break;
00782                     }
00783                     Parse(pParser, SYNONYM, NULL, &state);
00784                     mode = EXPLICIT_SYNONYM;
00785                     goto just_had_synonym_operator;
00786                 }
00787                 break;
00788             }
00789             // Skip any other characters.
00790             continue;
00791         }
00792 
00793         Assert(is_wordchar(*it));
00794 
00795         size_t term_start_index = it.raw() - qs.data();
00796 
00797         newprev = 'A'; // Any letter will do...
00798 
00799         // A term, a prefix, or a boolean operator.
00800         const PrefixInfo * prefixinfo = NULL;
00801         if ((mode == DEFAULT || mode == IN_GROUP || mode == EXPLICIT_SYNONYM) &&
00802             !prefixmap.empty()) {
00803             // Check for a fieldname prefix (e.g. title:historical).
00804             Utf8Iterator p = find_if(it, end, is_not_wordchar);
00805             if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
00806                 string field;
00807                 p = it;
00808                 while (*p != ':')
00809                     Unicode::append_utf8(field, *p++);
00810                 map<string, PrefixInfo>::const_iterator f;
00811                 f = prefixmap.find(field);
00812                 if (f != prefixmap.end()) {
00813                     // Special handling for prefixed fields, depending on the
00814                     // type of the prefix.
00815                     unsigned ch = *++p;
00816                     prefixinfo = &(f->second);
00817 
00818                     if (prefixinfo->filter) {
00819                         // Drop out of IN_GROUP if we're in it.
00820                         if (mode == IN_GROUP)
00821                             mode = DEFAULT;
00822                         it = p;
00823                         string name;
00824                         if (it != end && *it == '"') {
00825                             // Quoted boolean term (can contain any character).
00826                             ++it;
00827                             while (it != end) {
00828                                 if (*it == '"') {
00829                                     // Interpret "" as an escaped ".
00830                                     if (++it == end || *it != '"')
00831                                         break;
00832                                 }
00833                                 Unicode::append_utf8(name, *it++);
00834                             }
00835                         } else {
00836                             // Can't boolean filter prefix a subexpression, so
00837                             // just use anything following the prefix until the
00838                             // next space or ')' as part of the boolean filter
00839                             // term.
00840                             while (it != end && *it > ' ' && *it != ')')
00841                                 Unicode::append_utf8(name, *it++);
00842                         }
00843                         // Build the unstemmed form in field.
00844                         field += ':';
00845                         field += name;
00846                         const list<string> & prefixes = prefixinfo->prefixes;
00847                         Term * token = new Term(&state, name, prefixes, field);
00848                         Parse(pParser, BOOLEAN_FILTER, token, &state);
00849                         continue;
00850                     }
00851 
00852                     if (ch == '"' && (flags & FLAG_PHRASE)) {
00853                         // Prefixed phrase, e.g.: subject:"space flight"
00854                         mode = IN_PREFIXED_QUOTES;
00855                         Parse(pParser, QUOTE, NULL, &state);
00856                         it = p;
00857                         newprev = ch;
00858                         ++it;
00859                         prefix_stack.push_back(prefixinfo);
00860                         continue;
00861                     }
00862 
00863                     if (ch == '(' && (flags & FLAG_BOOLEAN)) {
00864                         // Prefixed subexpression, e.g.: title:(fast NEAR food)
00865                         mode = DEFAULT;
00866                         Parse(pParser, BRA, NULL, &state);
00867                         it = p;
00868                         newprev = ch;
00869                         ++it;
00870                         prefix_stack.push_back(prefixinfo);
00871                         continue;
00872                     }
00873 
00874                     if (is_wordchar(ch)) {
00875                         // Prefixed term.
00876                         it = p;
00877                     } else {
00878                         // It looks like a prefix but isn't, so parse it as
00879                         // text instead.
00880                         prefixinfo = NULL;
00881                     }
00882                 }
00883             }
00884         }
00885 
00886 phrased_term:
00887         bool was_acronym;
00888         string term = parse_term(it, end, was_acronym);
00889 
00890         // Boolean operators.
00891         if ((mode == DEFAULT || mode == IN_GROUP) &&
00892             (flags & FLAG_BOOLEAN) &&
00893             // Don't want to interpret A.N.D. as an AND operator.
00894             !was_acronym &&
00895             !prefixinfo &&
00896             term.size() >= 2 && term.size() <= 4 && U_isalpha(term[0])) {
00897 
00898             string op = term;
00899             if (flags & FLAG_BOOLEAN_ANY_CASE) {
00900                 for (string::iterator i = op.begin(); i != op.end(); ++i) {
00901                     *i = C_toupper(*i);
00902                 }
00903             }
00904             if (op.size() == 3) {
00905                 if (op == "AND") {
00906                     Parse(pParser, AND, NULL, &state);
00907                     goto just_had_operator;
00908                 }
00909                 if (op == "NOT") {
00910                     Parse(pParser, NOT, NULL, &state);
00911                     goto just_had_operator;
00912                 }
00913                 if (op == "XOR") {
00914                     Parse(pParser, XOR, NULL, &state);
00915                     goto just_had_operator;
00916                 }
00917                 if (op == "ADJ") {
00918                     if (it != end && *it == '/') {
00919                         size_t width = 0;
00920                         Utf8Iterator p = it;
00921                         while (++p != end && U_isdigit(*p)) {
00922                             width = (width * 10) + (*p - '0');
00923                         }
00924                         if (width && (p == end || is_whitespace(*p))) {
00925                             it = p;
00926                             Parse(pParser, ADJ, new Term(width), &state);
00927                             goto just_had_operator;
00928                         }
00929                     }
00930 
00931                     Parse(pParser, ADJ, NULL, &state);
00932                     goto just_had_operator;
00933                 }
00934             } else if (op.size() == 2) {
00935                 if (op == "OR") {
00936                     Parse(pParser, OR, NULL, &state);
00937                     goto just_had_operator;
00938                 }
00939             } else if (op.size() == 4) {
00940                 if (op == "NEAR") {
00941                     if (it != end && *it == '/') {
00942                         size_t width = 0;
00943                         Utf8Iterator p = it;
00944                         while (++p != end && U_isdigit(*p)) {
00945                             width = (width * 10) + (*p - '0');
00946                         }
00947                         if (width && (p == end || is_whitespace(*p))) {
00948                             it = p;
00949                             Parse(pParser, NEAR, new Term(width), &state);
00950                             goto just_had_operator;
00951                         }
00952                     }
00953 
00954                     Parse(pParser, NEAR, NULL, &state);
00955                     goto just_had_operator;
00956                 }
00957             }
00958         }
00959 
00960         // If no prefix is set, use the default one.
00961         if (!prefixinfo) prefixinfo = prefix_stack.back();
00962 
00963         Assert(!prefixinfo->filter);
00964 
00965         {
00966             string unstemmed_term(term);
00967             term = Unicode::tolower(term);
00968 
00969             // Reuse stem_strategy - STEM_SOME here means "stem terms except
00970             // when used with positional operators".
00971             stem_strategy stem_term = stem_action;
00972             if (stem_term != STEM_NONE) {
00973                 if (!stemmer.internal.get()) {
00974                     // No stemmer is set.
00975                     stem_term = STEM_NONE;
00976                 } else if (stem_term == STEM_SOME) {
00977                     if (!should_stem(unstemmed_term) ||
00978                         (it != end && is_stem_preventer(*it))) {
00979                         // Don't stem this particular term.
00980                         stem_term = STEM_NONE;
00981                     }
00982                 }
00983             }
00984 
00985             Term * term_obj = new Term(&state, term, prefixinfo->prefixes,
00986                                        unstemmed_term, stem_term, term_pos++);
00987 
00988             // Check spelling, if we're a normal term, and any of the prefixes
00989             // are empty.
00990             if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
00991                 list<string>::const_iterator prefixiter;
00992                 for (prefixiter = prefixinfo->prefixes.begin();
00993                      prefixiter != prefixinfo->prefixes.end();
00994                      ++prefixiter) {
00995                     if (!prefixiter->empty())
00996                         continue;
00997                     if (!db.term_exists(term)) {
00998                         string suggestion = db.get_spelling_suggestion(term);
00999                         if (!suggestion.empty()) {
01000                             if (corrected_query.empty()) corrected_query = qs;
01001                             size_t term_end_index = it.raw() - qs.data();
01002                             size_t n = term_end_index - term_start_index;
01003                             size_t pos = term_start_index + correction_offset;
01004                             corrected_query.replace(pos, n, suggestion);
01005                             correction_offset += suggestion.size();
01006                             correction_offset -= n;
01007                         }
01008                     }
01009                     break;
01010                 }
01011             }
01012 
01013             if (mode == IN_PHRASED_TERM) {
01014                 Parse(pParser, PHR_TERM, term_obj, &state);
01015             } else {
01016                 if (mode == DEFAULT || mode == IN_GROUP) {
01017                     if (it != end) {
01018                         if ((flags & FLAG_WILDCARD) && *it == '*') {
01019                             Utf8Iterator p(it);
01020                             ++p;
01021                             if (p == end || !is_wordchar(*p)) {
01022                                 it = p;
01023                                 // Wildcard at end of term (also known as
01024                                 // "right truncation").
01025                                 Parse(pParser, WILD_TERM, term_obj, &state);
01026                                 continue;
01027                             }
01028                         }
01029                     } else {
01030                         if (flags & FLAG_PARTIAL) {
01031                             // Final term of a partial match query, with no
01032                             // following characters - treat as a wildcard.
01033                             Parse(pParser, PARTIAL_TERM, term_obj, &state);
01034                             continue;
01035                         }
01036                     }
01037                 }
01038 
01039                 // See if the next token will be PHR_TERM - if so, this one
01040                 // needs to be TERM not GROUP_TERM.
01041                 if (mode == IN_GROUP && is_phrase_generator(*it)) {
01042                     // FIXME: can we clean this up?
01043                     Utf8Iterator p = it;
01044                     do {
01045                         ++p;
01046                     } while (p != end && is_phrase_generator(*p));
01047                     // Don't generate a phrase unless the phrase generators are
01048                     // immediately followed by another term.
01049                     if (p != end && is_wordchar(*p)) {
01050                         mode = DEFAULT;
01051                     }
01052                 }
01053 
01054                 Parse(pParser, (mode == IN_GROUP ? GROUP_TERM : TERM),
01055                       term_obj, &state);
01056                 if (mode != DEFAULT && mode != IN_GROUP) continue;
01057             }
01058         }
01059 
01060         if (it == end) break;
01061 
01062         if (is_phrase_generator(*it)) {
01063             // Skip multiple phrase generators.
01064             do {
01065                 ++it;
01066             } while (it != end && is_phrase_generator(*it));
01067             // Don't generate a phrase unless the phrase generators are
01068             // immediately followed by another term.
01069             if (it != end && is_wordchar(*it)) {
01070                 mode = IN_PHRASED_TERM;
01071                 term_start_index = it.raw() - qs.data();
01072                 goto phrased_term;
01073             }
01074         } else if (mode == DEFAULT || mode == IN_GROUP) {
01075             mode = DEFAULT;
01076             if (!last_was_operator && is_whitespace(*it)) {
01077                 newprev = ' ';
01078                 // Skip multiple whitespace.
01079                 do {
01080                     ++it;
01081                 } while (it != end && is_whitespace(*it));
01082                 // Don't generate a group unless the terms are only separated
01083                 // by whitespace.
01084                 if (it != end && is_wordchar(*it)) {
01085                     mode = IN_GROUP;
01086                 }
01087             }
01088         }
01089     }
01090 done:
01091     if (!state.error) {
01092         // Implicitly close any unclosed quotes...
01093         if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
01094             Parse(pParser, QUOTE, NULL, &state);
01095         Parse(pParser, 0, NULL, &state);
01096     }
01097 
01098     errmsg = state.error;
01099     return state.query;
01100 }
01101 
01102 struct ProbQuery {
01103     Query * query;
01104     Query * love;
01105     Query * hate;
01106     // filter is a map from prefix to a query for that prefix.  Queries with
01107     // the same prefix are combined with OR, and the results of this are
01108     // combined with AND to get the full filter.
01109     map<filter_group_id, Query> filter;
01110 
01111     ProbQuery() : query(0), love(0), hate(0) { }
01112     ~ProbQuery() {
01113         delete query;
01114         delete love;
01115         delete hate;
01116     }
01117 
01118     Query merge_filters() const {
01119         map<filter_group_id, Query>::const_iterator i = filter.begin();
01120         Assert(i != filter.end());
01121         Query q = i->second;
01122         while (++i != filter.end()) {
01123             q = Query(Query::OP_AND, q, i->second);
01124         }
01125         return q;
01126     }
01127 };
01128 
01129 class TermGroup {
01130     vector<Term *> terms;
01131 
01132   public:
01133     TermGroup() { }
01134 
01136     void add_term(Term * term) {
01137         terms.push_back(term);
01138     }
01139 
01141     Query * as_group(State *state) const;
01142 
01146     void destroy() { delete this; }
01147 
01148   protected:
01152     ~TermGroup() {
01153         vector<Term*>::const_iterator i;
01154         for (i = terms.begin(); i != terms.end(); ++i) {
01155             delete *i;
01156         }
01157     }
01158 };
01159 
/* Convert this group of terms into a single Query and delete the group.
 *
 * Each term contributes a subquery; the subqueries are combined with the
 * state's default operator.  Terms which the state reports as stopwords are
 * added to the state's stoplist instead of to the query.
 *
 * When FLAG_AUTO_MULTIWORD_SYNONYMS is set, runs of consecutive terms are
 * looked up (joined with single spaces) as synonym keys in the database, and
 * a matching run is replaced by (original terms) OR (synonyms).
 *
 * Returns a newly allocated Query, or NULL if no subqueries were produced.
 * The TermGroup is deleted before returning in either case.
 */
01160 Query *
01161 TermGroup::as_group(State *state) const
01162 {
01163     Query::op default_op = state->default_op();
01164     vector<Query> subqs;
01165     subqs.reserve(terms.size());
01166     if (state->flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
01167         // Check for multi-word synonyms.
01168         Database db = state->get_database();
01169 
01170         string key;
              // begin marks the first term of the run currently being
              // examined; i is the scan position.  They are equal at the top
              // of every iteration of the outer loop.
01171         vector<Term*>::const_iterator begin = terms.begin();
01172         vector<Term*>::const_iterator i = begin;
01173         while (i != terms.end()) {
                  // Iterate over the synonym keys obtained for this term's
                  // name.
01174             TermIterator synkey(db.synonym_keys_begin((*i)->name));
01175             TermIterator synend(db.synonym_keys_end((*i)->name));
01176             if (synkey == synend) {
01177                 // No multi-synonym matches.
01178                 if (state->is_stopword(*i)) {
01179                     state->add_to_stoplist(*i);
01180                 } else {
01181                     subqs.push_back((*i)->get_query_with_auto_synonyms());
01182                 }
01183                 begin = ++i;
01184                 continue;
01185             }
                  // Extend the key one word at a time for as long as some
                  // synonym key still starts with it.  Note that i is advanced
                  // before the check, so on exit the key may include one word
                  // too many; the loop below backs off as needed.
01186             key.resize(0);
01187             while (i != terms.end()) {
01188                 if (!key.empty()) key += ' ';
01189                 key += (*i)->name;
01190                 ++i;
01191                 synkey.skip_to(key);
01192                 if (synkey == synend || !startswith(*synkey, key)) break;
01193             }
01194             // Greedily try to match as many consecutive words as possible.
01195             TermIterator syn, end;
01196             while (true) {
01197                 syn = db.synonyms_begin(key);
01198                 end = db.synonyms_end(key);
01199                 if (syn != end) break;
                      // No synonyms for this key.  Step back one term; if that
                      // leaves us at the start of the run, even the single
                      // first word had no synonyms, so give up.
01200                 if (--i == begin) break;
                      // Remove the last word and its separating space.
01201                 key.resize(key.size() - (*i)->name.size() - 1);
01202             }
01203             if (i == begin) {
01204                 // No multi-synonym matches.
01205                 if (state->is_stopword(*i)) {
01206                     state->add_to_stoplist(*i);
01207                 } else {
01208                     subqs.push_back((*i)->get_query_with_auto_synonyms());
01209                 }
01210                 begin = ++i;
01211                 continue;
01212             }
01213 
                  // The terms in [begin, i) matched a synonym key.  First
                  // build the query for the original terms themselves.
01214             vector<Query> subqs2;
01215             vector<Term*>::const_iterator j;
01216             for (j = begin; j != i; ++j) {
01217                 if (state->is_stopword(*j)) {
01218                     state->add_to_stoplist(*j);
01219                 } else {
01220                     subqs2.push_back((*j)->get_query());
01221                 }
01222             }
01223             Query q_original_terms(default_op, subqs2.begin(), subqs2.end());
01224             subqs2.clear();
01225 
01226             // Use the position of the first term for the synonyms.
01227             Xapian::termpos pos = (*begin)->pos;
01228             begin = i;
                  // Then OR together one single-term query per synonym.
01229             while (syn != end) {
01230                 subqs2.push_back(Query(*syn, 1, pos));
01231                 ++syn;
01232             }
01233             Query q_synonym_terms(Query::OP_OR, subqs2.begin(), subqs2.end());
01234             subqs2.clear();
01235             subqs.push_back(Query(Query::OP_OR,
01236                                   q_original_terms, q_synonym_terms));
01237         }
01238     } else {
              // No multi-word synonym handling: one subquery per
              // non-stopword term.
01239         vector<Term*>::const_iterator i;
01240         for (i = terms.begin(); i != terms.end(); ++i) {
01241             if (state->is_stopword(*i)) {
01242                 state->add_to_stoplist(*i);
01243             } else {
01244                 subqs.push_back((*i)->get_query_with_auto_synonyms());
01245             }
01246         }
01247     }
          // The group is finished with: this destroys the object (and the
          // terms it holds), so no members may be touched after this point.
01248     delete this;
01249     if (subqs.empty()) return NULL;
01250     return new Query(default_op, subqs.begin(), subqs.end());
01251 }
01252 
01253 class TermList {
01254     vector<Term *> terms;
01255     size_t window;
01256 
01266     bool uniform_prefixes;
01267 
01271     list<string> prefixes;
01272 
01274     Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const {
01275         Query * q = NULL;
01276         size_t n_terms = terms.size();
01277         Xapian::termcount w = w_delta + terms.size();
01278         if (uniform_prefixes) {
01279             list<string>::const_iterator piter;
01280             for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
01281                 vector<Query> subqs;
01282                 subqs.reserve(n_terms);
01283                 vector<Term *>::const_iterator titer;
01284                 for (titer = terms.begin(); titer != terms.end(); ++titer) {
01285                     Term * t = *titer;
01286                     subqs.push_back(Query(t->make_term(*piter), 1, t->pos));
01287                 }
01288                 add_to_query(q, Query::OP_OR,
01289                              Query(op, subqs.begin(), subqs.end(), w));
01290             }
01291         } else {
01292             vector<Query> subqs;
01293             subqs.reserve(n_terms);
01294             vector<Term *>::const_iterator titer;
01295             for (titer = terms.begin(); titer != terms.end(); ++titer) {
01296                 subqs.push_back((*titer)->get_query());
01297             }
01298             q = new Query(op, subqs.begin(), subqs.end(), w);
01299         }
01300 
01301         delete this;
01302         return q;
01303     }
01304 
01305   public:
01306     TermList() : window(0), uniform_prefixes(true) { }
01307 
01309     void add_positional_term(Term * term) {
01310         if (terms.empty()) {
01311             prefixes = term->prefixes;
01312         } else if (uniform_prefixes && prefixes != term->prefixes)  {
01313             prefixes.clear();
01314             uniform_prefixes = false;
01315         }
01316         term->need_positions();
01317         terms.push_back(term);
01318     }
01319 
01320     void adjust_window(size_t alternative_window) {
01321         if (alternative_window > window) window = alternative_window;
01322     }
01323 
01325     Query * as_phrase_query() const {
01326         return as_opwindow_query(Query::OP_PHRASE, 0);
01327     }
01328 
01330     Query * as_near_query() const {
01331         // The common meaning of 'a NEAR b' is "a within 10 terms of b", which
01332         // means a window size of 11.  For more than 2 terms, we just add one
01333         // to the window size for each extra term.
01334         size_t w = window;
01335         if (w == 0) w = 10;
01336         return as_opwindow_query(Query::OP_NEAR, w - 1);
01337     }
01338 
01340     Query * as_adj_query() const {
01341         // The common meaning of 'a ADJ b' is "a at most 10 terms before b",
01342         // which means a window size of 11.  For more than 2 terms, we just add
01343         // one to the window size for each extra term.
01344         size_t w = window;
01345         if (w == 0) w = 10;
01346         return as_opwindow_query(Query::OP_PHRASE, w - 1);
01347     }
01348 
01352     void destroy() { delete this; }
01353 
01354   protected:
01358     ~TermList() {
01359         vector<Term *>::const_iterator t;
01360         for (t = terms.begin(); t != terms.end(); ++t) {
01361             delete *t;
01362         }
01363     }
01364 };
01365 
// Helper macro for converting a boolean operation into a Xapian::Query.
//
// On success E is set to a newly allocated Query combining *A and *B with
// operator OP, and A and B are then deleted.  If either A or B is NULL,
// state->error is set to a syntax message mentioning OP_TXT,
// yy_parse_failed(yypParser) is called and the enclosing function returns;
// A and B are not deleted by the macro on that path.
//
// The macro expects `state` and `yypParser` to be in scope where it is used,
// and must only be used in a function returning void.
//
// The whitespace around OP_TXT in the message is required: from C++11
// onwards `"..."OP_TXT` is lexed as a string literal with a user-defined
// literal suffix, so OP_TXT would not be substituted.
#define BOOL_OP_TO_QUERY(E, A, OP, B, OP_TXT) \
    do {\
        if (!(A) || !(B)) {\
            state->error = "Syntax: <expression> " OP_TXT " <expression>";\
            yy_parse_failed(yypParser);\
            return;\
        }\
        E = new Query(OP, *(A), *(B));\
        delete (A);\
        delete (B);\
    } while (0)
01378 
01379 #line 1381 "queryparser/queryparser_internal.cc"
01380 /* Next is all token values, in a form suitable for use by makeheaders.
01381 ** This section will be null unless lemon is run with the -m switch.
01382 */
01383 /* 
01384 ** These constants (all generated automatically by the parser generator)
01385 ** specify the various kinds of tokens (terminals) that the parser
01386 ** understands. 
01387 **
01388 ** Each symbol here is a terminal symbol in the grammar.
01389 */
01390 /* Make sure the INTERFACE macro is defined.
01391 */
01392 #ifndef INTERFACE
01393 # define INTERFACE 1
01394 #endif
01395 /* The next thing included is a series of defines which control
01396 ** various aspects of the generated parser.
01397 **    YYCODETYPE         is the data type used for storing terminal
01398 **                       and nonterminal numbers.  "unsigned char" is
01399 **                       used if there are fewer than 250 terminals
01400 **                       and nonterminals.  "int" is used otherwise.
01401 **    YYNOCODE           is a number of type YYCODETYPE which corresponds
01402 **                       to no legal terminal or nonterminal number.  This
01403 **                       number is used to fill in empty slots of the hash 
01404 **                       table.
01405 **    YYFALLBACK         If defined, this indicates that one or more tokens
01406 **                       have fall-back values which should be used if the
01407 **                       original value of the token will not parse.
01408 **    YYACTIONTYPE       is the data type used for storing action
01409 **                       numbers (see yy_action[] below).  "unsigned char" is
01410 **                       used if there are fewer than 250 rules and
01411 **                       states combined.  "int" is used otherwise.
01412 **    ParseTOKENTYPE     is the data type used for minor tokens given 
01413 **                       directly to the parser from the tokenizer.
01414 **    YYMINORTYPE        is the data type used for all minor tokens.
01415 **                       This is typically a union of many types, one of
01416 **                       which is ParseTOKENTYPE.  The entry in the union
01417 **                       for base tokens is called "yy0".
01418 **    YYSTACKDEPTH       is the maximum depth of the parser's stack.
01419 **    ParseARG_SDECL     A static variable declaration for the %extra_argument
01420 **    ParseARG_PDECL     A parameter declaration for the %extra_argument
01421 **    ParseARG_STORE     Code to store %extra_argument into yypParser
01422 **    ParseARG_FETCH     Code to extract %extra_argument from yypParser
01423 **    YYNSTATE           the combined number of states.
01424 **    YYNRULE            the number of rules in the grammar
01425 **    YYERRORSYMBOL      is the code number of the error symbol.  If not
01426 **                       defined, then do no error processing.
01427 */
/* Generated parser configuration for this grammar.  The meaning of each
** name is given in the key in the comment preceding these defines. */
01428 #define YYCODETYPE unsigned char
01429 #define YYNOCODE 39
01430 #define YYACTIONTYPE unsigned char
01431 #define ParseTOKENTYPE Term *
/* Value carried by each parser stack entry.  yy0 holds a token passed in by
** the tokenizer (ParseTOKENTYPE, i.e. Term *) and yy77 is the member used
** for the error symbol (see YYERRSYMDT).  NOTE(review): the remaining
** members presumably correspond to the value types of the grammar's
** nonterminals - confirm against queryparser.lemony. */
01432 typedef union {
01433   ParseTOKENTYPE yy0;
01434   int yy8;
01435   ProbQuery * yy12;
01436   TermList * yy27;
01437   Query * yy73;
01438   TermGroup * yy76;
01439   int yy77;
01440 } YYMINORTYPE;
01441 #define YYSTACKDEPTH 100
/* The %extra_argument is a State pointer, stored in the parser object by
** ParseARG_STORE and read back as a local named `state` by ParseARG_FETCH. */
01442 #define ParseARG_SDECL State * state;
01443 #define ParseARG_PDECL ,State * state
01444 #define ParseARG_FETCH State * state = yypParser->state
01445 #define ParseARG_STORE yypParser->state = state
01446 #define YYNSTATE 75
01447 #define YYNRULE 52
01448 #define YYERRORSYMBOL 23
01449 #define YYERRSYMDT yy77
/* Special action numbers, placed directly after the shift range
** (0 .. YYNSTATE-1) and the reduce range (YYNSTATE .. YYNSTATE+YYNRULE-1). */
01450 #define YY_NO_ACTION      (YYNSTATE+YYNRULE+2)
01451 #define YY_ACCEPT_ACTION  (YYNSTATE+YYNRULE+1)
01452 #define YY_ERROR_ACTION   (YYNSTATE+YYNRULE)
01453 
01454 /* Next are the tables used to determine what action to take based on the
01455 ** current state and lookahead token.  These tables are used to implement
01456 ** functions that take a state number and lookahead value and return an
01457 ** action integer.  
01458 **
01459 ** Suppose the action integer is N.  Then the action is determined as
01460 ** follows
01461 **
01462 **   0 <= N < YYNSTATE                  Shift N.  That is, push the lookahead
01463 **                                      token onto the stack and goto state N.
01464 **
01465 **   YYNSTATE <= N < YYNSTATE+YYNRULE   Reduce by rule N-YYNSTATE.
01466 **
01467 **   N == YYNSTATE+YYNRULE              A syntax error has occurred.
01468 **
01469 **   N == YYNSTATE+YYNRULE+1            The parser accepts its input.
01470 **
01471 **   N == YYNSTATE+YYNRULE+2            No such action.  Denotes unused
01472 **                                      slots in the yy_action[] table.
01473 **
01474 ** The action table is constructed as a single large table named yy_action[].
01475 ** Given state S and lookahead X, the action is computed as
01476 **
01477 **      yy_action[ yy_shift_ofst[S] + X ]
01478 **
01479 ** If the index value yy_shift_ofst[S]+X is out of range or if the value
01480 ** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X or if yy_shift_ofst[S]
01481 ** is equal to YY_SHIFT_USE_DFLT, it means that the action is not in the table
01482 ** and that yy_default[S] should be used instead.  
01483 **
01484 ** The formula above is for computing the action when the lookahead is
01485 ** a terminal symbol.  If the lookahead is a non-terminal (as occurs after
01486 ** a reduce action) then the yy_reduce_ofst[] array is used in place of
01487 ** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of
01488 ** YY_SHIFT_USE_DFLT.
01489 **
01490 ** The following are the tables generated in this section:
01491 **
01492 **  yy_action[]        A single table containing all actions.
01493 **  yy_lookahead[]     A table containing the lookahead for each entry in
01494 **                     yy_action.  Used to detect hash collisions.
01495 **  yy_shift_ofst[]    For each state, the offset into yy_action for
01496 **                     shifting terminals.
01497 **  yy_reduce_ofst[]   For each state, the offset into yy_action for
01498 **                     shifting non-terminals after a reduce.
01499 **  yy_default[]       Default action for each state.
01500 */
/* Combined action table produced by the parser generator; do not edit by
** hand.  Each entry is an action number: values below YYNSTATE are shifts,
** the next YYNRULE values are reduces, and these are followed by the error
** (YY_ERROR_ACTION), accept (YY_ACCEPT_ACTION) and unused-slot
** (YY_NO_ACTION) codes.  A slot only applies to a given lookahead when the
** entry at the same index of yy_lookahead[] equals that lookahead. */
01501 static const YYACTIONTYPE yy_action[] = {
01502  /*     0 */   128,    1,    2,    3,   13,   43,   46,   60,   70,   19,
01503  /*    10 */    30,   32,   34,   37,   71,    4,    6,   62,   65,   73,
01504  /*    20 */    54,   69,   75,   26,   23,   24,   68,   44,   21,   25,
01505  /*    30 */    40,   12,    2,    5,   13,   43,   46,   60,   70,   29,
01506  /*    40 */    30,   32,   34,   37,   12,    2,    7,   13,   43,   46,
01507  /*    50 */    60,   70,   31,   30,   32,   34,   37,   12,    2,    9,
01508  /*    60 */    13,   43,   46,   60,   70,   33,   30,   32,   34,   37,
01509  /*    70 */    12,    2,   11,   13,   43,   46,   60,   70,   35,   30,
01510  /*    80 */    32,   34,   37,   41,    2,    3,   13,   43,   46,   60,
01511  /*    90 */    70,   36,   30,   32,   34,   37,   12,    2,   72,   13,
01512  /*   100 */    43,   46,   60,   70,   38,   30,   32,   34,   37,   12,
01513  /*   110 */     2,   74,   13,   43,   46,   60,   70,   76,   30,   32,
01514  /*   120 */    34,   37,   10,    4,    6,   62,   65,   39,   54,   69,
01515  /*   130 */    49,   56,   23,   24,   68,   44,   42,   25,   40,   59,
01516  /*   140 */    47,   62,   65,   45,   54,   69,   48,   28,   23,   24,
01517  /*   150 */    68,   44,   55,   25,   40,   27,   92,  102,  102,   92,
01518  /*   160 */    54,   15,   92,   92,   23,   24,  102,  102,   92,   25,
01519  /*   170 */    40,  106,   92,  106,  106,  106,  106,   18,   20,    8,
01520  /*   180 */    10,    4,    6,   92,   17,   16,   54,   52,   92,   92,
01521  /*   190 */    23,   24,   51,  106,   92,   25,   40,   54,   52,   92,
01522  /*   200 */    92,   23,   24,   58,   54,   52,   25,   40,   23,   24,
01523  /*   210 */    64,   92,   92,   25,   40,   54,   52,   92,   92,   23,
01524  /*   220 */    24,   67,   92,   92,   25,   40,   92,   14,   22,   92,
01525  /*   230 */    30,   32,   34,   37,   50,   92,   92,   53,   92,   30,
01526  /*   240 */    32,   34,   37,   57,   92,   92,   53,   92,   30,   32,
01527  /*   250 */    34,   37,   54,   15,   92,   92,   23,   24,   92,   92,
01528  /*   260 */    92,   25,   40,   61,   22,   92,   30,   32,   34,   37,
01529  /*   270 */    92,   63,   92,   92,   53,   92,   30,   32,   34,   37,
01530  /*   280 */    66,   92,   92,   53,   92,   30,   32,   34,   37,  107,
01531  /*   290 */    92,  107,  107,  107,  107,   92,   18,   20,   92,   92,
01532  /*   300 */    92,   92,   92,   17,   16,   92,   92,   92,   92,   92,
01533  /*   310 */    92,  107,
01534 };
/* Lookahead check table produced by the parser generator; do not edit by
** hand.  It is indexed in parallel with yy_action[]: entry k holds the
** symbol code for which yy_action[k] is valid.  If the entry does not equal
** the actual lookahead, the state's default action is used instead. */
01535 static const YYCODETYPE yy_lookahead[] = {
01536  /*     0 */    24,   25,   26,   27,   28,   29,   30,   31,   32,   12,
01537  /*    10 */    34,   35,   36,   37,    5,    4,    5,    8,    9,   10,
01538  /*    20 */    11,   12,    0,   33,   15,   16,   17,   18,   12,   20,
01539  /*    30 */    21,   25,   26,   27,   28,   29,   30,   31,   32,   12,
01540  /*    40 */    34,   35,   36,   37,   25,   26,   27,   28,   29,   30,
01541  /*    50 */    31,   32,   14,   34,   35,   36,   37,   25,   26,   27,
01542  /*    60 */    28,   29,   30,   31,   32,   13,   34,   35,   36,   37,
01543  /*    70 */    25,   26,   27,   28,   29,   30,   31,   32,    6,   34,
01544  /*    80 */    35,   36,   37,   25,   26,   27,   28,   29,   30,   31,
01545  /*    90 */    32,   12,   34,   35,   36,   37,   25,   26,   27,   28,
01546  /*   100 */    29,   30,   31,   32,    7,   34,   35,   36,   37,   25,
01547  /*   110 */    26,   27,   28,   29,   30,   31,   32,    0,   34,   35,
01548  /*   120 */    36,   37,    3,    4,    5,    8,    9,   12,   11,   12,
01549  /*   130 */     8,    9,   15,   16,   17,   18,   22,   20,   21,   17,
01550  /*   140 */    18,    8,    9,   19,   11,   12,   19,   12,   15,   16,
01551  /*   150 */    17,   18,   12,   20,   21,   20,   38,    8,    9,   38,
01552  /*   160 */    11,   12,   38,   38,   15,   16,   17,   18,   38,   20,
01553  /*   170 */    21,    0,   38,    2,    3,    4,    5,    6,    7,    2,
01554  /*   180 */     3,    4,    5,   38,   13,   14,   11,   12,   38,   38,
01555  /*   190 */    15,   16,   17,   22,   38,   20,   21,   11,   12,   38,
01556  /*   200 */    38,   15,   16,   17,   11,   12,   20,   21,   15,   16,
01557  /*   210 */    17,   38,   38,   20,   21,   11,   12,   38,   38,   15,
01558  /*   220 */    16,   17,   38,   38,   20,   21,   38,   31,   32,   38,
01559  /*   230 */    34,   35,   36,   37,   29,   38,   38,   32,   38,   34,
01560  /*   240 */    35,   36,   37,   29,   38,   38,   32,   38,   34,   35,
01561  /*   250 */    36,   37,   11,   12,   38,   38,   15,   16,   38,   38,
01562  /*   260 */    38,   20,   21,   31,   32,   38,   34,   35,   36,   37,
01563  /*   270 */    38,   29,   38,   38,   32,   38,   34,   35,   36,   37,
01564  /*   280 */    29,   38,   38,   32,   38,   34,   35,   36,   37,    0,
01565  /*   290 */    38,    2,    3,    4,    5,   38,    6,    7,   38,   38,
01566  /*   300 */    38,   38,   38,   13,   14,   38,   38,   38,   38,   38,
01567  /*   310 */    38,   22,
01568 };
/* yy_shift_ofst[] is indexed by state number.  yy_find_shift_action() adds
** the look-ahead token code to the state's entry to obtain an index into
** yy_action[]/yy_lookahead[].  An entry equal to YY_SHIFT_USE_DFLT makes
** yy_find_shift_action() return yy_default[state] without consulting the
** look-ahead at all. */
01569 #define YY_SHIFT_USE_DFLT (-4)
01570 static const short yy_shift_ofst[] = {
01571  /*     0 */   117,   22,   -4,  177,    9,   -4,  133,   -4,  133,  119,
01572  /*    10 */   133,   11,   -4,  149,   -4,  290,   -4,   -4,   -3,   -4,
01573  /*    20 */    16,   -4,   -4,   -4,   -4,   27,  135,   -4,   -4,   -4,
01574  /*    30 */    38,   -4,   52,   -4,   72,   79,   -4,   97,  115,   -4,
01575  /*    40 */   133,  114,   -4,   -4,  124,   -4,  122,  127,   -4,  175,
01576  /*    50 */    -4,   -4,  290,   -4,  140,   -4,  186,   -4,   -4,   -4,
01577  /*    60 */   241,   -4,  193,   -4,   -4,  204,   -4,   -4,   -4,  171,
01578  /*    70 */   289,  133,   -4,  133,   -4,
01579 };
/* yy_reduce_ofst[] is indexed by state number.  yy_find_reduce_action() adds
** the look-ahead symbol code to the state's entry to obtain an index into
** yy_action[]/yy_lookahead[].  An entry equal to YY_REDUCE_USE_DFLT makes
** yy_find_reduce_action() return yy_default[state] without consulting the
** look-ahead at all. */
01580 #define YY_REDUCE_USE_DFLT (-25)
01581 static const short yy_reduce_ofst[] = {
01582  /*     0 */   -24,  -25,  -25,  -25,    6,  -25,   19,  -25,   32,  -25,
01583  /*    10 */    45,  -25,  -25,  196,  -25,  -25,  -25,  -25,  -25,  -25,
01584  /*    20 */   -25,  -25,  -25,  -25,  -25,  -10,  -25,  -25,  -25,  -25,
01585  /*    30 */   -25,  -25,  -25,  -25,  -25,  -25,  -25,  -25,  -25,  -25,
01586  /*    40 */    58,  -25,  -25,  -25,  -25,  -25,  -25,  -25,  -25,  205,
01587  /*    50 */   -25,  -25,  -25,  -25,  -25,  -25,  214,  -25,  -25,  -25,
01588  /*    60 */   232,  -25,  242,  -25,  -25,  251,  -25,  -25,  -25,  -25,
01589  /*    70 */   -25,   71,  -25,   84,  -25,
01590 };
/* yy_default[] is indexed by state number and holds the action that
** yy_find_shift_action() and yy_find_reduce_action() return when the state's
** offset entry is the *_USE_DFLT marker, or when the computed index is out
** of range or its yy_lookahead[] entry does not match the look-ahead. */
01591 static const YYACTIONTYPE yy_default[] = {
01592  /*     0 */    85,   84,   77,  127,   85,   78,   85,   79,   85,   82,
01593  /*    10 */    85,   83,   84,   86,   91,  104,  119,  121,  127,  123,
01594  /*    20 */   127,  125,  105,  108,  109,  127,  127,  110,  118,  117,
01595  /*    30 */   111,  120,  112,  122,  113,  127,  124,  114,  127,  126,
01596  /*    40 */    85,   84,  115,   87,  127,   88,  127,  127,   89,  127,
01597  /*    50 */    93,  101,  106,  107,  127,  116,  127,   95,   97,   99,
01598  /*    60 */   103,   90,  127,   92,  100,  127,   94,   96,   98,  104,
01599  /*    70 */   105,   85,   80,   85,   81,
01600 };
/* Number of entries in yy_action[], as an int.  The action look-up functions
** use it as the exclusive upper bound before indexing yy_lookahead[] and
** yy_action[]. */
01601 #define YY_SZ_ACTTAB (int)(sizeof(yy_action)/sizeof(yy_action[0]))
01602 
01603 /* The next table maps tokens into fallback tokens.  If a construct
01604 ** like the following:
01605 ** 
01606 **      %fallback ID X Y Z.
01607 **
01608 ** appears in the grammar, then ID becomes a fallback token for X, Y,
01609 ** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
01610 ** but it does not parse, the type of the token is changed to ID and
01611 ** the parse is retried before an error is thrown.
01612 */
01613 #ifdef YYFALLBACK
/* Indexed by token code in yy_find_shift_action(); an entry of 0 means the
** token has no fallback.  The table has no entries in this file, and it is
** only compiled (and consulted) when YYFALLBACK is defined. */
01614 static const YYCODETYPE yyFallback[] = {
01615 };
01616 #endif /* YYFALLBACK */
01617 
01618 /* The following structure represents a single element of the
01619 ** parser's stack.  Information stored includes:
01620 **
01621 **   +  The state number for the parser at this level of the stack.
01622 **
01623 **   +  The value of the token stored at this level of the stack.
01624 **      (In other words, the "major" token.)
01625 **
01626 **   +  The semantic value stored at this level of the stack.  This is
01627 **      the information used by the action routines in the grammar.
01628 **      It is sometimes called the "minor" token.
01629 */
01630 struct yyStackEntry {
01631   yyStackEntry() {
01632     stateno = 0;
01633     major = 0;
01634   }
01635   yyStackEntry(int stateno_, int major_, YYMINORTYPE minor_)
01636   {
01637     stateno = stateno_;
01638     major = major_;
01639     minor = minor_;
01640   }
01641   int stateno;       /* The state-number */
01642   int major;         /* The major token value.  This is the code
01643                      ** number for the token at this stack level */
01644   YYMINORTYPE minor; /* The user-supplied minor token value.  This
01645                      ** is the value of the token  */
01646 };
01647 
01648 /* The state of the parser is completely contained in an instance of
01649 ** the following structure */
/* No constructor is declared here, so an object created by ParseAlloc()
** with `new yyParser` starts with an empty yystack and an unset yyerrcnt.
** NOTE(review): yyerrcnt is presumably assigned before first use by code
** outside this section - confirm. */
01650 struct yyParser {
01651   int yyerrcnt;                 /* Shifts left before out of the error */
01652   ParseARG_SDECL                /* A place to hold %extra_argument */
01653   vector<yyStackEntry> yystack; /* The parser's stack; back() is the top */
01654 };
01655 typedef struct yyParser yyParser;
01656 
01657 /* Prototype this here so we can call it from a rule action (ick). */
01658 static void yy_parse_failed(yyParser *);
01659 
01660 #include "omdebug.h"
01661 
01662 #ifdef XAPIAN_DEBUG_VERBOSE
01663 /* Names of all terminals and nonterminals, indexed by symbol code, for
01664 ** tracing shifts.  ParseTokenName() is the bounds-checked accessor. */
01665 static const char *const yyTokenName[] = {
01666   "$",             "ERROR",         "OR",            "XOR",         
01667   "AND",           "NOT",           "NEAR",          "ADJ",         
01668   "LOVE",          "HATE",          "HATE_AFTER_AND",  "SYNONYM",     
01669   "TERM",          "GROUP_TERM",    "PHR_TERM",      "WILD_TERM",   
01670   "PARTIAL_TERM",  "BOOLEAN_FILTER",  "RANGE_START",   "RANGE_END",   
01671   "QUOTE",         "BRA",           "KET",           "error",       
01672   "query",         "expr",          "prob_expr",     "bool_arg",    
01673   "prob",          "term",          "stop_prob",     "stop_term",   
01674   "compound_term",  "phrase",        "phrased_term",  "group",       
01675   "near_expr",     "adj_expr",    
01676 };
01677 
01678 /* Names of all grammar rules, indexed by rule number, for tracing reduce
01679 ** actions.  ParseRuleName() is the bounds-checked accessor. */
01680 static const char *const yyRuleName[] = {
01681  /*   0 */ "query ::= expr",
01682  /*   1 */ "query ::=",
01683  /*   2 */ "expr ::= prob_expr",
01684  /*   3 */ "expr ::= bool_arg AND bool_arg",
01685  /*   4 */ "expr ::= bool_arg NOT bool_arg",
01686  /*   5 */ "expr ::= bool_arg AND NOT bool_arg",
01687  /*   6 */ "expr ::= bool_arg AND HATE_AFTER_AND bool_arg",
01688  /*   7 */ "expr ::= bool_arg OR bool_arg",
01689  /*   8 */ "expr ::= bool_arg XOR bool_arg",
01690  /*   9 */ "bool_arg ::= expr",
01691  /*  10 */ "bool_arg ::=",
01692  /*  11 */ "prob_expr ::= prob",
01693  /*  12 */ "prob_expr ::= term",
01694  /*  13 */ "prob ::= RANGE_START RANGE_END",
01695  /*  14 */ "prob ::= stop_prob RANGE_START RANGE_END",
01696  /*  15 */ "prob ::= stop_term stop_term",
01697  /*  16 */ "prob ::= prob stop_term",
01698  /*  17 */ "prob ::= LOVE term",
01699  /*  18 */ "prob ::= stop_prob LOVE term",
01700  /*  19 */ "prob ::= HATE term",
01701  /*  20 */ "prob ::= stop_prob HATE term",
01702  /*  21 */ "prob ::= HATE BOOLEAN_FILTER",
01703  /*  22 */ "prob ::= stop_prob HATE BOOLEAN_FILTER",
01704  /*  23 */ "prob ::= BOOLEAN_FILTER",
01705  /*  24 */ "prob ::= stop_prob BOOLEAN_FILTER",
01706  /*  25 */ "prob ::= LOVE BOOLEAN_FILTER",
01707  /*  26 */ "prob ::= stop_prob LOVE BOOLEAN_FILTER",
01708  /*  27 */ "stop_prob ::= prob",
01709  /*  28 */ "stop_prob ::= stop_term",
01710  /*  29 */ "stop_term ::= TERM",
01711  /*  30 */ "stop_term ::= compound_term",
01712  /*  31 */ "term ::= TERM",
01713  /*  32 */ "term ::= compound_term",
01714  /*  33 */ "compound_term ::= WILD_TERM",
01715  /*  34 */ "compound_term ::= PARTIAL_TERM",
01716  /*  35 */ "compound_term ::= QUOTE phrase QUOTE",
01717  /*  36 */ "compound_term ::= phrased_term",
01718  /*  37 */ "compound_term ::= group",
01719  /*  38 */ "compound_term ::= near_expr",
01720  /*  39 */ "compound_term ::= adj_expr",
01721  /*  40 */ "compound_term ::= BRA expr KET",
01722  /*  41 */ "compound_term ::= SYNONYM TERM",
01723  /*  42 */ "phrase ::= TERM",
01724  /*  43 */ "phrase ::= phrase TERM",
01725  /*  44 */ "phrased_term ::= TERM PHR_TERM",
01726  /*  45 */ "phrased_term ::= phrased_term PHR_TERM",
01727  /*  46 */ "group ::= TERM GROUP_TERM",
01728  /*  47 */ "group ::= group GROUP_TERM",
01729  /*  48 */ "near_expr ::= TERM NEAR TERM",
01730  /*  49 */ "near_expr ::= near_expr NEAR TERM",
01731  /*  50 */ "adj_expr ::= TERM ADJ TERM",
01732  /*  51 */ "adj_expr ::= adj_expr ADJ TERM",
01733 };
01734 
01735 /*
01736 ** This function returns the symbolic name associated with a token
01737 ** value.
01738 */
01739 static const char *ParseTokenName(int tokenType){
01740   if( tokenType>0 && size_t(tokenType)<(sizeof(yyTokenName)/sizeof(yyTokenName[0])) ){
01741     return yyTokenName[tokenType];
01742   }
01743   return "Unknown";
01744 }
01745 
01746 /*
01747 ** This function returns the symbolic name associated with a rule
01748 ** value.
01749 */
01750 static const char *ParseRuleName(int ruleNum){
01751   if( ruleNum>0 && size_t(ruleNum)<(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
01752     return yyRuleName[ruleNum];
01753   }
01754   return "Unknown";
01755 }
01756 #endif /* XAPIAN_DEBUG_VERBOSE */
01757 
01758 /* 
01759 ** This function allocates a new parser.
01760 ** It takes no arguments: the parser object is created with operator
01761 ** new, and ParseFree() releases it again with delete.
01762 **
01763 ** Inputs:
01764 ** None.
01765 **
01766 ** Outputs:
01767 ** A pointer to a parser.  This pointer is used in subsequent calls
01768 ** to Parse and ParseFree.
01769 */
01770 static yyParser *ParseAlloc(){
01771   return new yyParser;
01772 }
01773 
01774 /* The following function deletes the value associated with a
01775 ** symbol.  The symbol can be either a terminal or nonterminal.
01776 ** "yymajor" is the symbol code, and "yypminor" is a pointer to
01777 ** the value.  Which member of *yypminor is released depends on the
01778 ** symbol code; codes without a case below release nothing. */
01779 static void yy_destructor(YYCODETYPE yymajor, YYMINORTYPE *yypminor){
01780   switch( yymajor ){
01781     /* Here is inserted the actions which take place when a
01782     ** terminal or non-terminal is destroyed.  This can happen
01783     ** when the symbol is popped from the stack during a
01784     ** reduce or during error processing or when a parser is 
01785     ** being destroyed before it is finished parsing.
01786     **
01787     ** Note: during a reduce, the only symbols destroyed are those
01788     ** which appear on the RHS of the rule, but which are not used
01789     ** inside the C code.
01790     */
01791     case 1: /* codes 1..22: ERROR through KET in yyTokenName[]; value is yy0 */
01792     case 2:
01793     case 3:
01794     case 4:
01795     case 5:
01796     case 6:
01797     case 7:
01798     case 8:
01799     case 9:
01800     case 10:
01801     case 11:
01802     case 12:
01803     case 13:
01804     case 14:
01805     case 15:
01806     case 16:
01807     case 17:
01808     case 18:
01809     case 19:
01810     case 20:
01811     case 21:
01812     case 22:
01813 #line 1376 "queryparser/queryparser.lemony"
01814 {delete (yypminor->yy0);}
01815 #line 1817 "queryparser/queryparser_internal.cc"
01816       break;
01817     case 25: /* expr, prob_expr, bool_arg, term, stop_term, compound_term; value is yy73 */
01818     case 26:
01819     case 27:
01820     case 29:
01821     case 31:
01822     case 32:
01823 #line 1447 "queryparser/queryparser.lemony"
01824 {delete (yypminor->yy73);}
01825 #line 1827 "queryparser/queryparser_internal.cc"
01826       break;
01827     case 28: /* prob, stop_prob; value is yy12 */
01828     case 30:
01829 #line 1543 "queryparser/queryparser.lemony"
01830 {delete (yypminor->yy12);}
01831 #line 1833 "queryparser/queryparser_internal.cc"
01832       break;
01833     case 33: /* phrase, phrased_term, near_expr, adj_expr; yy27 is released via destroy() */
01834     case 34:
01835     case 36:
01836     case 37:
01837 #line 1744 "queryparser/queryparser.lemony"
01838 {(yypminor->yy27)->destroy();}
01839 #line 1841 "queryparser/queryparser_internal.cc"
01840       break;
01841     case 35: /* group; yy76 is released via destroy() */
01842 #line 1778 "queryparser/queryparser.lemony"
01843 {(yypminor->yy76)->destroy();}
01844 #line 1846 "queryparser/queryparser_internal.cc"
01845       break;
01846     default:  break;   /* If no destructor action specified: do nothing */
01847   }
01848 }
01849 
01850 /*
01851 ** Pop the parser's stack once.
01852 **
01853 ** If there is a destructor routine associated with the token which
01854 ** is popped from the stack, then call it.
01855 **
01856 ** Return the major token number for the symbol popped.
01857 */
01858 static int yy_pop_parser_stack(yyParser *pParser){
01859   YYCODETYPE yymajor;
01860   if( pParser->yystack.empty() ) return 0;
01861   yyStackEntry *yytos = &pParser->yystack.back();
01862 
01863   DEBUGLINE(QUERYPARSER, "Popping " << ParseTokenName(yytos->major));
01864   yymajor = (YYCODETYPE)yytos->major;
01865   yy_destructor( yymajor, &yytos->minor);
01866   pParser->yystack.pop_back();
01867   return yymajor;
01868 }
01869 
01870 /* 
01871 ** Deallocate and destroy a parser.  Destructors are all called for
01872 ** all stack elements before shutting the parser down.
01873 **
01874 ** Inputs:
01875 ** A pointer to the parser.  This should be a pointer
01876 ** obtained from ParseAlloc.
01877 */
01878 static void ParseFree(
01879   yyParser *pParser           /* The parser to be deleted */
01880 ){
01881   if( pParser==0 ) return;
01882   while( !pParser->yystack.empty() ) yy_pop_parser_stack(pParser);
01883   delete pParser;
01884 }
01885 
01886 /*
01887 ** Find the appropriate action for a parser given the terminal
01888 ** look-ahead token iLookAhead.
01889 **
01890 ** If the look-ahead token is YYNOCODE, then check to see if the action is
01891 ** independent of the look-ahead.  If it is, return the action, otherwise
01892 ** return YY_NO_ACTION.
01893 */
01894 static int yy_find_shift_action(
01895   yyParser *pParser,        /* The parser */
01896   int iLookAhead            /* The look-ahead token */
01897 ){
01898   int i;
01899   /* if( pParser->yystack.empty() ) return YY_NO_ACTION;  */
01900   int stateno = pParser->yystack.back().stateno;
01901  
01902   i = yy_shift_ofst[stateno];
01903   if( i==YY_SHIFT_USE_DFLT ){
01904     return yy_default[stateno];
01905   }
01906   if( iLookAhead==YYNOCODE ){
01907     return YY_NO_ACTION;
01908   }
01909   i += iLookAhead;
01910   if( i<0 || i>=YY_SZ_ACTTAB || yy_lookahead[i]!=iLookAhead ){
01911 #ifdef YYFALLBACK
01912     int iFallback;            /* Fallback token */
01913     if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0])
01914            && (iFallback = yyFallback[iLookAhead])!=0 ){
01915       DEBUGLINE(QUERYPARSER,
01916                 "FALLBACK " << ParseTokenName(iLookAhead) << " => " <<
01917                 ParseTokenName(iFallback));
01918       return yy_find_shift_action(pParser, iFallback);
01919     }
01920 #endif
01921     return yy_default[stateno];
01922   }else{
01923     return yy_action[i];
01924   }
01925 }
01926 
01927 /*
01928 ** Find the appropriate action for a parser given the non-terminal
01929 ** look-ahead token iLookAhead.
01930 **
01931 ** If the look-ahead token is YYNOCODE, then check to see if the action is
01932 ** independent of the look-ahead.  If it is, return the action, otherwise
01933 ** return YY_NO_ACTION.
01934 */
01935 static int yy_find_reduce_action(
01936   yyParser *pParser,        /* The parser */
01937   int iLookAhead            /* The look-ahead token */
01938 ){
01939   int i;
01940   int stateno = pParser->yystack.back().stateno;
01941  
01942   i = yy_reduce_ofst[stateno];
01943   if( i==YY_REDUCE_USE_DFLT ){
01944     return yy_default[stateno];
01945   }
01946   if( iLookAhead==YYNOCODE ){
01947     return YY_NO_ACTION;
01948   }
01949   i += iLookAhead;
01950   if( i<0 || i>=YY_SZ_ACTTAB || yy_lookahead[i]!=iLookAhead ){
01951     return yy_default[stateno];
01952   }else{
01953     return yy_action[i];
01954   }
01955 }
01956 
01957 /*
01958 ** Perform a shift action.
01959 */
01960 static void yy_shift(
01961   yyParser *yypParser,          /* The parser to be shifted */
01962   int yyNewState,               /* The new state to shift in */
01963   int yyMajor,                  /* The major token to shift in */
01964   YYMINORTYPE *yypMinor         /* Pointer to the minor token to shift in */
01965 ){
01966   /* Here code is inserted which will execute if the parser
01967   ** stack every overflows.  We use std::vector<> for our stack
01968   ** so we'll never need this code.
01969   */
01970 #if 0
01971 #endif
01972 #ifdef XAPIAN_DEBUG_VERBOSE
01973   unsigned i;
01974   DEBUGLINE(QUERYPARSER, "Shift " << yyNewState);
01975   string stack("Stack:");
01976   for (i = 0; i < yypParser->yystack.size(); i++) {
01977     stack += ' ';
01978     stack += ParseTokenName(yypParser->yystack[i].major);
01979   }
01980   DEBUGLINE(QUERYPARSER, stack);
01981 #endif
01982   yypParser->yystack.push_back(yyStackEntry(yyNewState, yyMajor, *yypMinor));
01983 }
01984 
01985 /* Per-rule information used during a reduce.  Entry N corresponds to rule
01986 ** N of yyRuleName[]: `lhs` is the symbol code of the rule's left-hand side
01987 ** and `nrhs` is the number of symbols on its right-hand side. */
01988 static const struct {
01989   YYCODETYPE lhs;         /* Symbol on the left-hand side of the rule */
01990   unsigned char nrhs;     /* Number of right-hand side symbols in the rule */
01991 } yyRuleInfo[] = {
01992   { 24, 1 },
01993   { 24, 0 },
01994   { 25, 1 },
01995   { 25, 3 },
01996   { 25, 3 },
01997   { 25, 4 },
01998   { 25, 4 },
01999   { 25, 3 },
02000   { 25, 3 },
02001   { 27, 1 },
02002   { 27, 0 },
02003   { 26, 1 },
02004   { 26, 1 },
02005   { 28, 2 },
02006   { 28, 3 },
02007   { 28, 2 },
02008   { 28, 2 },
02009   { 28, 2 },
02010   { 28, 3 },
02011   { 28, 2 },
02012   { 28, 3 },
02013   { 28, 2 },
02014   { 28, 3 },
02015   { 28, 1 },
02016   { 28, 2 },
02017   { 28, 2 },
02018   { 28, 3 },
02019   { 30, 1 },
02020   { 30, 1 },
02021   { 31, 1 },
02022   { 31, 1 },
02023   { 29, 1 },
02024   { 29, 1 },
02025   { 32, 1 },
02026   { 32, 1 },
02027   { 32, 3 },
02028   { 32, 1 },
02029   { 32, 1 },
02030   { 32, 1 },
02031   { 32, 1 },
02032   { 32, 3 },
02033   { 32, 2 },
02034   { 33, 1 },
02035   { 33, 2 },
02036   { 34, 2 },
02037   { 34, 2 },
02038   { 35, 2 },
02039   { 35, 2 },
02040   { 36, 3 },
02041   { 36, 3 },
02042   { 37, 3 },
02043   { 37, 3 },
02044 };
02045 
02046 static void yy_accept(yyParser*);  /* Forward Declaration */
02047 
02048 /*
02049 ** Perform a reduce action and the shift that must immediately
02050 ** follow the reduce.
02051 */
02052 static void yy_reduce(
02053   yyParser *yypParser,         /* The parser */
02054   int yyruleno                 /* Number of the rule by which to reduce */
02055 ){
02056   int yygoto;                     /* The next state */
02057   int yyact;                      /* The next action */
02058   YYMINORTYPE yygotominor;        /* The LHS of the rule reduced */
02059   yyStackEntry *yymsp;            /* The top of the parser's stack */
02060   int yysize;                     /* Amount to pop the stack */
02061   ParseARG_FETCH;
02062   yymsp = &yypParser->yystack.back();
02063 #ifdef XAPIAN_DEBUG_VERBOSE
02064   DEBUGLINE(QUERYPARSER, "Reduce [" << ParseRuleName(yyruleno) << "].");
02065 #endif
02066 
02067   switch( yyruleno ){
02068   /* Beginning here are the reduction cases.  A typical example
02069   ** follows:
02070   **   case 0:
02071   **  #line <lineno> <grammarfile>
02072   **     { ... }           // User supplied code
02073   **  #line <lineno> <thisfile>
02074   **     break;
02075   */
02076       case 0:
02077 #line 1429 "queryparser/queryparser.lemony"
02078 {
02079     // Save the parsed query in the State structure so we can return it.
02080     if (yymsp[0].minor.yy73) {
02081         state->query = *yymsp[0].minor.yy73;
02082         delete yymsp[0].minor.yy73;
02083     } else {
02084         state->query = Query();
02085     }
02086 }
02087 #line 2089 "queryparser/queryparser_internal.cc"
02088         break;
02089       case 1:
02090 #line 1439 "queryparser/queryparser.lemony"
02091 {
02092     // Handle a query string with no terms in.
02093     state->query = Query();
02094 }
02095 #line 2097 "queryparser/queryparser_internal.cc"
02096         break;
02097       case 2:
02098       case 9:
02099 #line 1450 "queryparser/queryparser.lemony"
02100 { yygotominor.yy73 = yymsp[0].minor.yy73; }
02101 #line 2103 "queryparser/queryparser_internal.cc"
02102         break;
02103       case 3:
02104 #line 1453 "queryparser/queryparser.lemony"
02105 { BOOL_OP_TO_QUERY(yygotominor.yy73, yymsp[-2].minor.yy73, Query::OP_AND, yymsp[0].minor.yy73, "AND");   yy_destructor(4,&yymsp[-1].minor);
02106 }
02107 #line 2109 "queryparser/queryparser_internal.cc"
02108         break;
02109       case 4:
02110 #line 1455 "queryparser/queryparser.lemony"
02111 {
02112     // 'NOT foo' -> '<alldocuments> NOT foo'
02113     if (!yymsp[-2].minor.yy73 && (state->flags & QueryParser::FLAG_PURE_NOT)) {
02114         yymsp[-2].minor.yy73 = new Query("", 1, 0);
02115     }
02116     BOOL_OP_TO_QUERY(yygotominor.yy73, yymsp[-2].minor.yy73, Query::OP_AND_NOT, yymsp[0].minor.yy73, "NOT");
02117   yy_destructor(5,&yymsp[-1].minor);
02118 }
02119 #line 2121 "queryparser/queryparser_internal.cc"
02120         break;
02121       case 5:
02122 #line 1464 "queryparser/queryparser.lemony"
02123 { BOOL_OP_TO_QUERY(yygotominor.yy73, yymsp[-3].minor.yy73, Query::OP_AND_NOT, yymsp[0].minor.yy73, "AND NOT");   yy_destructor(4,&yymsp[-2].minor);
02124   yy_destructor(5,&yymsp[-1].minor);
02125 }
02126 #line 2128 "queryparser/queryparser_internal.cc"
02127         break;
02128       case 6:
02129 #line 1467 "queryparser/queryparser.lemony"
02130 { BOOL_OP_TO_QUERY(yygotominor.yy73, yymsp[-3].minor.yy73, Query::OP_AND_NOT, yymsp[0].minor.yy73, "AND");   yy_destructor(4,&yymsp[-2].minor);
02131   yy_destructor(10,&yymsp[-1].minor);
02132 }
02133 #line 2135 "queryparser/queryparser_internal.cc"
02134         break;
02135       case 7:
02136 #line 1470 "queryparser/queryparser.lemony"
02137 { BOOL_OP_TO_QUERY(yygotominor.yy73, yymsp[-2].minor.yy73, Query::OP_OR, yymsp[0].minor.yy73, "OR");   yy_destructor(2,&yymsp[-1].minor);
02138 }
02139 #line 2141 "queryparser/queryparser_internal.cc"
02140         break;
02141       case 8:
02142 #line 1473 "queryparser/queryparser.lemony"
02143 { BOOL_OP_TO_QUERY(yygotominor.yy73, yymsp[-2].minor.yy73, Query::OP_XOR, yymsp[0].minor.yy73, "XOR");   yy_destructor(3,&yymsp[-1].minor);
02144 }
02145 #line 2147 "queryparser/queryparser_internal.cc"
02146         break;
02147       case 10:
02148 #line 1482 "queryparser/queryparser.lemony"
02149 {
02150     // Set the argument to NULL, which enables the bool_arg-using rules in
02151     // expr above to report uses of AND, OR, etc which don't have two
02152     // arguments.
02153     yygotominor.yy73 = NULL;
02154 }
02155 #line 2157 "queryparser/queryparser_internal.cc"
02156         break;
02157       case 11:
02158 #line 1494 "queryparser/queryparser.lemony"
02159 {
02160     yygotominor.yy73 = yymsp[0].minor.yy12->query;
02161     yymsp[0].minor.yy12->query = NULL;
02162     // Handle any "+ terms".
02163     if (yymsp[0].minor.yy12->love) {
02164         if (yymsp[0].minor.yy12->love->empty()) {
02165             // +<nothing>.
02166             delete yygotominor.yy73;
02167             yygotominor.yy73 = yymsp[0].minor.yy12->love;
02168         } else if (yygotominor.yy73) {
02169             swap(yygotominor.yy73, yymsp[0].minor.yy12->love);
02170             add_to_query(yygotominor.yy73, Query::OP_AND_MAYBE, yymsp[0].minor.yy12->love);
02171         } else {
02172             yygotominor.yy73 = yymsp[0].minor.yy12->love;
02173         }
02174         yymsp[0].minor.yy12->love = NULL;
02175     }
02176     // Handle any boolean filters.
02177     if (!yymsp[0].minor.yy12->filter.empty()) {
02178         if (yygotominor.yy73) {
02179             add_to_query(yygotominor.yy73, Query::OP_FILTER, yymsp[0].minor.yy12->merge_filters());
02180         } else {
02181             // Make the query a boolean one.
02182             yygotominor.yy73 = new Query(Query::OP_SCALE_WEIGHT, yymsp[0].minor.yy12->merge_filters(), 0.0);
02183         }
02184     }
02185     // Handle any "- terms".
02186     if (yymsp[0].minor.yy12->hate && !yymsp[0].minor.yy12->hate->empty()) {
02187         if (!yygotominor.yy73) {
02188             // Can't just hate!
02189             yy_parse_failed(yypParser);
02190             return;
02191         }
02192         *yygotominor.yy73 = Query(Query::OP_AND_NOT, *yygotominor.yy73, *yymsp[0].minor.yy12->hate);
02193     }
02194     // FIXME what if yygotominor.yy73 && yygotominor.yy73->empty() (all terms are stopwords)?
02195     delete yymsp[0].minor.yy12;
02196 }
02197 #line 2199 "queryparser/queryparser_internal.cc"
02198         break;
02199       case 12:
02200       case 30:
02201       case 32:
02202 #line 1533 "queryparser/queryparser.lemony"
02203 {
02204     yygotominor.yy73 = yymsp[0].minor.yy73;
02205 }
02206 #line 2208 "queryparser/queryparser_internal.cc"
02207         break;
02208       case 13:
02209 #line 1545 "queryparser/queryparser.lemony"
02210 {
02211     Query range;
02212     Xapian::valueno valno = state->value_range(range, yymsp[-1].minor.yy0, yymsp[0].minor.yy0);
02213     if (valno == BAD_VALUENO) {
02214         yy_parse_failed(yypParser);
02215         return;
02216     }
02217     yygotominor.yy12 = new ProbQuery;
02218     yygotominor.yy12->filter[filter_group_id(valno)] = range;
02219 }
02220 #line 2222 "queryparser/queryparser_internal.cc"
02221         break;
02222       case 14:
02223 #line 1556 "queryparser/queryparser.lemony"
02224 {
02225     Query range;
02226     Xapian::valueno valno = state->value_range(range, yymsp[-1].minor.yy0, yymsp[0].minor.yy0);
02227     if (valno == BAD_VALUENO) {
02228         yy_parse_failed(yypParser);
02229         return;
02230     }
02231     yygotominor.yy12 = yymsp[-2].minor.yy12;
02232     Query & q = yygotominor.yy12->filter[filter_group_id(valno)];
02233     q = Query(Query::OP_OR, q, range);
02234 }
02235 #line 2237 "queryparser/queryparser_internal.cc"
02236         break;
02237       case 15:
02238 #line 1568 "queryparser/queryparser.lemony"
02239 {
02240     yygotominor.yy12 = new ProbQuery;
02241     yygotominor.yy12->query = yymsp[-1].minor.yy73;
02242     if (yymsp[0].minor.yy73) add_to_query(yygotominor.yy12->query, state->default_op(), yymsp[0].minor.yy73);
02243 }
02244 #line 2246 "queryparser/queryparser_internal.cc"
02245         break;
02246       case 16:
02247 #line 1574 "queryparser/queryparser.lemony"
02248 {
02249     yygotominor.yy12 = yymsp[-1].minor.yy12;
02250     // If yymsp[0].minor.yy73 is a stopword, there's nothing to do here.
02251     if (yymsp[0].minor.yy73) add_to_query(yygotominor.yy12->query, state->default_op(), yymsp[0].minor.yy73);
02252 }
02253 #line 2255 "queryparser/queryparser_internal.cc"
02254         break;
02255       case 17:
02256 #line 1580 "queryparser/queryparser.lemony"
02257 {
02258     yygotominor.yy12 = new ProbQuery;
02259     if (state->default_op() == Query::OP_AND) {
02260         yygotominor.yy12->query = yymsp[0].minor.yy73;
02261     } else {
02262         yygotominor.yy12->love = yymsp[0].minor.yy73;
02263     }
02264   yy_destructor(8,&yymsp[-1].minor);
02265 }
02266 #line 2268 "queryparser/queryparser_internal.cc"
02267         break;
02268       case 18:
02269 #line 1589 "queryparser/queryparser.lemony"
02270 {
02271     yygotominor.yy12 = yymsp[-2].minor.yy12;
02272     if (state->default_op() == Query::OP_AND) {
02273         /* The default op is AND, so we just put loved terms into the query
02274          * (in this case the only effect of love is to ignore the stopword
02275          * list). */
02276         add_to_query(yygotominor.yy12->query, Query::OP_AND, yymsp[0].minor.yy73);
02277     } else {
02278         add_to_query(yygotominor.yy12->love, Query::OP_AND, yymsp[0].minor.yy73);
02279     }
02280   yy_destructor(8,&yymsp[-1].minor);
02281 }
02282 #line 2284 "queryparser/queryparser_internal.cc"
02283         break;
02284       case 19:
02285 #line 1601 "queryparser/queryparser.lemony"
02286 {
02287     yygotominor.yy12 = new ProbQuery;
02288     yygotominor.yy12->hate = yymsp[0].minor.yy73;
02289   yy_destructor(9,&yymsp[-1].minor);
02290 }
02291 #line 2293 "queryparser/queryparser_internal.cc"
02292         break;
02293       case 20:
02294 #line 1606 "queryparser/queryparser.lemony"
02295 {
02296     yygotominor.yy12 = yymsp[-2].minor.yy12;
02297     add_to_query(yygotominor.yy12->hate, Query::OP_OR, yymsp[0].minor.yy73);
02298   yy_destructor(9,&yymsp[-1].minor);
02299 }
02300 #line 2302 "queryparser/queryparser_internal.cc"
02301         break;
02302       case 21:
02303 #line 1611 "queryparser/queryparser.lemony"
02304 {
02305     yygotominor.yy12 = new ProbQuery;
02306     yygotominor.yy12->hate = new Query(yymsp[0].minor.yy0->get_query());
02307     delete yymsp[0].minor.yy0;
02308   yy_destructor(9,&yymsp[-1].minor);
02309 }
02310 #line 2312 "queryparser/queryparser_internal.cc"
02311         break;
02312       case 22:
02313 #line 1617 "queryparser/queryparser.lemony"
02314 {
02315     yygotominor.yy12 = yymsp[-2].minor.yy12;
02316     add_to_query(yygotominor.yy12->hate, Query::OP_OR, yymsp[0].minor.yy0->get_query());
02317     delete yymsp[0].minor.yy0;
02318   yy_destructor(9,&yymsp[-1].minor);
02319 }
02320 #line 2322 "queryparser/queryparser_internal.cc"
02321         break;
02322       case 23:
02323 #line 1623 "queryparser/queryparser.lemony"
02324 {
02325     yygotominor.yy12 = new ProbQuery;
02326     yygotominor.yy12->filter[yymsp[0].minor.yy0->get_filter_group_id()] = yymsp[0].minor.yy0->get_query();
02327     delete yymsp[0].minor.yy0;
02328 }
02329 #line 2331 "queryparser/queryparser_internal.cc"
02330         break;
02331       case 24:
02332 #line 1629 "queryparser/queryparser.lemony"
02333 {
02334     yygotominor.yy12 = yymsp[-1].minor.yy12;
02335     // We OR filters with the same prefix...
02336     Query & q = yygotominor.yy12->filter[yymsp[0].minor.yy0->get_filter_group_id()];
02337     q = Query(Query::OP_OR, q, yymsp[0].minor.yy0->get_query());
02338     delete yymsp[0].minor.yy0;
02339 }
02340 #line 2342 "queryparser/queryparser_internal.cc"
02341         break;
02342       case 25:
02343 #line 1637 "queryparser/queryparser.lemony"
02344 {
02345     // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
02346     yygotominor.yy12 = new ProbQuery;
02347     yygotominor.yy12->filter[yymsp[0].minor.yy0->get_filter_group_id()] = yymsp[0].minor.yy0->get_query();
02348     delete yymsp[0].minor.yy0;
02349   yy_destructor(8,&yymsp[-1].minor);
02350 }
02351 #line 2353 "queryparser/queryparser_internal.cc"
02352         break;
02353       case 26:
02354 #line 1644 "queryparser/queryparser.lemony"
02355 {
02356     // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
02357     yygotominor.yy12 = yymsp[-2].minor.yy12;
02358     // We OR filters with the same prefix...
02359     Query & q = yygotominor.yy12->filter[yymsp[0].minor.yy0->get_filter_group_id()];
02360     q = Query(Query::OP_OR, q, yymsp[0].minor.yy0->get_query());
02361     delete yymsp[0].minor.yy0;
02362   yy_destructor(8,&yymsp[-1].minor);
02363 }
02364 #line 2366 "queryparser/queryparser_internal.cc"
02365         break;
02366       case 27:
02367 #line 1659 "queryparser/queryparser.lemony"
02368 { yygotominor.yy12 = yymsp[0].minor.yy12; }
02369 #line 2371 "queryparser/queryparser_internal.cc"
02370         break;
02371       case 28:
02372 #line 1661 "queryparser/queryparser.lemony"
02373 {
02374     yygotominor.yy12 = new ProbQuery;
02375     yygotominor.yy12->query = yymsp[0].minor.yy73;
02376 }
02377 #line 2379 "queryparser/queryparser_internal.cc"
02378         break;
02379       case 29:
02380 #line 1675 "queryparser/queryparser.lemony"
02381 {
02382     if (state->is_stopword(yymsp[0].minor.yy0)) {
02383         yygotominor.yy73 = NULL;
02384         state->add_to_stoplist(yymsp[0].minor.yy0);
02385     } else {
02386         yygotominor.yy73 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
02387     }
02388     delete yymsp[0].minor.yy0;
02389 }
02390 #line 2392 "queryparser/queryparser_internal.cc"
02391         break;
02392       case 31:
02393 #line 1694 "queryparser/queryparser.lemony"
02394 {
02395     yygotominor.yy73 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
02396     delete yymsp[0].minor.yy0;
02397 }
02398 #line 2400 "queryparser/queryparser_internal.cc"
02399         break;
02400       case 33:
02401 #line 1711 "queryparser/queryparser.lemony"
02402 { yygotominor.yy73 = yymsp[0].minor.yy0->as_wildcarded_query(state); }
02403 #line 2405 "queryparser/queryparser_internal.cc"
02404         break;
02405       case 34:
02406 #line 1714 "queryparser/queryparser.lemony"
02407 { yygotominor.yy73 = yymsp[0].minor.yy0->as_partial_query(state); }
02408 #line 2410 "queryparser/queryparser_internal.cc"
02409         break;
02410       case 35:
02411 #line 1717 "queryparser/queryparser.lemony"
02412 { yygotominor.yy73 = yymsp[-1].minor.yy27->as_phrase_query();   yy_destructor(20,&yymsp[-2].minor);
02413   yy_destructor(20,&yymsp[0].minor);
02414 }
02415 #line 2417 "queryparser/queryparser_internal.cc"
02416         break;
02417       case 36:
02418 #line 1720 "queryparser/queryparser.lemony"
02419 { yygotominor.yy73 = yymsp[0].minor.yy27->as_phrase_query(); }
02420 #line 2422 "queryparser/queryparser_internal.cc"
02421         break;
02422       case 37:
02423 #line 1722 "queryparser/queryparser.lemony"
02424 {
02425     yygotominor.yy73 = yymsp[0].minor.yy76->as_group(state);
02426 }
02427 #line 2429 "queryparser/queryparser_internal.cc"
02428         break;
02429       case 38:
02430 #line 1727 "queryparser/queryparser.lemony"
02431 { yygotominor.yy73 = yymsp[0].minor.yy27->as_near_query(); }
02432 #line 2434 "queryparser/queryparser_internal.cc"
02433         break;
02434       case 39:
02435 #line 1730 "queryparser/queryparser.lemony"
02436 { yygotominor.yy73 = yymsp[0].minor.yy27->as_adj_query(); }
02437 #line 2439 "queryparser/queryparser_internal.cc"
02438         break;
02439       case 40:
02440 #line 1733 "queryparser/queryparser.lemony"
02441 { yygotominor.yy73 = yymsp[-1].minor.yy73;   yy_destructor(21,&yymsp[-2].minor);
02442   yy_destructor(22,&yymsp[0].minor);
02443 }
02444 #line 2446 "queryparser/queryparser_internal.cc"
02445         break;
02446       case 41:
02447 #line 1735 "queryparser/queryparser.lemony"
02448 {
02449     yygotominor.yy73 = new Query(yymsp[0].minor.yy0->get_query_with_synonyms());
02450     delete yymsp[0].minor.yy0;
02451   yy_destructor(11,&yymsp[-1].minor);
02452 }
02453 #line 2455 "queryparser/queryparser_internal.cc"
02454         break;
02455       case 42:
02456 #line 1746 "queryparser/queryparser.lemony"
02457 {
02458     yygotominor.yy27 = new TermList;
02459     yygotominor.yy27->add_positional_term(yymsp[0].minor.yy0);
02460 }
02461 #line 2463 "queryparser/queryparser_internal.cc"
02462         break;
02463       case 43:
02464       case 45:
02465 #line 1751 "queryparser/queryparser.lemony"
02466 {
02467     yygotominor.yy27 = yymsp[-1].minor.yy27;
02468     yygotominor.yy27->add_positional_term(yymsp[0].minor.yy0);
02469 }
02470 #line 2472 "queryparser/queryparser_internal.cc"
02471         break;
02472       case 44:
02473 #line 1763 "queryparser/queryparser.lemony"
02474 {
02475     yygotominor.yy27 = new TermList;
02476     yygotominor.yy27->add_positional_term(yymsp[-1].minor.yy0);
02477     yygotominor.yy27->add_positional_term(yymsp[0].minor.yy0);
02478 }
02479 #line 2481 "queryparser/queryparser_internal.cc"
02480         break;
02481       case 46:
02482 #line 1780 "queryparser/queryparser.lemony"
02483 {
02484     yygotominor.yy76 = new TermGroup;
02485     yygotominor.yy76->add_term(yymsp[-1].minor.yy0);
02486     yygotominor.yy76->add_term(yymsp[0].minor.yy0);
02487 }
02488 #line 2490 "queryparser/queryparser_internal.cc"
02489         break;
02490       case 47:
02491 #line 1786 "queryparser/queryparser.lemony"
02492 {
02493     yygotominor.yy76 = yymsp[-1].minor.yy76;
02494     yygotominor.yy76->add_term(yymsp[0].minor.yy0);
02495 }
02496 #line 2498 "queryparser/queryparser_internal.cc"
02497         break;
02498       case 48:
02499       case 50:
02500 #line 1797 "queryparser/queryparser.lemony"
02501 {
02502     yygotominor.yy27 = new TermList;
02503     yygotominor.yy27->add_positional_term(yymsp[-2].minor.yy0);
02504     yygotominor.yy27->add_positional_term(yymsp[0].minor.yy0);
02505     if (yymsp[-1].minor.yy0) {
02506         yygotominor.yy27->adjust_window(yymsp[-1].minor.yy0->get_termpos());
02507         delete yymsp[-1].minor.yy0;
02508     }
02509 }
02510 #line 2512 "queryparser/queryparser_internal.cc"
02511         break;
02512       case 49:
02513       case 51:
02514 #line 1807 "queryparser/queryparser.lemony"
02515 {
02516     yygotominor.yy27 = yymsp[-2].minor.yy27;
02517     yygotominor.yy27->add_positional_term(yymsp[0].minor.yy0);
02518     if (yymsp[-1].minor.yy0) {
02519         yygotominor.yy27->adjust_window(yymsp[-1].minor.yy0->get_termpos());
02520         delete yymsp[-1].minor.yy0;
02521     }
02522 }
02523 #line 2525 "queryparser/queryparser_internal.cc"
02524         break;
02525   }
02526   yygoto = yyRuleInfo[yyruleno].lhs;
02527   yysize = yyRuleInfo[yyruleno].nrhs;
02528   yypParser->yystack.resize(yypParser->yystack.size() - yysize);
02529   yyact = yy_find_reduce_action(yypParser,yygoto);
02530   if( yyact < YYNSTATE ){
02531     yy_shift(yypParser,yyact,yygoto,&yygotominor);
02532   }else if( yyact == YYNSTATE + YYNRULE + 1 ){
02533     yy_accept(yypParser);
02534   }
02535 }
02536 
02537 /*
02538 ** The following code executes when the parse fails
      **
      ** It logs "Fail!" through DEBUGLINE, pops every remaining entry off
      ** the parser stack with yy_pop_parser_stack(), and then runs the code
      ** copied in from the grammar file, which stores the default message
      ** "parse error" in state->error unless state->error is already set.
      **
      ** yypParser is the parser whose stack is emptied.
02539 */
02540 static void yy_parse_failed(
02541   yyParser *yypParser           /* The parser */
02542 ){
02543   ParseARG_FETCH; /* presumably what brings `state` into scope - TODO confirm against the macro definition */
02544   DEBUGLINE(QUERYPARSER, "Fail!");
        /* Discard everything still on the stack; the loop ends once it is empty. */
02545   while( !yypParser->yystack.empty() ) yy_pop_parser_stack(yypParser);
02546   /* Here code is inserted which will be executed whenever the
02547   ** parser fails */
02548 #line 1380 "queryparser/queryparser.lemony"
02549 
02550     // If we've not already set an error message, set a default one.
02551     if (!state->error) state->error = "parse error";
02552 #line 2555 "queryparser/queryparser_internal.cc"
02553   ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
02554 }
02555 
02556 /*
02557 ** The following code executes when a syntax error first occurs.
      **
      ** In this build no grammar-supplied code appears between the TOKEN
      ** definition and ParseARG_STORE, so apart from the two ParseARG_*
      ** macro invocations the function body does nothing.
      **
      ** yypParser is the parser; yymajor and yyminor describe the token
      ** that triggered the error.  Neither token parameter is used here.
02558 */
02559 static void yy_syntax_error(
02560   yyParser *yypParser,           /* The parser */
02561   int yymajor,                   /* The major type of the error token */
02562   YYMINORTYPE yyminor            /* The minor type of the error token */
02563 ){
02564   ParseARG_FETCH;
        /* The casts to void mark both parameters as deliberately unused. */
02565   (void)yymajor;
02566   (void)yyminor;
        /* TOKEN gives any inserted error-handling code a name for the
        ** offending token's value.
        ** NOTE(review): TOKEN is defined inside this function and no matching
        ** #undef is visible in this part of the file, so it presumably stays
        ** defined for the rest of the translation unit - confirm. */
02567 #define TOKEN (yyminor.yy0)
02568   ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
02569 }
02570 
02571 /*
02572 ** The following is executed when the parser accepts
      **
      ** It logs "Accept!" through DEBUGLINE and pops every remaining entry
      ** off the parser stack with yy_pop_parser_stack(), leaving the stack
      ** empty.  No grammar-supplied accept code is present in this build.
      **
      ** yypParser is the parser whose stack is emptied.
02573 */
02574 static void yy_accept(
02575   yyParser *yypParser           /* The parser */
02576 ){
02577   ParseARG_FETCH;
02578   DEBUGLINE(QUERYPARSER, "Accept!");
        /* Discard everything still on the stack; the loop ends once it is empty. */
02579   while( !yypParser->yystack.empty() ) yy_pop_parser_stack(yypParser);
02580   /* Here code is inserted which will be executed whenever the
02581   ** parser accepts */
02582   ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
02583 }
02584 
02585 /* The main parser program.
02586 ** The first argument is a pointer to a structure obtained from
02587 ** "ParseAlloc" which describes the current state of the parser.
02588 ** The second argument is the major token number.  The third is
02589 ** the minor token.  The fourth optional argument is whatever the
02590 ** user wants (and specified in the grammar) and is available for
02591 ** use by the action routines.
02592 **
02593 ** Inputs:
02594 ** <ul>
02595 ** <li> A pointer to the parser (an opaque structure.)
02596 ** <li> The major token number.
02597 ** <li> The minor token number.
02598 ** <li> An optional argument of a grammar-specified type.
02599 ** </ul>
02600 **
02601 ** Outputs:
02602 ** None.
      **
      ** Processing of one token:
      **  * If the parser stack is empty, a call with major token 0 returns
      **    at once; any other token pushes a default-constructed
      **    yyStackEntry and sets yyerrcnt to -1.
      **  * The action for the token is then looked up repeatedly with
      **    yy_find_shift_action() and dispatched as a shift, a reduce, the
      **    syntax-error handling, or an accept.
      **  * The loop ends once the token has been consumed (yymajor has been
      **    set to YYNOCODE) or the parser stack has become empty.
02603 */
02604 static void Parse(
02605   yyParser *yypParser,         /* The parser */
02606   int yymajor,                 /* The major token code number */
02607   ParseTOKENTYPE yyminor       /* The value for the token */
02608   ParseARG_PDECL               /* Optional %extra_argument parameter */
02609 ){
02610   YYMINORTYPE yyminorunion;
02611   int yyact;            /* The parser action. */
02612   int yyendofinput;     /* True if we are at the end of input */
02613   int yyerrorhit = 0;   /* True if yymajor has invoked an error */
02614 
02615   /* (re)initialize the parser, if necessary */
02616   if( yypParser->yystack.empty() ){
          /* End of input with nothing on the stack: there is nothing to parse. */
02617     if( yymajor==0 ) return;
02618     yypParser->yystack.push_back(yyStackEntry());
02619     yypParser->yyerrcnt = -1;
02620   }
        /* Wrap the token value in the union type that yy_shift() and
        ** yy_destructor() are passed below. */
02621   yyminorunion.yy0 = yyminor;
02622   yyendofinput = (yymajor==0);
02623   ParseARG_STORE;
02624 
02625   DEBUGLINE(QUERYPARSER, "Input " << ParseTokenName(yymajor) << " " <<
02626             (yyminor ? yyminor->name : "<<null>>"));
02627 
02628   do{
02629     yyact = yy_find_shift_action(yypParser,yymajor);
02630     if( yyact<YYNSTATE ){
            /* Shift: the token is consumed and the error countdown advances. */
02631       yy_shift(yypParser,yyact,yymajor,&yyminorunion);
02632       yypParser->yyerrcnt--;
            /* At end of input with entries still on the stack, go round the
            ** loop again with major token 0; otherwise mark the token as
            ** consumed so that the loop exits. */
02633       if( yyendofinput && !yypParser->yystack.empty() ){
02634         yymajor = 0;
02635       }else{
02636         yymajor = YYNOCODE;
02637       }
02638     }else if( yyact < YYNSTATE + YYNRULE ){
            /* Reduce: yymajor is left unchanged, so the same token is looked
            ** up again on the next iteration. */
02639       yy_reduce(yypParser,yyact-YYNSTATE);
02640     }else if( yyact == YY_ERROR_ACTION ){
02641       int yymx; /* NOTE(review): only used inside the YYERRORSYMBOL branch below; unused when that macro is not defined */
02642       DEBUGLINE(QUERYPARSER, "Syntax Error!");
02643 #ifdef YYERRORSYMBOL
02644       /* A syntax error has occurred.
02645       ** The response to an error depends upon whether or not the
02646       ** grammar defines an error token "ERROR".  
02647       **
02648       ** This is what we do if the grammar does define ERROR:
02649       **
02650       **  * Call the %syntax_error function.
02651       **
02652       **  * Begin popping the stack until we enter a state where
02653       **    it is legal to shift the error symbol, then shift
02654       **    the error symbol.
02655       **
02656       **  * Set the error count to three.
02657       **
02658       **  * Begin accepting and shifting new tokens.  No new error
02659       **    processing will occur until three tokens have been
02660       **    shifted successfully.
02661       **
02662       */
02663       if( yypParser->yyerrcnt<0 ){
02664         yy_syntax_error(yypParser,yymajor,yyminorunion);
02665       }
02666       yymx = yypParser->yystack.back().major;
02667       if( yymx==YYERRORSYMBOL || yyerrorhit ){
02668         DEBUGLINE(QUERYPARSER, "Discard input token " << ParseTokenName(yymajor));
02669         yy_destructor((YYCODETYPE)yymajor,&yyminorunion);
02670         yymajor = YYNOCODE;
02671       }else{
              /* NOTE(review): yymx is assigned once above and is not
              ** refreshed as entries are popped, so the yymx test in this
              ** loop condition cannot change between iterations - confirm
              ** that this is intended. */
02672          while(
02673           !yypParser->yystack.empty() &&
02674           yymx != YYERRORSYMBOL &&
02675           (yyact = yy_find_shift_action(yypParser,YYERRORSYMBOL)) >= YYNSTATE
02676         ){
02677           yy_pop_parser_stack(yypParser);
02678         }
02679         if( yypParser->yystack.empty() || yymajor==0 ){
02680           yy_destructor((YYCODETYPE)yymajor,&yyminorunion);
02681           yy_parse_failed(yypParser);
02682           yymajor = YYNOCODE;
02683         }else if( yymx!=YYERRORSYMBOL ){
02684           YYMINORTYPE u2;
02685           u2.YYERRSYMDT = 0;
02686           yy_shift(yypParser,yyact,YYERRORSYMBOL,&u2);
02687         }
02688       }
02689       yypParser->yyerrcnt = 3;
02690       yyerrorhit = 1;
02691 #else  /* YYERRORSYMBOL is not defined */
02692       /* This is what we do if the grammar does not define ERROR:
02693       **
02694       **  * Report an error message, and throw away the input token.
02695       **
02696       **  * If the input token is $, then fail the parse.
02697       **
02698       ** As before, subsequent error messages are suppressed until
02699       ** three input tokens have been successfully shifted.
02700       */
02701       if( yypParser->yyerrcnt<=0 ){
02702         yy_syntax_error(yypParser,yymajor,yyminorunion);
02703       }
02704       yypParser->yyerrcnt = 3;
02705       yy_destructor((YYCODETYPE)yymajor,&yyminorunion);
02706       if( yyendofinput ){
02707         yy_parse_failed(yypParser);
02708       }
02709       yymajor = YYNOCODE;
02710 #endif
02711     }else{
            /* Any remaining action value is treated as "accept". */
02712       yy_accept(yypParser);
02713       yymajor = YYNOCODE;
02714     }
02715   }while( yymajor!=YYNOCODE && !yypParser->yystack.empty() );
02716   return;
02717 }
02718 
02719 // Select C++ syntax highlighting in vim editor: vim: syntax=cpp

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.