xapian-core  1.4.31
queryparser_internal.cc
Go to the documentation of this file.
1 /*
2 ** 2000-05-29
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** Driver template for the LEMON parser generator.
13 **
14 ** Synced with upstream:
15 ** https://www.sqlite.org/src/artifact/468a155e8729cfbccfe1d85bf60d064f1dab76167a51149ec5c7928a2de63953
16 **
17 ** The "lemon" program processes an LALR(1) input grammar file, then uses
18 ** this template to construct a parser. The "lemon" program inserts text
19 ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
20 ** interstitial "-" characters) contained in this template is changed into
21 ** the value of the %name directive from the grammar. Otherwise, the content
22 ** of this template is copied straight through into the generated parser
23 ** source file.
24 **
25 ** The following is the concatenation of all %include directives from the
26 ** input grammar file:
27 */
28 /************ Begin %include sections from the grammar ************************/
29 #line 1 "queryparser/queryparser.lemony"
30 
34 /* Copyright (C) 2004-2023 Olly Betts
35  * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
36  * Copyright (C) 2010 Adam Sjøgren
37  *
38  * This program is free software; you can redistribute it and/or
39  * modify it under the terms of the GNU General Public License as
40  * published by the Free Software Foundation; either version 2 of the
41  * License, or (at your option) any later version.
42  *
43  * This program is distributed in the hope that it will be useful,
44  * but WITHOUT ANY WARRANTY; without even the implied warranty of
45  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46  * GNU General Public License for more details.
47  *
48  * You should have received a copy of the GNU General Public License
49  * along with this program; if not, write to the Free Software
50  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
51  * USA
52  */
53 
54 #include <config.h>
55 
56 #include "queryparser_internal.h"
57 
58 #include "api/queryinternal.h"
59 #include "omassert.h"
60 #include "str.h"
61 #include "stringutils.h"
62 #include "xapian/error.h"
63 #include "xapian/unicode.h"
64 
65 // Include the list of token values lemon generates.
66 #include "queryparser_token.h"
67 
68 #include "word-breaker.h"
69 
70 #include <algorithm>
71 #include <cstring>
72 #include <limits>
73 #include <list>
74 #include <string>
75 #include <vector>
76 
77 // We create the yyParser on the stack.
78 #define Parse_ENGINEALWAYSONSTACK
79 
80 using namespace std;
81 
82 using namespace Xapian;
83 
84 inline bool
85 U_isupper(unsigned ch) {
86  return ch < 128 && C_isupper(static_cast<unsigned char>(ch));
87 }
88 
89 inline bool
90 U_isdigit(unsigned ch) {
91  return ch < 128 && C_isdigit(static_cast<unsigned char>(ch));
92 }
93 
94 inline bool
95 U_isalpha(unsigned ch) {
96  return ch < 128 && C_isalpha(static_cast<unsigned char>(ch));
97 }
98 
100 
101 inline bool
102 is_not_whitespace(unsigned ch) {
103  return !is_whitespace(ch);
104 }
105 
107 
108 inline bool
109 is_not_wordchar(unsigned ch) {
110  return !is_wordchar(ch);
111 }
112 
// Predicate on a Unicode code point.  parse_term() applies it to the
// characters either side of a non-word character to choose between
// check_infix_digit() and check_infix().
// NOTE(review): the return statement of this function is absent from this
// listing - confirm the body against the upstream source.
113 inline bool
114 is_digit(unsigned ch) {
116 }
117 
// FIXME: a trailing "-" (e.g. Cl-) used to be kept too, but that is of
// dubious utility and risks hyphens getting stuck onto the end of terms...

/// Is @a ch a character which may be kept as a trailing suffix of a term?
inline bool
is_suffix(unsigned ch) {
    switch (ch) {
        case '+':
        case '#':
            return true;
        default:
            return false;
    }
}
124 
/// Is @a ch any kind of double quote?
///
/// All double quotes are treated as equivalent.  That is a bit crude, but
/// it isn't clear that it would actually be better to require opening and
/// closing quotes to match up exactly.
inline bool
is_double_quote(unsigned ch) {
    switch (ch) {
        case '"':
        case 0x201c: // Unicode opening double quote.
        case 0x201d: // Unicode closing double quote.
            return true;
        default:
            return false;
    }
}
135 
136 inline bool
137 prefix_needs_colon(const string & prefix, unsigned ch)
138 {
139  if (!U_isupper(ch) && ch != ':') return false;
140  string::size_type len = prefix.length();
141  return (len > 1 && prefix[len - 1] != ':');
142 }
143 
145 
// True when `op` is one of the operators handled here as positional
// (OP_PHRASE or OP_NEAR).
// NOTE(review): the line declaring the function name and its parameter is
// absent from this listing; callers in this file invoke it as
// is_positional(<Query::op value>).
146 inline bool
148 {
149  return (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR);
150 }
151 
152 class Terms;
153 
// A term produced by the lexer, together with what is needed to turn it
// into a Xapian::Query.
// NOTE(review): several member declarations are absent from this listing;
// the constructors below initialise members named state, field_info, stem,
// pos and query in addition to the two visible string members.
160 class Term {
162 
163  public:
// Text of the term.  The RANGE constructor stores the grouping here instead.
164  string name;
// If non-empty, make_term() records (generated term -> this string) via
// State::add_to_unstem().
166  string unstemmed;
170 
// Unstemmed term with an explicit position.
171  Term(const string &name_, termpos pos_)
172  : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { }
// Unstemmed term at position 0.
173  explicit Term(const string &name_)
174  : name(name_), stem(QueryParser::STEM_NONE), pos(0) { }
// Unstemmed term at position 0 associated with a field.
175  Term(const string &name_, const FieldInfo * field_info_)
176  : name(name_), field_info(field_info_),
177  stem(QueryParser::STEM_NONE), pos(0) { }
// Nameless term which only carries a position.
178  explicit Term(termpos pos_) : stem(QueryParser::STEM_NONE), pos(pos_) { }
// Fully specified term; the stemming strategy defaults to STEM_NONE and the
// position to 0.
179  Term(State * state_, const string &name_, const FieldInfo * field_info_,
180  const string &unstemmed_,
181  QueryParser::stem_strategy stem_ = QueryParser::STEM_NONE,
182  termpos pos_ = 0)
183  : state(state_), name(name_), field_info(field_info_),
184  unstemmed(unstemmed_), stem(stem_), pos(pos_) { }
185  // For RANGE tokens.
186  Term(const Xapian::Query & q, const string & grouping)
187  : name(grouping), query(q) { }
188 
// Build the term string for `prefix` (defined after class State below).
189  string make_term(const string & prefix) const;
190 
// Downgrade STEM_SOME to STEM_NONE; other strategies are left unchanged.
191  void need_positions() {
192  if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
193  }
194 
195  termpos get_termpos() const { return pos; }
196 
// Grouping of the associated field; dereferences field_info unconditionally.
197  string get_grouping() const {
198  return field_info->grouping;
199  }
200 
// The as_*() conversions below are defined later in this file; each of those
// definitions ends by deleting `this`, so the Term must not be used
// afterwards.
201  Query * as_wildcarded_query(State * state) const;
202 
211  Query * as_partial_query(State * state_) const;
212 
214  Query* as_unbroken_query() const;
215 
217  void as_positional_unbroken(Terms* terms) const;
218 
220  Query as_range_query() const;
221 
// The get_query*() members do not delete `this`.
222  Query get_query() const;
223 
224  Query get_query_with_synonyms() const;
225 
226  Query get_query_with_auto_synonyms() const;
227 };
228 
// Per-parse state: a thin facade over QueryParser::Internal (qpi) plus the
// parse flags and the error message slot.
// NOTE(review): a number of lines (member declarations for qpi and
// effective_default_op, several method signatures and some case labels in
// range()) are absent from this listing.
230 class State {
232 
233  public:
// Error message; the VET_BOOL_ARGS macro later in this file assigns to it.
235  const char* error = NULL;
// Bitmask of QueryParser::FLAG_* values for this parse.
236  unsigned flags;
238 
// The effective default operator starts as qpi_->default_op, but is replaced
// by OP_AND when FLAG_NO_POSITIONS is set and the operator is positional.
239  State(QueryParser::Internal * qpi_, unsigned flags_)
240  : qpi(qpi_), flags(flags_), effective_default_op(qpi_->default_op)
241  {
242  if ((flags & QueryParser::FLAG_NO_POSITIONS)) {
243  if (is_positional(effective_default_op)) {
244  effective_default_op = Query::OP_AND;
245  }
246  }
247  }
248 
// Apply the query parser's stemmer to `term`.
249  string stem_term(const string &term) {
250  return qpi->stemmer(term);
251  }
252 
// Record the name of `term` in the query parser's stoplist.
253  void add_to_stoplist(const Term * term) {
254  qpi->stoplist.push_back(term->name);
255  }
256 
// Record a (term, unstemmed form) pair in the query parser's unstem map.
257  void add_to_unstem(const string & term, const string & unstemmed) {
258  qpi->unstem.insert(make_pair(term, unstemmed));
259  }
260 
// Offer the range a..b to each registered range processor in turn and wrap
// the resulting query in a newly allocated Term.  Returns NULL if the loop
// finishes without any processor producing a result.
// NOTE(review): the case labels of the switch are missing from this listing,
// so which query types map to which branch cannot be confirmed here.
261  Term * range(const string &a, const string &b) {
// Each element is copied by value on every iteration (`auto i`).
262  for (auto i : qpi->rangeprocs) {
263  Xapian::Query range_query = (i.proc)->check_range(a, b);
264  Xapian::Query::op op = range_query.get_type();
265  switch (op) {
267  break;
// With default_grouping set, the grouping is the value slot number as a
// string.
271  if (i.default_grouping) {
273  static_cast<Xapian::Internal::QueryValueBase*>(
274  range_query.internal.get());
275  Xapian::valueno slot = base->get_slot();
276  return new Term(range_query, str(slot));
277  }
278  // FALLTHRU
280  return new Term(range_query, i.grouping);
281  default:
// Any other query type is returned with an empty grouping.
282  return new Term(range_query, string());
283  }
284  }
285  return NULL;
286  }
287 
// Accessor for the effective default operator computed in the constructor
// (signature line missing from this listing; called as default_op()).
289  return effective_default_op;
290  }
291 
// True if a stopper is set and it reports the term's name as a stopword.
292  bool is_stopword(const Term *term) const {
293  return qpi->stopper.get() && (*qpi->stopper)(term->name);
294  }
295 
// Accessor for qpi->db (signature line missing from this listing; called as
// get_database()).
297  return qpi->db;
298  }
299 
// The stopper in use, or NULL if none is set.
300  const Stopper * get_stopper() const {
301  return qpi->stopper.get();
302  }
303 
// stoplist_size()/stoplist_resize() let TermGroup::as_group() roll back
// stoplist additions.
304  size_t stoplist_size() const {
305  return qpi->stoplist.size();
306  }
307 
308  void stoplist_resize(size_t s) {
309  qpi->stoplist.resize(s);
310  }
311 
// Accessor for qpi->max_wildcard_expansion (signature line missing).
313  return qpi->max_wildcard_expansion;
314  }
315 
316  int get_max_wildcard_type() const {
317  return qpi->max_wildcard_type;
318  }
319 
// Accessor for qpi->max_partial_expansion (signature line missing).
321  return qpi->max_partial_expansion;
322  }
323 
324  int get_max_partial_type() const {
325  return qpi->max_partial_type;
326  }
327 };
328 
329 string
330 Term::make_term(const string & prefix) const
331 {
332  string term;
333  if (stem != QueryParser::STEM_NONE && stem != QueryParser::STEM_ALL)
334  term += 'Z';
335  if (!prefix.empty()) {
336  term += prefix;
337  if (prefix_needs_colon(prefix, name[0])) term += ':';
338  }
339  if (stem != QueryParser::STEM_NONE) {
340  term += state->stem_term(name);
341  } else {
342  term += name;
343  }
344 
345  if (!unstemmed.empty())
346  state->add_to_unstem(term, unstemmed);
347  return term;
348 }
349 
350 // Iterator shim to allow building a synonym query from a TermIterator pair.
// NOTE(review): the class header, the data member declarations (i, pos,
// first), the start of the constructor and the signature of the increment
// operator are absent from this listing.  The class is referred to as
// SynonymIterator elsewhere in this file.
353 
355 
357 
358  public:
// `first_`, when non-NULL, is an extra query yielded before any term from
// the wrapped iterator; `pos_` is the position given to each term query.
360  Xapian::termpos pos_ = 0,
361  const Xapian::Query * first_ = NULL)
362  : i(i_), pos(pos_), first(first_) { }
363 
// Advance: the first step only clears `first`; later steps advance `i`.
365  if (first)
366  first = NULL;
367  else
368  ++i;
369  return *this;
370  }
371 
// Yields *first while it is pending, otherwise a term query for *i with
// wqf 1 at position `pos`.
372  const Xapian::Query operator*() const {
373  if (first) return *first;
374  return Xapian::Query(*i, 1, pos);
375  }
376 
// Equal only if both the wrapped iterators and the pending `first` pointers
// match, so an iterator with a pending `first` never equals a plain end.
377  bool operator==(const SynonymIterator & o) const {
378  return i == o.i && first == o.first;
379  }
380 
381  bool operator!=(const SynonymIterator & o) const {
382  return !(*this == o);
383  }
384 
385  typedef std::input_iterator_tag iterator_category;
390 };
391 
// Build the query for this term expanded with single-word synonyms.
// NOTE(review): the line carrying the function name is absent from this
// listing; going by the declarations in class Term and the call in the next
// function, this is presumably Term::get_query_with_synonyms() const.
392 Query
394 {
395  // Handle single-word synonyms with each prefix.
396  const auto& prefixes = field_info->prefixes;
// A field without prefixes is handled entirely by its field processor.
397  if (prefixes.empty()) {
398  Assert(field_info->proc.get());
399  return (*field_info->proc)(name);
400  }
401 
402  Query q = get_query();
403 
// Each pass wraps the query built so far in an OP_SYNONYM together with the
// synonyms found for this prefix.
404  for (auto&& prefix : prefixes) {
405  // First try the unstemmed term:
406  string term;
407  if (!prefix.empty()) {
408  term += prefix;
409  if (prefix_needs_colon(prefix, name[0])) term += ':';
410  }
411  term += name;
412 
413  Xapian::Database db = state->get_database();
414  Xapian::TermIterator syn = db.synonyms_begin(term);
415  Xapian::TermIterator end = db.synonyms_end(term);
416  if (syn == end && stem != QueryParser::STEM_NONE) {
417  // If that has no synonyms, try the stemmed form:
418  term = 'Z';
419  if (!prefix.empty()) {
420  term += prefix;
421  if (prefix_needs_colon(prefix, name[0])) term += ':';
422  }
423  term += state->stem_term(name);
424  syn = db.synonyms_begin(term);
425  end = db.synonyms_end(term);
426  }
// &q is passed as the iterator's `first` element, so the current query is
// the first subquery of the new synonym query.
427  q = Query(q.OP_SYNONYM,
428  SynonymIterator(syn, pos, &q),
429  SynonymIterator(end));
430  }
431  return q;
432 }
433 
// Return the synonym-expanded query if either automatic synonym flag is set
// in the parse flags, otherwise the plain query.
// NOTE(review): the line carrying the function name is absent from this
// listing; presumably Term::get_query_with_auto_synonyms() const, as
// declared in class Term.
434 Query
436 {
437  const unsigned MASK_ENABLE_AUTO_SYNONYMS =
438  QueryParser::FLAG_AUTO_SYNONYMS |
439  QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS;
440  if (state->flags & MASK_ENABLE_AUTO_SYNONYMS)
441  return get_query_with_synonyms();
442 
443  return get_query();
444 }
445 
// Combine the heap-allocated query `term` into `q` using `op`.
// If `q` is NULL it simply takes over the `term` pointer; otherwise `*term`
// is merged into `*q` and `term` is deleted.  `term` must be non-NULL.
// NOTE(review): the signature line is absent from this listing; from the
// body and the sibling overload below it presumably takes
// (Query *& q, Query::op op, Query * term).
446 static void
448 {
449  Assert(term);
450  if (q) {
// OR and AND use the compound assignment operators; any other operator
// builds a new two-way query.
451  if (op == Query::OP_OR) {
452  *q |= *term;
453  } else if (op == Query::OP_AND) {
454  *q &= *term;
455  } else {
456  *q = Query(op, *q, *term);
457  }
458  delete term;
459  } else {
460  q = term;
461  }
462 }
463 
464 static void
465 add_to_query(Query *& q, Query::op op, const Query & term)
466 {
467  if (q) {
468  if (op == Query::OP_OR) {
469  *q |= term;
470  } else if (op == Query::OP_AND) {
471  *q &= term;
472  } else {
473  *q = Query(op, *q, term);
474  }
475  } else {
476  q = new Query(term);
477  }
478 }
479 
// Build the plain query for this term: the OR of the term generated for each
// prefix of its field (wqf 1, at position `pos`).
// NOTE(review): the line carrying the function name is absent from this
// listing; presumably Term::get_query() const, as declared in class Term.
480 Query
482 {
483  const auto& prefixes = field_info->prefixes;
// A field without prefixes is handled entirely by its field processor.
484  if (prefixes.empty()) {
485  Assert(field_info->proc.get());
486  return (*field_info->proc)(name);
487  }
488  auto piter = prefixes.begin();
489  Query q(make_term(*piter), 1, pos);
490  while (++piter != prefixes.end()) {
491  q |= Query(make_term(*piter), 1, pos);
492  }
493  return q;
494 }
495 
// Convert this term into a wildcard query: one OP_WILDCARD subquery per
// field prefix, combined under OP_SYNONYM.  Deletes `this` before returning,
// and the caller owns the returned Query.
// NOTE(review): the signature line and the declaration of `max` are absent
// from this listing; presumably Term::as_wildcarded_query(State * state_)
// const, with `max` taken from state_->get_max_wildcard_expansion().
496 Query *
498 {
499  const auto& prefixes = field_info->prefixes;
501  int max_type = state_->get_max_wildcard_type();
502  vector<Query> subqs;
503  subqs.reserve(prefixes.size());
// `root` is deliberately a copy of each prefix so the name can be appended.
504  for (string root : prefixes) {
505  root += name;
506  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
507  subqs.push_back(Query(Query::OP_WILDCARD, root, max, max_type,
508  Query::OP_OR));
509  }
510  Query * q = new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
511  delete this;
512  return q;
513 }
514 
// Convert this term into a "partial" query: the OR of a synonym of wildcard
// expansions and a synonym of the complete terms.  Deletes `this` before
// returning, and the caller owns the returned Query.
// NOTE(review): the signature line and the declaration of `max` are absent
// from this listing; presumably Term::as_partial_query(State * state_)
// const, with `max` taken from state_->get_max_partial_expansion().
515 Query *
517 {
519  int max_type = state_->get_max_partial_type();
520  vector<Query> subqs_partial; // A synonym of all the partial terms.
521  vector<Query> subqs_full; // A synonym of all the full terms.
522 
523  for (const string& prefix : field_info->prefixes) {
524  string root = prefix;
525  root += name;
526  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
527  subqs_partial.push_back(Query(Query::OP_WILDCARD, root, max, max_type,
528  Query::OP_OR));
// The stopword check goes through the member `state`, not the `state_`
// parameter used above.
529  if (!state->is_stopword(this)) {
530  // Add the term, as it would normally be handled, as an alternative
531  // (unless it is a stopword).
532  subqs_full.push_back(Query(make_term(prefix), 1, pos));
533  }
534  }
535  Query * q = new Query(Query::OP_OR,
536  Query(Query::OP_SYNONYM,
537  subqs_partial.begin(), subqs_partial.end()),
538  Query(Query::OP_SYNONYM,
539  subqs_full.begin(), subqs_full.end()));
540  delete this;
541  return q;
542 }
543 
// Convert this term into a query over its n-grams: for each field prefix the
// n-grams produced by NgramIterator are ANDed together, and the per-prefix
// results are ORed.  Deletes `this` before returning, and the caller owns
// the returned Query.
// NOTE(review): the line carrying the function name is absent from this
// listing; presumably Term::as_unbroken_query() const, as declared in class
// Term.
544 Query *
546 {
547  vector<Query> prefix_subqs;
548  vector<Query> ngram_subqs;
549  const auto& prefixes = field_info->prefixes;
550  for (const string& prefix : prefixes) {
551  for (NgramIterator tk(name); tk != NgramIterator(); ++tk) {
552  ngram_subqs.push_back(Query(prefix + *tk, 1, pos));
553  }
554  prefix_subqs.push_back(Query(Query::OP_AND,
555  ngram_subqs.begin(), ngram_subqs.end()));
// Reuse the same vector for the next prefix.
556  ngram_subqs.clear();
557  }
558  Query * q = new Query(Query::OP_OR,
559  prefix_subqs.begin(), prefix_subqs.end());
560  delete this;
561  return q;
562 }
563 
// Return a copy of the stored range query and delete `this`.
// NOTE(review): the line carrying the function name is absent from this
// listing; presumably Term::as_range_query() const, as declared in class
// Term.
564 Query
566 {
// Copy the member first: it is destroyed along with `this`.
567  Query q = query;
568  delete this;
569  return q;
570 }
571 
// True if `ch` is a non-zero ASCII character in the set . - / : \ @
// NOTE(review): the signature line is absent from this listing; presumably
// is_phrase_generator(unsigned ch), by analogy with is_stem_preventer()
// below.
572 inline bool
574 {
575  // These characters generate a phrase search.
576  // Ordered mostly by frequency of calls to this function done when
577  // running the testcases in api_queryparser.cc.
// The `ch &&` test is needed because strchr() would otherwise match the
// string's terminating NUL.
578  return (ch && ch < 128 && strchr(".-/:\\@", ch) != NULL);
579 }
580 
/// Is @a ch one of the ASCII characters ( / \ @ < > = * [ { " ?
///
/// Zero and code points >= 128 are never matches.
inline bool
is_stem_preventer(unsigned ch)
{
    // Reject 0 explicitly: strchr() would match the terminating NUL.
    if (ch == 0 || ch >= 128) return false;
    const char * hit = strchr("(/\\@<>=*[{\"", ch);
    return hit != NULL;
}
586 
// Decide whether `term` should be stemmed by testing the Unicode category of
// its first character against a bitmask of categories.
// NOTE(review): the first entries of SHOULD_STEM_MASK are absent from this
// listing; only MODIFIER_LETTER and OTHER_LETTER are visible.
587 inline bool
588 should_stem(const string & term)
589 {
590  const unsigned int SHOULD_STEM_MASK =
593  (1 << Unicode::MODIFIER_LETTER) |
594  (1 << Unicode::OTHER_LETTER);
595  Utf8Iterator u(term);
// Shift the mask so the bit for this category lands in bit 0.
596  return ((SHOULD_STEM_MASK >> Unicode::get_category(*u)) & 1);
597 }
598 
// Sentinel returned by check_infix() and check_infix_digit() for characters
// which parse_term() skips without appending them to the term.
602 const unsigned UNICODE_IGNORE = numeric_limits<unsigned>::max();
603 
604 inline unsigned check_infix(unsigned ch) {
605  if (ch == '\'' || ch == '&' || ch == 0xb7 || ch == 0x5f4 || ch == 0x2027) {
606  // Unicode includes all these except '&' in its word boundary rules,
607  // as well as 0x2019 (which we handle below) and ':' (for Swedish
608  // apparently, but we ignore this for now as it's problematic in
609  // real world cases:
610  // https://en.wikipedia.org/wiki/Colon_(punctuation)#Usage_in_other_languages
611  // ).
612  return ch;
613  }
614  if (ch >= 0x200b) {
615  // 0x2019 is Unicode apostrophe and single closing quote.
616  // 0x201b is Unicode single opening quote with the tail rising.
617  if (ch == 0x2019 || ch == 0x201b)
618  return '\'';
619  if (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff)
620  return UNICODE_IGNORE;
621  }
622  return 0;
623 }
624 
625 inline unsigned check_infix_digit(unsigned ch) {
626  // This list of characters comes from Unicode's word identifying algorithm.
627  switch (ch) {
628  case ',':
629  case '.':
630  case ';':
631  case 0x037e: // GREEK QUESTION MARK
632  case 0x0589: // ARMENIAN FULL STOP
633  case 0x060D: // ARABIC DATE SEPARATOR
634  case 0x07F8: // NKO COMMA
635  case 0x2044: // FRACTION SLASH
636  case 0xFE10: // PRESENTATION FORM FOR VERTICAL COMMA
637  case 0xFE13: // PRESENTATION FORM FOR VERTICAL COLON
638  case 0xFE14: // PRESENTATION FORM FOR VERTICAL SEMICOLON
639  return ch;
640  }
641  if (ch >= 0x200b && (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff))
642  return UNICODE_IGNORE;
643  return 0;
644 }
645 
646 // Prototype a function lemon generates, but which we want to call before that
647 // in the generated source code file.
648 struct yyParser;
649 static void yy_parse_failed(yyParser *);
650 
651 void
652 QueryParser::Internal::add_prefix(const string &field, const string &prefix)
653 {
654  map<string, FieldInfo>::iterator p = field_map.find(field);
655  if (p == field_map.end()) {
656  field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, prefix)));
657  } else {
658  // Check that this is the same type of filter as the existing one(s).
659  if (p->second.type != NON_BOOLEAN) {
660  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter");
661  }
662  if (p->second.proc.get())
663  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
664  p->second.prefixes.push_back(prefix);
665  }
666 }
667 
668 void
669 QueryParser::Internal::add_prefix(const string &field, FieldProcessor *proc)
670 {
671  map<string, FieldInfo>::iterator p = field_map.find(field);
672  if (p == field_map.end()) {
673  field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, proc)));
674  } else {
675  // Check that this is the same type of filter as the existing one(s).
676  if (p->second.type != NON_BOOLEAN) {
677  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter");
678  }
679  if (!p->second.prefixes.empty())
680  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
681  throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects for the same prefix currently not supported");
682  }
683 }
684 
685 void
686 QueryParser::Internal::add_boolean_prefix(const string &field,
687  const string &prefix,
688  const string* grouping)
689 {
690  // Don't allow the empty prefix to be set as boolean as it doesn't
691  // really make sense.
692  if (field.empty())
693  throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
694  if (!grouping) grouping = &field;
695  filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE;
696  map<string, FieldInfo>::iterator p = field_map.find(field);
697  if (p == field_map.end()) {
698  field_map.insert(make_pair(field, FieldInfo(type, prefix, *grouping)));
699  } else {
700  // Check that this is the same type of filter as the existing one(s).
701  if (p->second.type != type) {
702  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME
703  }
704  if (p->second.proc.get())
705  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
706  p->second.prefixes.push_back(prefix); // FIXME grouping
707  }
708 }
709 
710 void
711 QueryParser::Internal::add_boolean_prefix(const string &field,
712  FieldProcessor *proc,
713  const string* grouping)
714 {
715  // Don't allow the empty prefix to be set as boolean as it doesn't
716  // really make sense.
717  if (field.empty())
718  throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
719  if (!grouping) grouping = &field;
720  filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE;
721  map<string, FieldInfo>::iterator p = field_map.find(field);
722  if (p == field_map.end()) {
723  field_map.insert(make_pair(field, FieldInfo(type, proc, *grouping)));
724  } else {
725  // Check that this is the same type of filter as the existing one(s).
726  if (p->second.type != type) {
727  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME
728  }
729  if (!p->second.prefixes.empty())
730  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
731  throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects for the same prefix currently not supported");
732  }
733 }
734 
// Extract one term starting at `it`, leaving `it` just past the consumed
// text, and return the term.
//
// Three forms are tried in order:
//  1. a dotted acronym of ASCII capitals (e.g. P.T.O.), returned with the
//     dots removed;
//  2. if try_word_break is set, a run of unbroken-script characters;
//  3. a run of word characters, allowing certain single infix characters
//     between word characters and up to three trailing '+' or '#'.
//
// was_acronym is always assigned (true iff form 1 matched).
// needs_word_break is only ever set to true here (form 2); it is never
// reset by this function.
735 string
736 QueryParser::Internal::parse_term(Utf8Iterator &it, const Utf8Iterator &end,
737  bool try_word_break, bool& needs_word_break,
738  bool &was_acronym)
739 {
740  string term;
741  // Look for initials separated by '.' (e.g. P.T.O., U.N.C.L.E).
742  // Don't worry if there's a trailing '.' or not.
743  if (U_isupper(*it)) {
744  string t;
// Scan ahead with a copy so `it` only moves if the acronym is accepted.
745  Utf8Iterator p = it;
746  do {
747  Unicode::append_utf8(t, *p++);
748  } while (p != end && *p == '.' && ++p != end && U_isupper(*p));
749  // One letter does not make an acronym! If we handled a single
750  // uppercase letter here, we wouldn't catch M&S below.
751  if (t.length() > 1) {
752  // Check there's not a (lower case) letter or digit
753  // immediately after it.
754  // FIXME: should I.B.M..P.T.O be a range search?
755  if (p == end || !is_wordchar(*p)) {
756  it = p;
757  swap(term, t);
758  }
759  }
760  }
761  was_acronym = !term.empty();
762 
763  if (try_word_break && term.empty() && is_unbroken_script(*it)) {
// The term is the raw byte range which get_unbroken() advances over.
764  const char* start = it.raw();
765  get_unbroken(it);
766  term.assign(start, it.raw() - start);
767  needs_word_break = true;
768  }
769 
770  if (term.empty()) {
// prevch tracks the last character appended, for the digit-infix test.
771  unsigned prevch = *it;
772  Unicode::append_utf8(term, prevch);
773  while (++it != end) {
774  if (try_word_break && is_unbroken_script(*it)) break;
775  unsigned ch = *it;
776  if (!is_wordchar(ch)) {
777  // Treat a single embedded '&' or "'" or similar as a word
778  // character (e.g. AT&T, Fred's). Also, normalise
779  // apostrophes to ASCII apostrophe.
780  Utf8Iterator p = it;
781  ++p;
// An infix must be followed by a word character.
782  if (p == end || !is_wordchar(*p)) break;
783  unsigned nextch = *p;
784  if (is_digit(prevch) && is_digit(nextch)) {
785  ch = check_infix_digit(ch);
786  } else {
787  ch = check_infix(ch);
788  }
789  if (!ch) break;
// Ignorable characters are dropped without updating prevch.
790  if (ch == UNICODE_IGNORE)
791  continue;
792  }
793  Unicode::append_utf8(term, ch);
794  prevch = ch;
795  }
796  if (it != end && is_suffix(*it)) {
797  string suff_term = term;
798  Utf8Iterator p = it;
799  // Keep trailing + (e.g. C++, Na+) or # (e.g. C#).
800  do {
// A fourth suffix character abandons the suffixed form entirely.
801  if (suff_term.size() - term.size() == 3) {
802  suff_term.resize(0);
803  break;
804  }
805  suff_term += *p;
// NOTE(review): p is dereferenced here without first comparing it to `end`;
// presumably dereferencing an at-end Utf8Iterator yields a non-suffix value -
// confirm against Utf8Iterator.
806  } while (is_suffix(*++p));
807  if (!suff_term.empty() && (p == end || !is_wordchar(*p))) {
808  // If the suffixed term doesn't exist, check that the
809  // non-suffixed term does. This also takes care of
810  // the case when QueryParser::set_database() hasn't
811  // been called.
812  bool use_suff_term = false;
813  string lc = Unicode::tolower(suff_term);
814  if (db.term_exists(lc)) {
815  use_suff_term = true;
816  } else {
817  lc = Unicode::tolower(term);
818  if (!db.term_exists(lc)) use_suff_term = true;
819  }
820  if (use_suff_term) {
821  term = suff_term;
822  it = p;
823  }
824  }
825  }
826  }
827  return term;
828 }
829 
830 #line 1421 "queryparser/queryparser.lemony"
831 
832 
// Accumulates the parts of a probabilistic query: the main query, the "love"
// and "hate" queries, and boolean filters keyed by grouping.  Owns the three
// Query pointers (they are deleted in the destructor body below).
// NOTE(review): the lines declaring the default constructor, the destructor
// and the filter-merging method near the end are absent from this listing.
833 struct ProbQuery {
834  Query* query = NULL;
835  Query* love = NULL;
836  Query* hate = NULL;
837  // filter is a map from prefix to a query for that prefix. Queries with
838  // the same prefix are combined with OR, and the results of this are
839  // combined with AND to get the full filter.
840  map<string, Query> filter;
841 
843 
// Takes ownership of query_.
844  explicit
845  ProbQuery(Query* query_) : query(query_) {}
846 
// Destructor body (its signature line is missing from this listing).
848  delete query;
849  delete love;
850  delete hate;
851  }
852 
// Set the filter for `grouping`, replacing any existing entry.
853  void add_filter(const string& grouping, const Query & q) {
854  filter[grouping] = q;
855  }
856 
// Merge qnew into the filter for `grouping`, creating the entry if needed.
857  void append_filter(const string& grouping, const Query & qnew) {
858  auto it = filter.find(grouping);
859  if (it == filter.end()) {
860  filter.insert(make_pair(grouping, qnew));
861  } else {
862  Query & q = it->second;
863  // We OR multiple filters with the same prefix if they're
864  // exclusive, otherwise we AND them.
// A non-empty grouping is what marks the filter as exclusive here.
865  bool exclusive = !grouping.empty();
866  if (exclusive) {
867  q |= qnew;
868  } else {
869  q &= qnew;
870  }
871  }
872  }
873 
// Set the range filter for `grouping`, replacing any existing entry.
874  void add_filter_range(const string& grouping, const Query & range) {
875  filter[grouping] = range;
876  }
877 
// OR `range` into the filter for `grouping`; operator[] default-constructs
// the entry if it does not exist yet.
878  void append_filter_range(const string& grouping, const Query & range) {
879  Query & q = filter[grouping];
880  q |= range;
881  }
882 
// AND together every per-grouping filter.  Requires a non-empty filter map.
// (The signature line of this method is missing from this listing.)
884  auto i = filter.begin();
885  Assert(i != filter.end());
886  Query q = i->second;
887  while (++i != filter.end()) {
888  q &= i->second;
889  }
890  return q;
891  }
892 };
893 
// A group of two or more Term objects which as_group() combines with the
// default operator.  Owns its terms (the destructor body below deletes
// them).  Instances are created on the heap via create().
// NOTE(review): the destructor's signature line and the original doc
// comments are absent from this listing.
895 class TermGroup {
896  vector<Term *> terms;
897 
// When false, as_group() reprocesses a group that consisted solely of
// stopwords with the stopper disabled rather than returning nothing.
903  bool empty_ok;
904 
// Private: use create() instead.
905  TermGroup(Term* t1, Term* t2) : empty_ok(false) {
906  add_term(t1);
907  add_term(t2);
908  }
909 
910  public:
// Factory: allocate a group holding the first two terms.
912  static TermGroup* create(Term* t1, Term* t2) {
913  return new TermGroup(t1, t2);
914  }
915 
// Destructor body (its signature line is missing from this listing).
917  for (auto&& t : terms) {
918  delete t;
919  }
920  }
921 
// Append a term; the group takes ownership of it.
923  void add_term(Term * term) {
924  terms.push_back(term);
925  }
926 
928  void set_empty_ok() { empty_ok = true; }
929 
// Convert to a query; the definition below ends with `delete this`.
931  Query * as_group(State *state) const;
932 };
933 
934 Query *
936 {
937  const Xapian::Stopper * stopper = state->get_stopper();
938  size_t stoplist_size = state->stoplist_size();
939  bool default_op_is_positional = is_positional(state->default_op());
940 reprocess:
941  Query::op default_op = state->default_op();
942  vector<Query> subqs;
943  subqs.reserve(terms.size());
944  if (state->flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
945  // Check for multi-word synonyms.
946  Database db = state->get_database();
947 
948  string key;
949  vector<Term*>::size_type begin = 0;
950  vector<Term*>::size_type i = begin;
951  while (terms.size() - i > 0) {
952  size_t longest_match = 0;
953  // This value is never used, but GCC 4.8 warns with
954  // -Wmaybe-uninitialized (GCC 5.4 doesn't).
955  vector<Term*>::size_type longest_match_end = 0;
956  if (terms.size() - i >= 2) {
957  // Greedily try to match as many consecutive words as possible.
958  key = terms[i]->name;
959  key += ' ';
960  key += terms[i + 1]->name;
961  TermIterator synkey(db.synonym_keys_begin(key));
962  TermIterator synend(db.synonym_keys_end(key));
963  if (synkey != synend) {
964  longest_match = key.size();
965  longest_match_end = i + 2;
966  for (auto j = i + 2; j < terms.size(); ++j) {
967  key += ' ';
968  key += terms[j]->name;
969  synkey.skip_to(key);
970  if (synkey == synend)
971  break;
972  const string& found = *synkey;
973  if (!startswith(found, key))
974  break;
975  if (found.size() == key.size()) {
976  longest_match = key.size();
977  longest_match_end = j + 1;
978  }
979  }
980  }
981  }
982  if (longest_match == 0) {
983  // No multi-synonym matches at position i.
984  if (stopper && (*stopper)(terms[i]->name)) {
985  state->add_to_stoplist(terms[i]);
986  } else {
987  if (default_op_is_positional)
988  terms[i]->need_positions();
989  subqs.push_back(terms[i]->get_query_with_auto_synonyms());
990  }
991  begin = ++i;
992  continue;
993  }
994  i = longest_match_end;
995  key.resize(longest_match);
996 
997  vector<Query> subqs2;
998  for (auto j = begin; j != i; ++j) {
999  if (stopper && (*stopper)(terms[j]->name)) {
1000  state->add_to_stoplist(terms[j]);
1001  } else {
1002  if (default_op_is_positional)
1003  terms[i]->need_positions();
1004  subqs2.push_back(terms[j]->get_query());
1005  }
1006  }
1007  Query q_original_terms;
1008  if (default_op_is_positional) {
1009  q_original_terms = Query(default_op,
1010  subqs2.begin(), subqs2.end(),
1011  subqs2.size() + 9);
1012  } else {
1013  q_original_terms = Query(default_op,
1014  subqs2.begin(), subqs2.end());
1015  }
1016  subqs2.clear();
1017 
1018  // Use the position of the first term for the synonyms.
1019  TermIterator syn = db.synonyms_begin(key);
1020  Query q(Query::OP_SYNONYM,
1021  SynonymIterator(syn, terms[begin]->pos, &q_original_terms),
1022  SynonymIterator(db.synonyms_end(key)));
1023  subqs.push_back(q);
1024 
1025  begin = i;
1026  }
1027  } else {
1028  vector<Term*>::const_iterator i;
1029  for (i = terms.begin(); i != terms.end(); ++i) {
1030  if (stopper && (*stopper)((*i)->name)) {
1031  state->add_to_stoplist(*i);
1032  } else {
1033  if (default_op_is_positional)
1034  (*i)->need_positions();
1035  subqs.push_back((*i)->get_query_with_auto_synonyms());
1036  }
1037  }
1038  }
1039 
1040  if (!empty_ok && stopper && subqs.empty() &&
1041  stoplist_size < state->stoplist_size()) {
1042  // This group is all stopwords, so roll-back, disable stopper
1043  // temporarily, and reprocess this group.
1044  state->stoplist_resize(stoplist_size);
1045  stopper = NULL;
1046  goto reprocess;
1047  }
1048 
1049  Query * q = NULL;
1050  if (!subqs.empty()) {
1051  if (default_op_is_positional) {
1052  q = new Query(default_op, subqs.begin(), subqs.end(),
1053  subqs.size() + 9);
1054  } else {
1055  q = new Query(default_op, subqs.begin(), subqs.end());
1056  }
1057  }
1058  delete this;
1059  return q;
1060 }
1061 
// An ordered list of Term objects to be combined positionally (phrase, NEAR
// or ADJ).  Owns its terms (the destructor body below deletes them).
// Instances are created on the heap via create(), and the as_*_query()
// conversions delete `this`.
// NOTE(review): several lines are absent from this listing - the declaration
// of uniform_prefixes, the signatures of opwindow_subq(),
// as_opwindow_query(), the destructor and the phrase-query method, and the
// original doc comments.
1063 class Terms {
1064  vector<Term *> terms;
1065 
// Window size requested via adjust_window(); 0 means "use the default", and
// size_t(-1) marks positions as disabled (see the constructor).
1071  size_t window;
1072 
1083 
// Prefix list shared by every term added so far, or NULL once the terms'
// prefix lists differ (or before any term has been added).
1087  const vector<string>* prefixes;
1088 
// Helper building Query(op, v...) - without a window for OP_AND, with window
// `w` otherwise.  (First signature line missing from this listing.)
1090  const vector<Query>& v,
1091  Xapian::termcount w) const {
1092  if (op == Query::OP_AND) {
1093  return Query(op, v.begin(), v.end());
1094  }
1095  return Query(op, v.begin(), v.end(), w);
1096  }
1097 
// Body of the shared conversion routine; callers below invoke it as
// as_opwindow_query(op, w_delta).  The window used is w_delta plus the
// number of terms.  Returns a new Query (or NULL if nothing was built) and
// deletes `this`.
// With positions disabled the operator degrades to OP_AND.
1100  if (window == size_t(-1)) op = Query::OP_AND;
1101  Query * q = NULL;
1102  size_t n_terms = terms.size();
1103  Xapian::termcount w = w_delta + terms.size();
1104  if (uniform_prefixes) {
// All terms share one prefix list: build one windowed query per prefix and
// OR those together.
1105  if (prefixes) {
1106  for (auto&& prefix : *prefixes) {
1107  vector<Query> subqs;
1108  subqs.reserve(n_terms);
1109  for (Term* t : terms) {
1110  subqs.push_back(Query(t->make_term(prefix), 1, t->pos));
1111  }
1112  add_to_query(q, Query::OP_OR, opwindow_subq(op, subqs, w));
1113  }
1114  }
1115  } else {
// Mixed prefix lists: let each term build its own query.
1116  vector<Query> subqs;
1117  subqs.reserve(n_terms);
1118  for (Term* t : terms) {
1119  subqs.push_back(t->get_query());
1120  }
1121  q = new Query(opwindow_subq(op, subqs, w));
1122  }
1123 
1124  delete this;
1125  return q;
1126  }
1127 
// Private: use create() instead.
1128  explicit Terms(bool no_pos)
1129  : window(no_pos ? size_t(-1) : 0),
1130  uniform_prefixes(true),
1131  prefixes(NULL) { }
1132 
1133  public:
// Factory: positions are disabled when FLAG_NO_POSITIONS is set.
1135  static Terms* create(State* state) {
1136  return new Terms(state->flags & QueryParser::FLAG_NO_POSITIONS);
1137  }
1138 
// Destructor body (its signature line is missing from this listing).
1140  for (auto&& t : terms) {
1141  delete t;
1142  }
1143  }
1144 
// Append a term (taking ownership), tracking whether all terms still share
// the same prefix list, and force the term to keep positional information.
1146  void add_positional_term(Term * term) {
1147  const auto& term_prefixes = term->field_info->prefixes;
1148  if (terms.empty()) {
1149  prefixes = &term_prefixes;
1150  } else if (uniform_prefixes && prefixes != &term_prefixes) {
// Different vector objects may still hold equal contents.
1151  if (*prefixes != term_prefixes) {
1152  prefixes = NULL;
1153  uniform_prefixes = false;
1154  }
1155  }
1156  term->need_positions();
1157  terms.push_back(term);
1158  }
1159 
// Only ever widens the window.
1160  void adjust_window(size_t alternative_window) {
1161  if (alternative_window > window) window = alternative_window;
1162  }
1163 
// Phrase conversion: OP_PHRASE with a window equal to the number of terms.
// (The signature line of this method is missing from this listing.)
1166  return as_opwindow_query(Query::OP_PHRASE, 0);
1167  }
1168 
1170  Query * as_near_query() const {
1171  // The common meaning of 'a NEAR b' is "a within 10 terms of b", which
1172  // means a window size of 11. For more than 2 terms, we just add one
1173  // to the window size for each extra term.
1174  size_t w = window;
1175  if (w == 0) w = 10;
1176  return as_opwindow_query(Query::OP_NEAR, w - 1);
1177  }
1178 
1180  Query * as_adj_query() const {
1181  // The common meaning of 'a ADJ b' is "a at most 10 terms before b",
1182  // which means a window size of 11. For more than 2 terms, we just add
1183  // one to the window size for each extra term.
1184  size_t w = window;
1185  if (w == 0) w = 10;
1186  return as_opwindow_query(Query::OP_PHRASE, w - 1);
1187  }
1188 };
1189 
1190 void
1192 {
1193  // Add each individual character to the phrase.
1194  string t;
1195  for (Utf8Iterator it(name); it != Utf8Iterator(); ++it) {
1196  Unicode::append_utf8(t, *it);
1197  Term * c = new Term(state, t, field_info, unstemmed, stem, pos);
1198  terms->add_positional_term(c);
1199  t.resize(0);
1200  }
1201 
1202  // FIXME: we want to add the n-grams as filters too for efficiency.
1203 
1204  delete this;
1205 }
1206 
1207 // Helper macro to check for missing arguments to a boolean operator.
// If either operand A or B is null/false, record a syntax error message
// naming the operator (OP_TXT, a string literal) in state->error, call
// yy_parse_failed(yypParser) and return from the enclosing function.
//
// `state` and `yypParser` must be in scope at the point of use, and the
// enclosing function must return void.
//
// A and B are parenthesised so that compound argument expressions (e.g. a
// conditional expression) are negated as a whole rather than being re-parsed
// around the `!` and `||` operators.
#define VET_BOOL_ARGS(A, B, OP_TXT) \
    do {\
	if (!(A) || !(B)) {\
	    state->error = "Syntax: <expression> " OP_TXT " <expression>";\
	    yy_parse_failed(yypParser);\
	    return;\
	}\
    } while (0)
1216 
1217 #line 1218 "queryparser/queryparser_internal.cc"
1218 /**************** End of %include directives **********************************/
1219 /* These constants specify the various numeric values for terminal symbols
1220 ** in a format understandable to "makeheaders". This section is blank unless
1221 ** "lemon" is run with the "-m" command-line option.
1222 ***************** Begin makeheaders token definitions *************************/
1223 /**************** End makeheaders token definitions ***************************/
1224 
1225 /* The next section is a series of control #defines that determine
1226 ** various aspects of the generated parser.
1227 ** YYCODETYPE is the data type used to store the integer codes
1228 ** that represent terminal and non-terminal symbols.
1229 ** "unsigned char" is used if there are fewer than
1230 ** 256 symbols. Larger types otherwise.
1231 ** YYNOCODE is a number of type YYCODETYPE that is not used for
1232 ** any terminal or nonterminal symbol.
1233 ** YYFALLBACK If defined, this indicates that one or more tokens
1234 ** (also known as: "terminal symbols") have fall-back
1235 ** values which should be used if the original symbol
1236 ** would not parse. This permits keywords to sometimes
1237 ** be used as identifiers, for example.
1238 ** YYACTIONTYPE is the data type used for "action codes" - numbers
1239 ** that indicate what to do in response to the next
1240 ** token.
1241 ** ParseTOKENTYPE is the data type used for minor type for terminal
1242 ** symbols. Background: A "minor type" is a semantic
1243 ** value associated with a terminal or non-terminal
1244 ** symbols. For example, for an "ID" terminal symbol,
1245 ** the minor type might be the name of the identifier.
1246 ** Each non-terminal can have a different minor type.
1247 ** Terminal symbols all have the same minor type, though.
1248 ** This macro defines the minor type for terminal
1249 ** symbols.
1250 ** YYMINORTYPE is the data type used for all minor types.
1251 ** This is typically a union of many types, one of
1252 ** which is ParseTOKENTYPE. The entry in the union
1253 ** for terminal symbols is called "yy0".
1254 ** YYSTACKDEPTH is the maximum depth of the parser's stack. If
1255 ** zero the stack is dynamically sized using realloc()
1256 ** ParseARG_SDECL A static variable declaration for the %extra_argument
1257 ** ParseARG_PDECL A parameter declaration for the %extra_argument
1258 ** ParseARG_STORE Code to store %extra_argument into yypParser
1259 ** ParseARG_FETCH Code to extract %extra_argument from yypParser
1260 ** YYERRORSYMBOL is the code number of the error symbol. If not
1261 ** defined, then do no error processing.
1262 ** YYNSTATE the combined number of states.
1263 ** YYNRULE the number of rules in the grammar
1264 ** YYNTOKEN Number of terminal symbols
1265 ** YY_MAX_SHIFT Maximum value for shift actions
1266 ** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1267 ** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1268 ** YY_ERROR_ACTION The yy_action[] code for syntax error
1269 ** YY_ACCEPT_ACTION The yy_action[] code for accept
1270 ** YY_NO_ACTION The yy_action[] code for no-op
1271 ** YY_MIN_REDUCE Minimum value for reduce actions
1272 ** YY_MAX_REDUCE Maximum value for reduce actions
1273 */
1274 #ifndef INTERFACE
1275 # define INTERFACE 1
1276 #endif
1277 /************* Begin control #defines *****************************************/
1278 #define YYCODETYPE unsigned char
1279 #define YYNOCODE 40
1280 #define YYACTIONTYPE unsigned char
1281 #define ParseTOKENTYPE Term *
1282 typedef union {
1283  int yyinit;
1289  int yy46;
1290 } YYMINORTYPE;
/* Stack depth setting inherited from the Lemon template; it can be
** overridden by defining YYSTACKDEPTH earlier.
** NOTE(review): the parser stack in this file is a std::vector (see struct
** yyParser), so this value does not appear to bound it -- confirm. */
1291 #ifndef YYSTACKDEPTH
1292 #define YYSTACKDEPTH 100
1293 #endif
/* The %extra_argument is a `State *` named `state`:
**   SDECL - member declaration inside the parser struct
**   PDECL - extra parameter appended to a parameter list
**   FETCH - copy the value out of the parser into a local
**   STORE - copy the local back into the parser */
1294 #define ParseARG_SDECL State * state;
1295 #define ParseARG_PDECL ,State * state
1296 #define ParseARG_FETCH State * state = yypParser->state
1297 #define ParseARG_STORE yypParser->state = state
/* Grammar dimensions: number of states, rules and terminal symbols. */
1298 #define YYNSTATE 35
1299 #define YYNRULE 56
1300 #define YYNTOKEN 24
/* Action code ranges.  Codes 0..YY_MAX_SHIFT (== YYNSTATE-1) are plain
** shifts.  The shift-reduce range (77..132) and the reduce range (136..191)
** each span YYNRULE (56) codes, one per rule.  133, 134 and 135 are the
** error, accept and no-op codes respectively. */
1301 #define YY_MAX_SHIFT 34
1302 #define YY_MIN_SHIFTREDUCE 77
1303 #define YY_MAX_SHIFTREDUCE 132
1304 #define YY_ERROR_ACTION 133
1305 #define YY_ACCEPT_ACTION 134
1306 #define YY_NO_ACTION 135
1307 #define YY_MIN_REDUCE 136
1308 #define YY_MAX_REDUCE 191
1309 /************* End control #defines *******************************************/
1310 
1311 /* Define the yytestcase() macro to be a no-op if it is not already
1312 ** defined otherwise.
1313 **
1314 ** Applications can choose to define yytestcase() in the %include section
1315 ** to a macro that can assist in verifying code coverage. For production
1316 ** code the yytestcase() macro should be turned off. But it is useful
1317 ** for testing.
1318 */
1319 #ifndef yytestcase
1320 # define yytestcase(X)
1321 #endif
1322 
1323 
1324 /* Next are the tables used to determine what action to take based on the
1325 ** current state and lookahead token. These tables are used to implement
1326 ** functions that take a state number and lookahead value and return an
1327 ** action integer.
1328 **
1329 ** Suppose the action integer is N. Then the action is determined as
1330 ** follows
1331 **
1332 ** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead
1333 ** token onto the stack and goto state N.
1334 **
1335 ** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
1336 ** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE.
1337 **
1338 ** N == YY_ERROR_ACTION A syntax error has occurred.
1339 **
1340 ** N == YY_ACCEPT_ACTION The parser accepts its input.
1341 **
1342 ** N == YY_NO_ACTION No such action. Denotes unused
1343 ** slots in the yy_action[] table.
1344 **
1345 ** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE
1346 ** and YY_MAX_REDUCE
1347 **
1348 ** The action table is constructed as a single large table named yy_action[].
1349 ** Given state S and lookahead X, the action is computed as either:
1350 **
1351 ** (A) N = yy_action[ yy_shift_ofst[S] + X ]
1352 ** (B) N = yy_default[S]
1353 **
1354 ** The (A) formula is preferred. The B formula is used instead if
1355 ** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
1356 **
1357 ** The formulas above are for computing the action when the lookahead is
1358 ** a terminal symbol. If the lookahead is a non-terminal (as occurs after
1359 ** a reduce action) then the yy_reduce_ofst[] array is used in place of
1360 ** the yy_shift_ofst[] array.
1361 **
1362 ** The following are the tables generated in this section:
1363 **
1364 ** yy_action[] A single table containing all actions.
1365 ** yy_lookahead[] A table containing the lookahead for each entry in
1366 ** yy_action. Used to detect hash collisions.
1367 ** yy_shift_ofst[] For each state, the offset into yy_action for
1368 ** shifting terminals.
1369 ** yy_reduce_ofst[] For each state, the offset into yy_action for
1370 ** shifting non-terminals after a reduce.
1371 ** yy_default[] Default action for each state.
1372 **
1373 *********** Begin parsing tables **********************************************/
/* Packed action table with YY_ACTTAB_COUNT entries.  Each entry is an
** action code (shift, shift-reduce, error, accept or reduce according to
** the YY_* ranges).  Entries equal to 135 (YY_NO_ACTION) are unused slots.
** An entry is only meaningful for a given state/symbol pair when the
** matching yy_lookahead[] slot holds that symbol. */
1374 #define YY_ACTTAB_COUNT (326)
1375 static const YYACTIONTYPE yy_action[] = {
1376  /* 0 */ 134, 34, 34, 20, 8, 34, 18, 13, 16, 27,
1377  /* 10 */ 31, 23, 30, 28, 3, 21, 112, 10, 9, 2,
1378  /* 20 */ 25, 15, 111, 114, 104, 105, 97, 87, 14, 4,
1379  /* 30 */ 137, 113, 126, 115, 12, 11, 1, 7, 10, 9,
1380  /* 40 */ 124, 25, 15, 98, 88, 104, 105, 97, 87, 14,
1381  /* 50 */ 4, 29, 113, 138, 138, 138, 8, 138, 18, 13,
1382  /* 60 */ 16, 119, 31, 23, 30, 28, 141, 141, 141, 8,
1383  /* 70 */ 141, 18, 13, 16, 125, 31, 23, 30, 28, 140,
1384  /* 80 */ 140, 140, 8, 140, 18, 13, 16, 123, 31, 23,
1385  /* 90 */ 30, 28, 26, 26, 20, 8, 26, 18, 13, 16,
1386  /* 100 */ 136, 31, 23, 30, 28, 24, 24, 24, 8, 24,
1387  /* 110 */ 18, 13, 16, 135, 31, 23, 30, 28, 22, 22,
1388  /* 120 */ 22, 8, 22, 18, 13, 16, 135, 31, 23, 30,
1389  /* 130 */ 28, 139, 139, 139, 8, 139, 18, 13, 16, 121,
1390  /* 140 */ 31, 23, 30, 28, 10, 9, 135, 25, 15, 122,
1391  /* 150 */ 135, 104, 105, 97, 87, 14, 4, 135, 113, 135,
1392  /* 160 */ 189, 189, 135, 25, 19, 135, 135, 104, 105, 189,
1393  /* 170 */ 189, 14, 4, 162, 113, 162, 162, 162, 162, 33,
1394  /* 180 */ 32, 33, 32, 116, 135, 135, 120, 118, 120, 118,
1395  /* 190 */ 106, 25, 17, 117, 162, 104, 105, 95, 135, 14,
1396  /* 200 */ 4, 135, 113, 25, 17, 135, 135, 104, 105, 99,
1397  /* 210 */ 135, 14, 4, 135, 113, 25, 17, 135, 135, 104,
1398  /* 220 */ 105, 96, 135, 14, 4, 135, 113, 25, 17, 135,
1399  /* 230 */ 135, 104, 105, 100, 135, 14, 4, 135, 113, 25,
1400  /* 240 */ 19, 135, 135, 104, 105, 135, 135, 14, 4, 135,
1401  /* 250 */ 113, 135, 149, 149, 135, 31, 23, 30, 28, 152,
1402  /* 260 */ 135, 135, 152, 135, 31, 23, 30, 28, 135, 150,
1403  /* 270 */ 135, 135, 150, 135, 31, 23, 30, 28, 153, 135,
1404  /* 280 */ 135, 153, 135, 31, 23, 30, 28, 151, 135, 135,
1405  /* 290 */ 151, 135, 31, 23, 30, 28, 135, 148, 148, 135,
1406  /* 300 */ 31, 23, 30, 28, 191, 135, 191, 191, 191, 191,
1407  /* 310 */ 6, 5, 1, 7, 5, 1, 7, 135, 135, 135,
1408  /* 320 */ 135, 135, 135, 135, 135, 191,
1409 };
/* Validation table parallel to yy_action[]: slot i holds the symbol code
** for which yy_action[i] is valid.  A lookup at offset+symbol is accepted
** only when the slot equals the symbol; otherwise the state's default
** action is used.
**
** The table is longer than YY_ACTTAB_COUNT.  yy_find_shift_action()
** asserts that shift offset + YYNTOKEN stays within this array, so the
** trailing filler entries allow indexing from the largest shift offset
** without a separate bounds check. */
1410 static const YYCODETYPE yy_lookahead[] = {
1411  /* 0 */ 25, 26, 27, 28, 29, 30, 31, 32, 33, 7,
1412  /* 10 */ 35, 36, 37, 38, 5, 34, 12, 8, 9, 10,
1413  /* 20 */ 11, 12, 21, 12, 15, 16, 17, 18, 19, 20,
1414  /* 30 */ 0, 22, 12, 22, 8, 9, 4, 5, 8, 9,
1415  /* 40 */ 12, 11, 12, 17, 18, 15, 16, 17, 18, 19,
1416  /* 50 */ 20, 6, 22, 26, 27, 28, 29, 30, 31, 32,
1417  /* 60 */ 33, 14, 35, 36, 37, 38, 26, 27, 28, 29,
1418  /* 70 */ 30, 31, 32, 33, 12, 35, 36, 37, 38, 26,
1419  /* 80 */ 27, 28, 29, 30, 31, 32, 33, 12, 35, 36,
1420  /* 90 */ 37, 38, 26, 27, 28, 29, 30, 31, 32, 33,
1421  /* 100 */ 0, 35, 36, 37, 38, 26, 27, 28, 29, 30,
1422  /* 110 */ 31, 32, 33, 39, 35, 36, 37, 38, 26, 27,
1423  /* 120 */ 28, 29, 30, 31, 32, 33, 39, 35, 36, 37,
1424  /* 130 */ 38, 26, 27, 28, 29, 30, 31, 32, 33, 13,
1425  /* 140 */ 35, 36, 37, 38, 8, 9, 39, 11, 12, 23,
1426  /* 150 */ 39, 15, 16, 17, 18, 19, 20, 39, 22, 39,
1427  /* 160 */ 8, 9, 39, 11, 12, 39, 39, 15, 16, 17,
1428  /* 170 */ 18, 19, 20, 0, 22, 2, 3, 4, 5, 6,
1429  /* 180 */ 7, 6, 7, 12, 39, 39, 13, 14, 13, 14,
1430  /* 190 */ 19, 11, 12, 22, 21, 15, 16, 17, 39, 19,
1431  /* 200 */ 20, 39, 22, 11, 12, 39, 39, 15, 16, 17,
1432  /* 210 */ 39, 19, 20, 39, 22, 11, 12, 39, 39, 15,
1433  /* 220 */ 16, 17, 39, 19, 20, 39, 22, 11, 12, 39,
1434  /* 230 */ 39, 15, 16, 17, 39, 19, 20, 39, 22, 11,
1435  /* 240 */ 12, 39, 39, 15, 16, 39, 39, 19, 20, 39,
1436  /* 250 */ 22, 39, 32, 33, 39, 35, 36, 37, 38, 30,
1437  /* 260 */ 39, 39, 33, 39, 35, 36, 37, 38, 39, 30,
1438  /* 270 */ 39, 39, 33, 39, 35, 36, 37, 38, 30, 39,
1439  /* 280 */ 39, 33, 39, 35, 36, 37, 38, 30, 39, 39,
1440  /* 290 */ 33, 39, 35, 36, 37, 38, 39, 32, 33, 39,
1441  /* 300 */ 35, 36, 37, 38, 0, 39, 2, 3, 4, 5,
1442  /* 310 */ 2, 3, 4, 5, 3, 4, 5, 39, 39, 39,
1443  /* 320 */ 39, 39, 39, 39, 39, 21, 39, 39, 39, 39,
1444  /* 330 */ 39, 39, 39, 39, 39, 39, 39, 39, 39,
1445 };
/* Per-state base offset into yy_action[]/yy_lookahead[] for terminal
** lookaheads; indexed by state number 0..YY_SHIFT_COUNT.  YY_SHIFT_MIN and
** YY_SHIFT_MAX are the smallest and largest offsets in the table. */
1446 #define YY_SHIFT_COUNT (34)
1447 #define YY_SHIFT_MIN (0)
1448 #define YY_SHIFT_MAX (311)
1449 static const unsigned short int yy_shift_ofst[] = {
1450  /* 0 */ 30, 9, 136, 136, 136, 136, 136, 136, 152, 180,
1451  /* 10 */ 192, 204, 216, 228, 11, 173, 304, 175, 26, 175,
1452  /* 20 */ 308, 171, 311, 126, 32, 4, 1, 20, 2, 28,
1453  /* 30 */ 45, 47, 62, 75, 100,
1454 };
/* Per-state base offset into yy_action[] for non-terminal lookaheads (used
** after a reduce); indexed by state number 0..YY_REDUCE_COUNT.  The element
** type is signed because offsets may be negative (YY_REDUCE_MIN is -25);
** the non-terminal code is added to the offset before indexing. */
1455 #define YY_REDUCE_COUNT (14)
1456 #define YY_REDUCE_MIN (-25)
1457 #define YY_REDUCE_MAX (265)
1458 static const short yy_reduce_ofst[] = {
1459  /* 0 */ -25, 27, 40, 53, 66, 79, 92, 105, 220, 229,
1460  /* 10 */ 239, 248, 257, 265, -19,
1461 };
/* Default action for each state (one entry per state), used when the
** yy_lookahead[] slot for the state/lookahead pair does not match.  An
** entry of 133 (YY_ERROR_ACTION) means a syntax error; the other values
** here fall in the reduce range. */
1462 static const YYACTIONTYPE yy_default[] = {
1463  /* 0 */ 144, 144, 144, 144, 144, 144, 144, 144, 145, 133,
1464  /* 10 */ 133, 133, 133, 160, 133, 161, 190, 162, 133, 161,
1465  /* 20 */ 133, 133, 142, 167, 143, 133, 187, 133, 169, 133,
1466  /* 30 */ 168, 166, 133, 133, 187,
1467 };
1468 /********** End of lemon-generated parsing tables *****************************/
1469 
1470 /* The next table maps tokens (terminal symbols) into fallback tokens.
1471 ** If a construct like the following:
1472 **
1473 ** %fallback ID X Y Z.
1474 **
1475 ** appears in the grammar, then ID becomes a fallback token for X, Y,
1476 ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser
1477 ** but it does not parse, the type of the token is changed to ID and
1478 ** the parse is retried before an error is thrown.
1479 **
1480 ** This feature can be used, for example, to cause some keywords in a language
1481 ** to revert to identifiers if the keyword does not apply in the context where
1482 ** it appears.
1483 */
1484 #ifdef YYFALLBACK
1485 static const YYCODETYPE yyFallback[] = {
1486 };
1487 #endif /* YYFALLBACK */
1488 
1489 /* The following structure represents a single element of the
1490 ** parser's stack. Information stored includes:
1491 **
1492 ** + The state number for the parser at this level of the stack.
1493 **
1494 ** + The value of the token stored at this level of the stack.
1495 ** (In other words, the "major" token.)
1496 **
1497 ** + The semantic value stored at this level of the stack. This is
1498 ** the information used by the action routines in the grammar.
1499 ** It is sometimes called the "minor" token.
1500 **
1501 ** After the "shift" half of a SHIFTREDUCE action, the stateno field
1502 ** actually contains the reduce action for the second half of the
1503 ** SHIFTREDUCE.
1504 */
1507  stateno = 0;
1508  major = 0;
1509  }
1511  stateno = stateno_;
1512  major = major_;
1513  minor.yy0 = minor_;
1514  }
1515  YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */
1516  YYCODETYPE major; /* The major token value. This is the code
1517  ** number for the token at this stack level */
1518  YYMINORTYPE minor; /* The user-supplied minor token value. This
1519  ** is the value of the token */
1520 };
1521 
1522 static void ParseInit(yyParser *pParser);
1523 static void ParseFinalize(yyParser *pParser);
1524 
1525 /* The state of the parser is completely contained in an instance of
1526 ** the following structure */
1527 struct yyParser {
1528 #ifdef YYTRACKMAXSTACKDEPTH
1529  int yyhwm; /* High-water mark of the stack */
1530 #endif
1531 #ifndef YYNOERRORRECOVERY
1532  int yyerrcnt; /* Shifts left before out of the error */
1533 #endif
1534  ParseARG_SDECL /* A place to hold %extra_argument */
1535  vector<yyStackEntry> yystack; /* The parser's stack */
1537  ParseInit(this);
1538  }
1540  ParseFinalize(this);
1541  }
1542 };
1543 typedef struct yyParser yyParser;
1544 
1545 #include "omassert.h"
1546 #include "debuglog.h"
1547 
1548 #if defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG)
1549 /* For tracing shifts, the names of all terminals and nonterminals
1550 ** are required. The following table supplies these names */
1551 static const char *const yyTokenName[] = {
1552  /* 0 */ "$",
1553  /* 1 */ "ERROR",
1554  /* 2 */ "OR",
1555  /* 3 */ "XOR",
1556  /* 4 */ "AND",
1557  /* 5 */ "NOT",
1558  /* 6 */ "NEAR",
1559  /* 7 */ "ADJ",
1560  /* 8 */ "LOVE",
1561  /* 9 */ "HATE",
1562  /* 10 */ "HATE_AFTER_AND",
1563  /* 11 */ "SYNONYM",
1564  /* 12 */ "TERM",
1565  /* 13 */ "GROUP_TERM",
1566  /* 14 */ "PHR_TERM",
1567  /* 15 */ "WILD_TERM",
1568  /* 16 */ "PARTIAL_TERM",
1569  /* 17 */ "BOOLEAN_FILTER",
1570  /* 18 */ "RANGE",
1571  /* 19 */ "QUOTE",
1572  /* 20 */ "BRA",
1573  /* 21 */ "KET",
1574  /* 22 */ "UNBROKEN_WORDS",
1575  /* 23 */ "EMPTY_GROUP_OK",
1576  /* 24 */ "error",
1577  /* 25 */ "query",
1578  /* 26 */ "expr",
1579  /* 27 */ "prob_expr",
1580  /* 28 */ "bool_arg",
1581  /* 29 */ "prob",
1582  /* 30 */ "term",
1583  /* 31 */ "stop_prob",
1584  /* 32 */ "stop_term",
1585  /* 33 */ "compound_term",
1586  /* 34 */ "phrase",
1587  /* 35 */ "phrased_term",
1588  /* 36 */ "group",
1589  /* 37 */ "near_expr",
1590  /* 38 */ "adj_expr",
1591 };
1592 
1593 /* For tracing reduce actions, the names of all rules are required.
1594 */
1595 static const char *const yyRuleName[] = {
1596  /* 0 */ "query ::= expr",
1597  /* 1 */ "query ::=",
1598  /* 2 */ "expr ::= bool_arg AND bool_arg",
1599  /* 3 */ "expr ::= bool_arg NOT bool_arg",
1600  /* 4 */ "expr ::= bool_arg AND NOT bool_arg",
1601  /* 5 */ "expr ::= bool_arg AND HATE_AFTER_AND bool_arg",
1602  /* 6 */ "expr ::= bool_arg OR bool_arg",
1603  /* 7 */ "expr ::= bool_arg XOR bool_arg",
1604  /* 8 */ "bool_arg ::=",
1605  /* 9 */ "prob_expr ::= prob",
1606  /* 10 */ "prob ::= RANGE",
1607  /* 11 */ "prob ::= stop_prob RANGE",
1608  /* 12 */ "prob ::= stop_term stop_term",
1609  /* 13 */ "prob ::= prob stop_term",
1610  /* 14 */ "prob ::= LOVE term",
1611  /* 15 */ "prob ::= stop_prob LOVE term",
1612  /* 16 */ "prob ::= HATE term",
1613  /* 17 */ "prob ::= stop_prob HATE term",
1614  /* 18 */ "prob ::= HATE BOOLEAN_FILTER",
1615  /* 19 */ "prob ::= stop_prob HATE BOOLEAN_FILTER",
1616  /* 20 */ "prob ::= BOOLEAN_FILTER",
1617  /* 21 */ "prob ::= stop_prob BOOLEAN_FILTER",
1618  /* 22 */ "prob ::= LOVE BOOLEAN_FILTER",
1619  /* 23 */ "prob ::= stop_prob LOVE BOOLEAN_FILTER",
1620  /* 24 */ "stop_prob ::= stop_term",
1621  /* 25 */ "stop_term ::= TERM",
1622  /* 26 */ "term ::= TERM",
1623  /* 27 */ "compound_term ::= WILD_TERM",
1624  /* 28 */ "compound_term ::= PARTIAL_TERM",
1625  /* 29 */ "compound_term ::= QUOTE phrase QUOTE",
1626  /* 30 */ "compound_term ::= phrased_term",
1627  /* 31 */ "compound_term ::= group",
1628  /* 32 */ "compound_term ::= near_expr",
1629  /* 33 */ "compound_term ::= adj_expr",
1630  /* 34 */ "compound_term ::= BRA expr KET",
1631  /* 35 */ "compound_term ::= SYNONYM TERM",
1632  /* 36 */ "compound_term ::= UNBROKEN_WORDS",
1633  /* 37 */ "phrase ::= TERM",
1634  /* 38 */ "phrase ::= UNBROKEN_WORDS",
1635  /* 39 */ "phrase ::= phrase TERM",
1636  /* 40 */ "phrase ::= phrase UNBROKEN_WORDS",
1637  /* 41 */ "phrased_term ::= TERM PHR_TERM",
1638  /* 42 */ "phrased_term ::= phrased_term PHR_TERM",
1639  /* 43 */ "group ::= TERM GROUP_TERM",
1640  /* 44 */ "group ::= group GROUP_TERM",
1641  /* 45 */ "group ::= group EMPTY_GROUP_OK",
1642  /* 46 */ "near_expr ::= TERM NEAR TERM",
1643  /* 47 */ "near_expr ::= near_expr NEAR TERM",
1644  /* 48 */ "adj_expr ::= TERM ADJ TERM",
1645  /* 49 */ "adj_expr ::= adj_expr ADJ TERM",
1646  /* 50 */ "expr ::= prob_expr",
1647  /* 51 */ "bool_arg ::= expr",
1648  /* 52 */ "prob_expr ::= term",
1649  /* 53 */ "stop_prob ::= prob",
1650  /* 54 */ "stop_term ::= compound_term",
1651  /* 55 */ "term ::= compound_term",
1652 };
1653 
1654 /*
1655 ** This function returns the symbolic name associated with a token
1656 ** value.
1657 */
1658 static const char *ParseTokenName(int tokenType){
1659  if( tokenType>=0 && tokenType<(int)(sizeof(yyTokenName)/sizeof(yyTokenName[0])) ){
1660  return yyTokenName[tokenType];
1661  }
1662  return "Unknown";
1663 }
1664 
1665 /*
1666 ** This function returns the symbolic name associated with a rule
1667 ** value.
1668 */
1669 static const char *ParseRuleName(int ruleNum){
1670  if( ruleNum>=0 && ruleNum<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
1671  return yyRuleName[ruleNum];
1672  }
1673  return "Unknown";
1674 }
1675 #endif /* defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG) */
1676 
1677 /* Datatype of the argument to the memory allocator passed as the
1678 ** second argument to ParseAlloc() below. This can be changed by
1679 ** putting an appropriate #define in the %include section of the input
1680 ** grammar.
1681 */
1682 #ifndef YYMALLOCARGTYPE
1683 # define YYMALLOCARGTYPE size_t
1684 #endif
1685 
1686 /* Initialize a new parser that has already been allocated.
1687 */
/* Set up the bookkeeping fields of an already-constructed yyParser:
**   - reset the stack high-water mark (only with YYTRACKMAXSTACKDEPTH),
**   - set yyerrcnt to -1 (unless YYNOERRORRECOVERY is defined),
**   - push one default-constructed yyStackEntry as the bottom stack entry.
** The `#if 0` regions are disabled code that refers to members (yytos,
** yystksz, yystk0, yystackEnd) which the yyParser struct in this file does
** not declare.  NOTE(review): presumably kept to ease syncing with the
** upstream template -- confirm. */
1688 static
1689 void ParseInit(yyParser *pParser){
1690 #ifdef YYTRACKMAXSTACKDEPTH
1691  pParser->yyhwm = 0;
1692 #endif
/* Disabled: upstream dynamic-stack initialisation. */
1693 #if 0
1694 #if YYSTACKDEPTH<=0
1695  pParser->yytos = NULL;
1696  pParser->yystack = NULL;
1697  pParser->yystksz = 0;
1698  if( yyGrowStack(pParser) ){
1699  pParser->yystack = &pParser->yystk0;
1700  pParser->yystksz = 1;
1701  }
1702 #endif
1703 #endif
1704 #ifndef YYNOERRORRECOVERY
1705  pParser->yyerrcnt = -1;
1706 #endif
/* Disabled: upstream array-based stack setup; the #else branch below is
** the code actually compiled. */
1707 #if 0
1708  pParser->yytos = pParser->yystack;
1709  pParser->yystack[0].stateno = 0;
1710  pParser->yystack[0].major = 0;
1711 #if YYSTACKDEPTH>0
1712  pParser->yystackEnd = &pParser->yystack[YYSTACKDEPTH-1];
1713 #endif
1714 #else
/* Bottom-of-stack entry; yy_pop_parser_stack() asserts that more than one
** entry is present before popping, so this entry is never popped there. */
1715  pParser->yystack.push_back(yyStackEntry());
1716 #endif
1717 }
1718 
1719 #ifndef Parse_ENGINEALWAYSONSTACK
1720 /*
1721 ** This function allocates a new parser.
1722 **
1723 ** Inputs:
1724 ** None.
1725 **
1726 ** Outputs:
1727 ** A pointer to a parser. This pointer is used in subsequent calls
1728 ** to Parse and ParseFree.
1729 */
1730 static yyParser *ParseAlloc(void){
1731  return new yyParser;
1732 }
1733 #endif /* Parse_ENGINEALWAYSONSTACK */
1734 
1735 
1736 /* The following function deletes the "minor type" or semantic value
1737 ** associated with a symbol. The symbol can be either a terminal
1738 ** or nonterminal. "yymajor" is the symbol code, and "yypminor" is
1739 ** a pointer to the value to be deleted. The code used to do the
1740 ** deletions is derived from the %destructor and/or %token_destructor
1741 ** directives of the input grammar.
1742 */
1743 static void yy_destructor(
1744  yyParser *yypParser, /* The parser */
1745  YYCODETYPE yymajor, /* Type code for object to destroy */
1746  YYMINORTYPE *yypminor /* The object to be destroyed */
1747 ){
1749  switch( yymajor ){
1750  /* Here is inserted the actions which take place when a
1751  ** terminal or non-terminal is destroyed. This can happen
1752  ** when the symbol is popped from the stack during a
1753  ** reduce or during error processing or when a parser is
1754  ** being destroyed before it is finished parsing.
1755  **
1756  ** Note: during a reduce, the only symbols destroyed are those
1757  ** which appear on the RHS of the rule, but which are *not* used
1758  ** inside the C code.
1759  */
1760 /********* Begin destructor definitions ***************************************/
1761  /* TERMINAL Destructor */
1762  case 1: /* ERROR */
1763  case 2: /* OR */
1764  case 3: /* XOR */
1765  case 4: /* AND */
1766  case 5: /* NOT */
1767  case 6: /* NEAR */
1768  case 7: /* ADJ */
1769  case 8: /* LOVE */
1770  case 9: /* HATE */
1771  case 10: /* HATE_AFTER_AND */
1772  case 11: /* SYNONYM */
1773  case 12: /* TERM */
1774  case 13: /* GROUP_TERM */
1775  case 14: /* PHR_TERM */
1776  case 15: /* WILD_TERM */
1777  case 16: /* PARTIAL_TERM */
1778  case 17: /* BOOLEAN_FILTER */
1779  case 18: /* RANGE */
1780  case 19: /* QUOTE */
1781  case 20: /* BRA */
1782  case 21: /* KET */
1783  case 22: /* UNBROKEN_WORDS */
1784  case 23: /* EMPTY_GROUP_OK */
1785 {
1786 #line 1810 "queryparser/queryparser.lemony"
1787  delete (yypminor->yy0);
1788 #line 1789 "queryparser/queryparser_internal.cc"
1789 }
1790  break;
1791  case 26: /* expr */
1792  case 27: /* prob_expr */
1793  case 28: /* bool_arg */
1794  case 30: /* term */
1795  case 32: /* stop_term */
1796  case 33: /* compound_term */
1797 {
1798 #line 1885 "queryparser/queryparser.lemony"
1799  delete (yypminor->yy39);
1800 #line 1801 "queryparser/queryparser_internal.cc"
1801 }
1802  break;
1803  case 29: /* prob */
1804  case 31: /* stop_prob */
1805 {
1806 #line 1994 "queryparser/queryparser.lemony"
1807  delete (yypminor->yy40);
1808 #line 1809 "queryparser/queryparser_internal.cc"
1809 }
1810  break;
1811  case 34: /* phrase */
1812  case 35: /* phrased_term */
1813  case 37: /* near_expr */
1814  case 38: /* adj_expr */
1815 {
1816 #line 2185 "queryparser/queryparser.lemony"
1817  delete (yypminor->yy32);
1818 #line 1819 "queryparser/queryparser_internal.cc"
1819 }
1820  break;
1821  case 36: /* group */
1822 {
1823 #line 2226 "queryparser/queryparser.lemony"
1824  delete (yypminor->yy14);
1825 #line 1826 "queryparser/queryparser_internal.cc"
1826 }
1827  break;
1828 /********* End destructor definitions *****************************************/
1829  default: break; /* If no destructor action specified: do nothing */
1830  }
1831  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
1832 }
1833 
1834 /*
1835 ** Pop the parser's stack once.
1836 **
1837 ** If there is a destructor routine associated with the token which
1838 ** is popped from the stack, then call it.
1839 */
1840 static void yy_pop_parser_stack(yyParser *pParser){
1841  Assert( pParser->yystack.size() > 1 );
1842  yyStackEntry *yytos = &pParser->yystack.back();
1843 
1844  LOGLINE(QUERYPARSER, "Popping " << ParseTokenName(yytos->major));
1845  yy_destructor(pParser, yytos->major, &yytos->minor);
1846  pParser->yystack.pop_back();
1847 }
1848 
1849 /*
1850 ** Clear all secondary memory allocations from the parser
1851 */
1852 static
1853 void ParseFinalize(yyParser *pParser){
1854  while( pParser->yystack.size() > 1 ) yy_pop_parser_stack(pParser);
1855 }
1856 
1857 #ifndef Parse_ENGINEALWAYSONSTACK
1858 /*
1859 ** Deallocate and destroy a parser. Destructors are called for
1860 ** all stack elements before shutting the parser down.
1861 **
1862 ** If the YYPARSEFREENEVERNULL macro exists (for example because it
1863 ** is defined in a %include section of the input grammar) then it is
1864 ** assumed that the input pointer is never NULL.
1865 */
1866 static
1867 void ParseFree(
1868  yyParser *pParser /* The parser to be deleted */
1869 ){
1870  delete pParser;
1871 }
1872 #endif /* Parse_ENGINEALWAYSONSTACK */
1873 
1874 /*
1875 ** Return the peak depth of the stack for a parser.
1876 */
1877 #ifdef YYTRACKMAXSTACKDEPTH
1878 int ParseStackPeak(yyParser *pParser){
1879  return pParser->yyhwm;
1880 }
1881 #endif
1882 
1883 /* This array of booleans keeps track of the parser statement
1884 ** coverage. The element yycoverage[X][Y] is set when the parser
1885 ** is in state X and has a lookahead token Y. In a well-tested
1886 ** system, every element of this matrix should end up being set.
1887 */
1888 #if defined(YYCOVERAGE)
1889 static unsigned char yycoverage[YYNSTATE][YYNTOKEN];
1890 #endif
1891 
1892 /*
1893 ** Write into out a description of every state/lookahead combination that
1894 **
1895 ** (1) has not been used by the parser, and
1896 ** (2) is not a syntax error.
1897 **
1898 ** Return the number of missed state/lookahead combinations.
1899 */
1900 #if defined(YYCOVERAGE)
1901 int ParseCoverage(FILE *out){
1902  int stateno, iLookAhead, i;
1903  int nMissed = 0;
1904  for(stateno=0; stateno<YYNSTATE; stateno++){
1905  i = yy_shift_ofst[stateno];
1906  for(iLookAhead=0; iLookAhead<YYNTOKEN; iLookAhead++){
1907  if( yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
1908  if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
1909  if( out ){
1910  fprintf(out,"State %d lookahead %s %s\n", stateno,
1911  yyTokenName[iLookAhead],
1912  yycoverage[stateno][iLookAhead] ? "ok" : "missed");
1913  }
1914  }
1915  }
1916  return nMissed;
1917 }
1918 #endif
1919 
1920 /*
1921 ** Find the appropriate action for a parser given the terminal
1922 ** look-ahead token iLookAhead.
1923 */
/* Look up the action for terminal iLookAhead in the state on top of the
** parser stack and return its action code.
**
** If the top entry's stateno is above YY_MAX_SHIFT it is not a real state
** but the pending reduce action left by the shift half of a SHIFTREDUCE,
** and it is returned unchanged.
**
** Otherwise the packed tables are probed at yy_shift_ofst[state]+lookahead.
** On a mismatch the optional fallback token is tried (restarting the
** lookup), then the optional wildcard slot, and finally the state's
** default action is returned. */
1924 static unsigned int yy_find_shift_action(
1925  yyParser *pParser, /* The parser */
1926  YYCODETYPE iLookAhead /* The look-ahead token */
1927 ){
1928  int i;
1929  int stateno = pParser->yystack.back().stateno;
1930 
/* Pending reduce action stored in place of a state number. */
1931  if( stateno>YY_MAX_SHIFT ) return stateno;
1932  Assert( stateno <= YY_SHIFT_COUNT );
1933 #if defined(YYCOVERAGE)
1934  yycoverage[stateno][iLookAhead] = 1;
1935 #endif
/* Every path through the loop body returns except the fallback `continue`,
** which repeats the lookup with the substituted token. */
1936  do{
1937  i = yy_shift_ofst[stateno];
1938  Assert( i>=0 );
1939  Assert( i+YYNTOKEN<=(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) );
1940  Assert( iLookAhead!=YYNOCODE );
1941  Assert( iLookAhead < YYNTOKEN );
1942  i += iLookAhead;
/* Slot belongs to a different state/token pair: no explicit action. */
1943  if( yy_lookahead[i]!=iLookAhead ){
1944 #ifdef YYFALLBACK
1945  YYCODETYPE iFallback; /* Fallback token */
1946  if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0])
1947  && (iFallback = yyFallback[iLookAhead])!=0 ){
1948  LOGLINE(QUERYPARSER,
1949  "FALLBACK " << ParseTokenName(iLookAhead) << " => " <<
1950  ParseTokenName(iFallback));
1951  Assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
1952  iLookAhead = iFallback;
1953  continue;
1954  }
1955 #endif
1956 #ifdef YYWILDCARD
1957  {
/* j is the slot the wildcard token would occupy for this state; the range
** checks are compiled in only when the offsets could take j out of range. */
1958  int j = i - iLookAhead + YYWILDCARD;
1959  if(
1960 #if YY_SHIFT_MIN+YYWILDCARD<0
1961  j>=0 &&
1962 #endif
1963 #if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT
1964  j<YY_ACTTAB_COUNT &&
1965 #endif
1966  yy_lookahead[j]==YYWILDCARD && iLookAhead>0
1967  ){
1968  LOGLINE(QUERYPARSER,
1969  "WILDCARD " << ParseTokenName(iLookAhead) << " => " <<
1970  ParseTokenName(YYWILDCARD));
1971  return yy_action[j];
1972  }
1973  }
1974 #endif /* YYWILDCARD */
1975  return yy_default[stateno];
1976  }else{
1977  return yy_action[i];
1978  }
1979  }while(1);
1980 }
1981 
1982 /*
1983 ** Find the appropriate action for a parser given the non-terminal
1984 ** look-ahead token iLookAhead.
1985 */
1987  int stateno, /* Current state number */
1988  YYCODETYPE iLookAhead /* The look-ahead token */
1989 ){
1990  int i;
1991 #ifdef YYERRORSYMBOL
1992  if( stateno>YY_REDUCE_COUNT ){
1993  return yy_default[stateno];
1994  }
1995 #else
1996  Assert( stateno<=YY_REDUCE_COUNT );
1997 #endif
1998  i = yy_reduce_ofst[stateno];
1999  Assert( iLookAhead!=YYNOCODE );
2000  i += iLookAhead;
2001 #ifdef YYERRORSYMBOL
2002  if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
2003  return yy_default[stateno];
2004  }
2005 #else
2006  Assert( i>=0 && i<YY_ACTTAB_COUNT );
2007  Assert( yy_lookahead[i]==iLookAhead );
2008 #endif
2009  return yy_action[i];
2010 }
2011 
2012 /*
2013 ** The following routine is called if the stack overflows.
2014 ** In Xapian this can never happen as we use std::vector to provide a stack
2015 ** of indefinite size.
2016 */
2017 #if 0
2018 static void yyStackOverflow(yyParser *yypParser){
2020  yypParser->yyidx--;
2021 #ifndef NDEBUG
2022  if( yyTraceFILE ){
2023  fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt);
2024  }
2025 #endif
2026  while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
2027  /* Here code is inserted which will execute if the parser
2028  ** stack ever overflows */
2029 /******** Begin %stack_overflow code ******************************************/
2030 /******** End %stack_overflow code ********************************************/
2031  ParseARG_STORE; /* Suppress warning about unused %extra_argument var */
2032 }
2033 #endif
2034 
2035 /*
2036 ** Print tracing information for a SHIFT action
**
** When XAPIAN_DEBUG_LOG is defined this logs zTag followed by the name
** (looked up in yyTokenName) of the symbol in the entry on top of the
** parser stack, and then either:
**   - "go to state N" when yyNewState < YYNSTATE, or
**   - "pending reduce R" otherwise, where R is yyNewState - YY_MIN_REDUCE.
**
** When XAPIAN_DEBUG_LOG is not defined, yyTraceShift is a macro which
** expands to nothing, so its arguments are not evaluated.
2037 */
2038 #ifdef XAPIAN_DEBUG_LOG
2039 static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){
2040  if( yyNewState<YYNSTATE ){
2041  LOGLINE(QUERYPARSER, zTag << " '" <<
2042  yyTokenName[yypParser->yystack.back().major] <<
2043  "', go to state " << yyNewState);
2044  }else{
2045  LOGLINE(QUERYPARSER, zTag << " '" <<
2046  yyTokenName[yypParser->yystack.back().major] <<
2047  "', pending reduce " << yyNewState - YY_MIN_REDUCE);
2048  }
2049 }
2050 #else
2051 # define yyTraceShift(X,Y,Z)
2052 #endif
2053 
2054 /*
2055 ** Perform a shift action.
**
** Pushes a new entry holding (yyNewState, yyMajor, yyMinor) onto the
** parser's stack. If yyNewState is greater than YY_MAX_SHIFT it is first
** rebased by adding YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE, so that the value
** stored on the stack is in the reduce action range.
**
** When YYTRACKMAXSTACKDEPTH is defined, the high-water mark yyhwm is bumped
** whenever the stack size exceeds it.
** NOTE(review): the Assert below assumes yyhwm tracks yystack.size()
** exactly; confirm against how yyhwm is initialised.
2056 */
2057 static void yy_shift(
2058  yyParser *yypParser, /* The parser to be shifted */
2059  int yyNewState, /* The new state to shift in */
2060  int yyMajor, /* The major token to shift in */
2061  ParseTOKENTYPE yyMinor /* The minor token to shift in */
2062 ){
2063  if( yyNewState > YY_MAX_SHIFT ){
2064  yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
2065  }
2066  yypParser->yystack.push_back(yyStackEntry(yyNewState, yyMajor, yyMinor));
2067 #ifdef YYTRACKMAXSTACKDEPTH
 /* The cast must wrap only the size() call; previously its closing
 ** parenthesis was missing, which left this block uncompilable. */
2068  if( static_cast<int>(yypParser->yystack.size())>yypParser->yyhwm ){
2069  yypParser->yyhwm++;
2070  Assert( yypParser->yyhwm == static_cast<int>(yypParser->yystack.size()) );
2071  }
2072 #endif
2073  yyTraceShift(yypParser, yyNewState, "Shift");
2074 }
2075 
2076 /* The following table contains information about every rule that
2077 ** is used during the reduce.
**
** The table is indexed by rule number; the trailing comment on each entry
** gives that number and the grammar rule it describes. "lhs" is the symbol
** code of the rule's left-hand side. "nrhs" is stored negated (it is zero
** or negative), which lets yy_reduce() use it directly as an offset from
** the top-of-stack pointer to reach the entry beneath the rule's RHS.
2078 */
2079 static const struct {
2080  YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */
2081  signed char nrhs; /* Negative of the number of RHS symbols in the rule */
2082 } yyRuleInfo[] = {
2083  { 25, -1 }, /* (0) query ::= expr */
2084  { 25, 0 }, /* (1) query ::= */
2085  { 26, -3 }, /* (2) expr ::= bool_arg AND bool_arg */
2086  { 26, -3 }, /* (3) expr ::= bool_arg NOT bool_arg */
2087  { 26, -4 }, /* (4) expr ::= bool_arg AND NOT bool_arg */
2088  { 26, -4 }, /* (5) expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
2089  { 26, -3 }, /* (6) expr ::= bool_arg OR bool_arg */
2090  { 26, -3 }, /* (7) expr ::= bool_arg XOR bool_arg */
2091  { 28, 0 }, /* (8) bool_arg ::= */
2092  { 27, -1 }, /* (9) prob_expr ::= prob */
2093  { 29, -1 }, /* (10) prob ::= RANGE */
2094  { 29, -2 }, /* (11) prob ::= stop_prob RANGE */
2095  { 29, -2 }, /* (12) prob ::= stop_term stop_term */
2096  { 29, -2 }, /* (13) prob ::= prob stop_term */
2097  { 29, -2 }, /* (14) prob ::= LOVE term */
2098  { 29, -3 }, /* (15) prob ::= stop_prob LOVE term */
2099  { 29, -2 }, /* (16) prob ::= HATE term */
2100  { 29, -3 }, /* (17) prob ::= stop_prob HATE term */
2101  { 29, -2 }, /* (18) prob ::= HATE BOOLEAN_FILTER */
2102  { 29, -3 }, /* (19) prob ::= stop_prob HATE BOOLEAN_FILTER */
2103  { 29, -1 }, /* (20) prob ::= BOOLEAN_FILTER */
2104  { 29, -2 }, /* (21) prob ::= stop_prob BOOLEAN_FILTER */
2105  { 29, -2 }, /* (22) prob ::= LOVE BOOLEAN_FILTER */
2106  { 29, -3 }, /* (23) prob ::= stop_prob LOVE BOOLEAN_FILTER */
2107  { 31, -1 }, /* (24) stop_prob ::= stop_term */
2108  { 32, -1 }, /* (25) stop_term ::= TERM */
2109  { 30, -1 }, /* (26) term ::= TERM */
2110  { 33, -1 }, /* (27) compound_term ::= WILD_TERM */
2111  { 33, -1 }, /* (28) compound_term ::= PARTIAL_TERM */
2112  { 33, -3 }, /* (29) compound_term ::= QUOTE phrase QUOTE */
2113  { 33, -1 }, /* (30) compound_term ::= phrased_term */
2114  { 33, -1 }, /* (31) compound_term ::= group */
2115  { 33, -1 }, /* (32) compound_term ::= near_expr */
2116  { 33, -1 }, /* (33) compound_term ::= adj_expr */
2117  { 33, -3 }, /* (34) compound_term ::= BRA expr KET */
2118  { 33, -2 }, /* (35) compound_term ::= SYNONYM TERM */
2119  { 33, -1 }, /* (36) compound_term ::= UNBROKEN_WORDS */
2120  { 34, -1 }, /* (37) phrase ::= TERM */
2121  { 34, -1 }, /* (38) phrase ::= UNBROKEN_WORDS */
2122  { 34, -2 }, /* (39) phrase ::= phrase TERM */
2123  { 34, -2 }, /* (40) phrase ::= phrase UNBROKEN_WORDS */
2124  { 35, -2 }, /* (41) phrased_term ::= TERM PHR_TERM */
2125  { 35, -2 }, /* (42) phrased_term ::= phrased_term PHR_TERM */
2126  { 36, -2 }, /* (43) group ::= TERM GROUP_TERM */
2127  { 36, -2 }, /* (44) group ::= group GROUP_TERM */
2128  { 36, -2 }, /* (45) group ::= group EMPTY_GROUP_OK */
2129  { 37, -3 }, /* (46) near_expr ::= TERM NEAR TERM */
2130  { 37, -3 }, /* (47) near_expr ::= near_expr NEAR TERM */
2131  { 38, -3 }, /* (48) adj_expr ::= TERM ADJ TERM */
2132  { 38, -3 }, /* (49) adj_expr ::= adj_expr ADJ TERM */
2133  { 26, -1 }, /* (50) expr ::= prob_expr */
2134  { 28, -1 }, /* (51) bool_arg ::= expr */
2135  { 27, -1 }, /* (52) prob_expr ::= term */
2136  { 31, -1 }, /* (53) stop_prob ::= prob */
2137  { 32, -1 }, /* (54) stop_term ::= compound_term */
2138  { 30, -1 }, /* (55) term ::= compound_term */
2139 };
2140 
2141 static void yy_accept(yyParser*); /* Forward Declaration */
2142 
2143 /*
2144 ** Perform a reduce action and the shift that must immediately
2145 ** follow the reduce.
2146 **
2147 ** The yyLookahead and yyLookaheadToken parameters provide reduce actions
2148 ** access to the lookahead token (if any). The yyLookahead will be YYNOCODE
2149 ** if the lookahead token has already been consumed. As this procedure is
2150 ** only called from one place, optimizing compilers will in-line it, which
2151 ** means that the extra parameters have no performance impact.
**
** Outline of the steps below:
**   1. If the rule has an empty RHS, grow the stack by one entry so there
**      is a slot for the LHS value.
**   2. Run the rule's action from the switch on yyruleno. One action
**      (rule 9) may call yy_parse_failed() and return early.
**   3. Look up the next action with yy_find_reduce_action(), using the state
**      stored in the entry just beneath the rule's RHS and the LHS symbol.
**   4. Resize the stack so that the LHS entry is on top, and store the new
**      state and the LHS symbol code in that entry.
2152 */
2153 static void yy_reduce(
2154  yyParser *yypParser, /* The parser */
2155  unsigned int yyruleno, /* Number of the rule by which to reduce */
2156  int yyLookahead, /* Lookahead token, or YYNOCODE if none */
2157  ParseTOKENTYPE yyLookaheadToken /* Value of the lookahead token */
2158 ){
2159  int yygoto; /* The next state */
2160  int yyact; /* The next action */
2161  yyStackEntry *yymsp; /* The top of the parser's stack */
2162  int yysize; /* Amount to pop the stack */
2164  (void)yyLookahead;
2165  (void)yyLookaheadToken;
2166  yymsp = &yypParser->yystack.back();
2167  Assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
2168 #ifdef XAPIAN_DEBUG_LOG
2169  {
2170  yysize = yyRuleInfo[yyruleno].nrhs;
2171  if( yysize ){
2172  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2173  ParseRuleName(yyruleno) << "], go to state " <<
2174  yymsp[yysize].stateno);
2175  } else {
2176  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2177  ParseRuleName(yyruleno) << "].");
2178  }
2179  }
2180 #endif /* XAPIAN_DEBUG_LOG */
2181  /* yygotominor = yyzerominor; */
2182 
2183  /* Check that the stack is large enough to grow by a single entry
2184  ** if the RHS of the rule is empty. This ensures that there is room
2185  ** enough on the stack to push the LHS value without invalidating
2186  ** pointers into the stack. */
2187  if( yyRuleInfo[yyruleno].nrhs==0 ){
2188 #if 1
 /* Vector-backed stack: append one entry, then re-derive yymsp since
 ** resize() may have reallocated the storage. yymsp is left pointing
 ** at the previous top entry, so the newly added slot is yymsp[1]. */
2189  yypParser->yystack.resize(yypParser->yystack.size() + 1);
2190  yymsp = &(yypParser->yystack.back()) - 1;
2191 #else
2192 #ifdef YYTRACKMAXSTACKDEPTH
2193  if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
2194  yypParser->yyhwm++;
2195  Assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
2196  }
2197 #endif
2198 #if YYSTACKDEPTH>0
2199  if( yypParser->yytos>=yypParser->yystackEnd ){
2200  yyStackOverflow(yypParser);
2201  return;
2202  }
2203 #else
2204  if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){
2205  if( yyGrowStack(yypParser) ){
2206  yyStackOverflow(yypParser);
2207  return;
2208  }
2209  yymsp = yypParser->yytos;
2210  }
2211 #endif
2212 #endif
2213  }
2214 
2215  switch( yyruleno ){
2216  /* Beginning here are the reduction cases. A typical example
2217  ** follows:
2218  ** case 0:
2219  ** #line <lineno> <grammarfile>
2220  ** { ... } // User supplied code
2221  ** #line <lineno> <thisfile>
2222  ** break;
2223  */
2224 /********** Begin reduce actions **********************************************/
2225  YYMINORTYPE yylhsminor;
2226  case 0: /* query ::= expr */
2227 #line 1867 "queryparser/queryparser.lemony"
2228 {
2229  // Save the parsed query in the State structure so we can return it.
2230  if (yymsp[0].minor.yy39) {
2231  state->query = *yymsp[0].minor.yy39;
2232  delete yymsp[0].minor.yy39;
2233  } else {
2234  state->query = Query();
2235  }
2236 }
2237 #line 2238 "queryparser/queryparser_internal.cc"
2238  break;
2239  case 1: /* query ::= */
2240 #line 1877 "queryparser/queryparser.lemony"
2241 {
2242  // Handle a query string with no terms in.
2243  state->query = Query();
2244 }
2245 #line 2246 "queryparser/queryparser_internal.cc"
2246  break;
2247  case 2: /* expr ::= bool_arg AND bool_arg */
2248 #line 1889 "queryparser/queryparser.lemony"
2249 {
2250  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "AND");
2251  *yymsp[-2].minor.yy39 &= *yymsp[0].minor.yy39;
2252  delete yymsp[0].minor.yy39;
2253 }
2254 #line 2255 "queryparser/queryparser_internal.cc"
2255  yy_destructor(yypParser,4,&yymsp[-1].minor);
2256  break;
2257  case 3: /* expr ::= bool_arg NOT bool_arg */
2258 #line 1895 "queryparser/queryparser.lemony"
2259 {
2260  // 'NOT foo' -> '<alldocuments> NOT foo'
2261  if (!yymsp[-2].minor.yy39 && (state->flags & QueryParser::FLAG_PURE_NOT)) {
2262  yymsp[-2].minor.yy39 = new Query("", 1, 0);
2263  }
2264  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "NOT");
2265  *yymsp[-2].minor.yy39 &= ~*yymsp[0].minor.yy39;
2266  delete yymsp[0].minor.yy39;
2267 }
2268 #line 2269 "queryparser/queryparser_internal.cc"
2269  yy_destructor(yypParser,5,&yymsp[-1].minor);
2270  break;
2271  case 4: /* expr ::= bool_arg AND NOT bool_arg */
2272 #line 1905 "queryparser/queryparser.lemony"
2273 {
2274  VET_BOOL_ARGS(yymsp[-3].minor.yy39, yymsp[0].minor.yy39, "AND NOT");
2275  *yymsp[-3].minor.yy39 &= ~*yymsp[0].minor.yy39;
2276  delete yymsp[0].minor.yy39;
2277 }
2278 #line 2279 "queryparser/queryparser_internal.cc"
2279  yy_destructor(yypParser,4,&yymsp[-2].minor);
2280  yy_destructor(yypParser,5,&yymsp[-1].minor);
2281  break;
2282  case 5: /* expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
2283 #line 1911 "queryparser/queryparser.lemony"
2284 {
2285  VET_BOOL_ARGS(yymsp[-3].minor.yy39, yymsp[0].minor.yy39, "AND");
2286  *yymsp[-3].minor.yy39 &= ~*yymsp[0].minor.yy39;
2287  delete yymsp[0].minor.yy39;
2288 }
2289 #line 2290 "queryparser/queryparser_internal.cc"
2290  yy_destructor(yypParser,4,&yymsp[-2].minor);
2291  yy_destructor(yypParser,10,&yymsp[-1].minor);
2292  break;
2293  case 6: /* expr ::= bool_arg OR bool_arg */
2294 #line 1917 "queryparser/queryparser.lemony"
2295 {
2296  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "OR");
2297  *yymsp[-2].minor.yy39 |= *yymsp[0].minor.yy39;
2298  delete yymsp[0].minor.yy39;
2299 }
2300 #line 2301 "queryparser/queryparser_internal.cc"
2301  yy_destructor(yypParser,2,&yymsp[-1].minor);
2302  break;
2303  case 7: /* expr ::= bool_arg XOR bool_arg */
2304 #line 1923 "queryparser/queryparser.lemony"
2305 {
2306  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "XOR");
2307  *yymsp[-2].minor.yy39 ^= *yymsp[0].minor.yy39;
2308  delete yymsp[0].minor.yy39;
2309 }
2310 #line 2311 "queryparser/queryparser_internal.cc"
2311  yy_destructor(yypParser,3,&yymsp[-1].minor);
2312  break;
2313  case 8: /* bool_arg ::= */
2314 #line 1936 "queryparser/queryparser.lemony"
2315 {
2316  // Set the argument to NULL, which enables the bool_arg-using rules in
2317  // expr above to report uses of AND, OR, etc which don't have two
2318  // arguments.
2319  yymsp[1].minor.yy39 = NULL;
2320 }
2321 #line 2322 "queryparser/queryparser_internal.cc"
2322  break;
2323  case 9: /* prob_expr ::= prob */
2324 #line 1948 "queryparser/queryparser.lemony"
2325 {
2326  yylhsminor.yy39 = yymsp[0].minor.yy40->query;
2327  yymsp[0].minor.yy40->query = NULL;
2328  // Handle any "+ terms".
2329  if (yymsp[0].minor.yy40->love) {
2330  if (yymsp[0].minor.yy40->love->empty()) {
2331  // +<nothing>.
2332  delete yylhsminor.yy39;
2333  yylhsminor.yy39 = yymsp[0].minor.yy40->love;
2334  } else if (yylhsminor.yy39) {
2335  swap(yylhsminor.yy39, yymsp[0].minor.yy40->love);
2336  add_to_query(yylhsminor.yy39, Query::OP_AND_MAYBE, yymsp[0].minor.yy40->love);
2337  } else {
2338  yylhsminor.yy39 = yymsp[0].minor.yy40->love;
2339  }
2340  yymsp[0].minor.yy40->love = NULL;
2341  }
2342  // Handle any boolean filters.
2343  if (!yymsp[0].minor.yy40->filter.empty()) {
2344  if (yylhsminor.yy39) {
2345  add_to_query(yylhsminor.yy39, Query::OP_FILTER, yymsp[0].minor.yy40->merge_filters());
2346  } else {
2347  // Make the query a boolean one.
2348  yylhsminor.yy39 = new Query(Query::OP_SCALE_WEIGHT, yymsp[0].minor.yy40->merge_filters(), 0.0);
2349  }
2350  }
2351  // Handle any "- terms".
2352  if (yymsp[0].minor.yy40->hate && !yymsp[0].minor.yy40->hate->empty()) {
2353  if (!yylhsminor.yy39) {
2354  // Can't just hate!
2355  yy_parse_failed(yypParser);
2356  return;
2357  }
2358  *yylhsminor.yy39 = Query(Query::OP_AND_NOT, *yylhsminor.yy39, *yymsp[0].minor.yy40->hate);
2359  }
2360  delete yymsp[0].minor.yy40;
2361 }
2362 #line 2363 "queryparser/queryparser_internal.cc"
2363  yymsp[0].minor.yy39 = yylhsminor.yy39;
2364  break;
2365  case 10: /* prob ::= RANGE */
2366 #line 1996 "queryparser/queryparser.lemony"
2367 {
2368  string grouping = yymsp[0].minor.yy0->name;
2369  const Query & range = yymsp[0].minor.yy0->as_range_query();
2370  yymsp[0].minor.yy40 = new ProbQuery; /*P-overwrites-R*/
2371  yymsp[0].minor.yy40->add_filter_range(grouping, range);
2372 }
2373 #line 2374 "queryparser/queryparser_internal.cc"
2374  break;
2375  case 11: /* prob ::= stop_prob RANGE */
2376 #line 2003 "queryparser/queryparser.lemony"
2377 {
2378  string grouping = yymsp[0].minor.yy0->name;
2379  const Query & range = yymsp[0].minor.yy0->as_range_query();
2380  yymsp[-1].minor.yy40->append_filter_range(grouping, range);
2381 }
2382 #line 2383 "queryparser/queryparser_internal.cc"
2383  break;
2384  case 12: /* prob ::= stop_term stop_term */
2385 #line 2009 "queryparser/queryparser.lemony"
2386 {
2387  yymsp[-1].minor.yy40 = new ProbQuery(yymsp[-1].minor.yy39); /*P-overwrites-T*/
2388  if (yymsp[0].minor.yy39) {
2389  Query::op op = state->default_op();
2390  if (yymsp[-1].minor.yy40->query && is_positional(op)) {
2391  // If default_op is OP_NEAR or OP_PHRASE, set the window size to
2392  // 11 for the first pair of terms and it will automatically grow
2393  // by one for each subsequent term.
2394  Query * subqs[2] = { yymsp[-1].minor.yy40->query, yymsp[0].minor.yy39 };
2395  *(yymsp[-1].minor.yy40->query) = Query(op, subqs, subqs + 2, 11);
2396  delete yymsp[0].minor.yy39;
2397  } else {
2398  add_to_query(yymsp[-1].minor.yy40->query, op, yymsp[0].minor.yy39);
2399  }
2400  }
2401 }
2402 #line 2403 "queryparser/queryparser_internal.cc"
2403  break;
2404  case 13: /* prob ::= prob stop_term */
2405 #line 2026 "queryparser/queryparser.lemony"
2406 {
2407  // If yymsp[0].minor.yy39 is a stopword, there's nothing to do here.
2408  if (yymsp[0].minor.yy39) add_to_query(yymsp[-1].minor.yy40->query, state->default_op(), yymsp[0].minor.yy39);
2409 }
2410 #line 2411 "queryparser/queryparser_internal.cc"
2411  break;
2412  case 14: /* prob ::= LOVE term */
2413 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2414 #line 2031 "queryparser/queryparser.lemony"
2415 {
2416  yymsp[-1].minor.yy40 = new ProbQuery;
2417  if (state->default_op() == Query::OP_AND) {
2418  yymsp[-1].minor.yy40->query = yymsp[0].minor.yy39;
2419  } else {
2420  yymsp[-1].minor.yy40->love = yymsp[0].minor.yy39;
2421  }
2422 }
2423 #line 2424 "queryparser/queryparser_internal.cc"
2424 }
2425  break;
2426  case 15: /* prob ::= stop_prob LOVE term */
2427 #line 2040 "queryparser/queryparser.lemony"
2428 {
2429  if (state->default_op() == Query::OP_AND) {
2430  /* The default op is AND, so we just put loved terms into the query
2431  * (in this case the only effect of love is to ignore the stopword
2432  * list). */
2433  add_to_query(yymsp[-2].minor.yy40->query, Query::OP_AND, yymsp[0].minor.yy39);
2434  } else {
2435  add_to_query(yymsp[-2].minor.yy40->love, Query::OP_AND, yymsp[0].minor.yy39);
2436  }
2437 }
2438 #line 2439 "queryparser/queryparser_internal.cc"
2439  yy_destructor(yypParser,8,&yymsp[-1].minor);
2440  break;
2441  case 16: /* prob ::= HATE term */
2442 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2443 #line 2051 "queryparser/queryparser.lemony"
2444 {
2445  yymsp[-1].minor.yy40 = new ProbQuery;
2446  yymsp[-1].minor.yy40->hate = yymsp[0].minor.yy39;
2447 }
2448 #line 2449 "queryparser/queryparser_internal.cc"
2449 }
2450  break;
2451  case 17: /* prob ::= stop_prob HATE term */
2452 #line 2056 "queryparser/queryparser.lemony"
2453 {
2454  add_to_query(yymsp[-2].minor.yy40->hate, Query::OP_OR, yymsp[0].minor.yy39);
2455 }
2456 #line 2457 "queryparser/queryparser_internal.cc"
2457  yy_destructor(yypParser,9,&yymsp[-1].minor);
2458  break;
2459  case 18: /* prob ::= HATE BOOLEAN_FILTER */
2460 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2461 #line 2060 "queryparser/queryparser.lemony"
2462 {
2463  yymsp[-1].minor.yy40 = new ProbQuery;
2464  yymsp[-1].minor.yy40->hate = new Query(yymsp[0].minor.yy0->get_query());
2465  delete yymsp[0].minor.yy0;
2466 }
2467 #line 2468 "queryparser/queryparser_internal.cc"
2468 }
2469  break;
2470  case 19: /* prob ::= stop_prob HATE BOOLEAN_FILTER */
2471 #line 2066 "queryparser/queryparser.lemony"
2472 {
2473  add_to_query(yymsp[-2].minor.yy40->hate, Query::OP_OR, yymsp[0].minor.yy0->get_query());
2474  delete yymsp[0].minor.yy0;
2475 }
2476 #line 2477 "queryparser/queryparser_internal.cc"
2477  yy_destructor(yypParser,9,&yymsp[-1].minor);
2478  break;
2479  case 20: /* prob ::= BOOLEAN_FILTER */
2480 #line 2071 "queryparser/queryparser.lemony"
2481 {
2482  yylhsminor.yy40 = new ProbQuery;
2483  yylhsminor.yy40->add_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2484  delete yymsp[0].minor.yy0;
2485 }
2486 #line 2487 "queryparser/queryparser_internal.cc"
2487  yymsp[0].minor.yy40 = yylhsminor.yy40;
2488  break;
2489  case 21: /* prob ::= stop_prob BOOLEAN_FILTER */
2490 #line 2077 "queryparser/queryparser.lemony"
2491 {
2492  yymsp[-1].minor.yy40->append_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2493  delete yymsp[0].minor.yy0;
2494 }
2495 #line 2496 "queryparser/queryparser_internal.cc"
2496  break;
2497  case 22: /* prob ::= LOVE BOOLEAN_FILTER */
2498 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2499 #line 2082 "queryparser/queryparser.lemony"
2500 {
2501  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2502  yymsp[-1].minor.yy40 = new ProbQuery;
2503  yymsp[-1].minor.yy40->filter[yymsp[0].minor.yy0->get_grouping()] = yymsp[0].minor.yy0->get_query();
2504  delete yymsp[0].minor.yy0;
2505 }
2506 #line 2507 "queryparser/queryparser_internal.cc"
2507 }
2508  break;
2509  case 23: /* prob ::= stop_prob LOVE BOOLEAN_FILTER */
2510 #line 2089 "queryparser/queryparser.lemony"
2511 {
2512  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2513  // We OR filters with the same prefix...
2514  Query & q = yymsp[-2].minor.yy40->filter[yymsp[0].minor.yy0->get_grouping()];
2515  q |= yymsp[0].minor.yy0->get_query();
2516  delete yymsp[0].minor.yy0;
2517 }
2518 #line 2519 "queryparser/queryparser_internal.cc"
2519  yy_destructor(yypParser,8,&yymsp[-1].minor);
2520  break;
2521  case 24: /* stop_prob ::= stop_term */
2522 #line 2104 "queryparser/queryparser.lemony"
2523 {
2524  yymsp[0].minor.yy40 = new ProbQuery(yymsp[0].minor.yy39); /*P-overwrites-T*/
2525 }
2526 #line 2527 "queryparser/queryparser_internal.cc"
2527  break;
2528  case 25: /* stop_term ::= TERM */
2529 #line 2117 "queryparser/queryparser.lemony"
2530 {
2531  if (state->is_stopword(yymsp[0].minor.yy0)) {
2532  yylhsminor.yy39 = NULL;
2533  state->add_to_stoplist(yymsp[0].minor.yy0);
2534  } else {
2535  yylhsminor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2536  }
2537  delete yymsp[0].minor.yy0;
2538 }
2539 #line 2540 "queryparser/queryparser_internal.cc"
2540  yymsp[0].minor.yy39 = yylhsminor.yy39;
2541  break;
2542  case 26: /* term ::= TERM */
2543 #line 2134 "queryparser/queryparser.lemony"
2544 {
2545  yylhsminor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2546  delete yymsp[0].minor.yy0;
2547 }
2548 #line 2549 "queryparser/queryparser_internal.cc"
2549  yymsp[0].minor.yy39 = yylhsminor.yy39;
2550  break;
2551  case 27: /* compound_term ::= WILD_TERM */
2552 #line 2149 "queryparser/queryparser.lemony"
2553 { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_wildcarded_query(state); /*T-overwrites-U*/ }
2554 #line 2555 "queryparser/queryparser_internal.cc"
2555  break;
2556  case 28: /* compound_term ::= PARTIAL_TERM */
2557 #line 2152 "queryparser/queryparser.lemony"
2558 { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_partial_query(state); /*T-overwrites-U*/ }
2559 #line 2560 "queryparser/queryparser_internal.cc"
2560  break;
2561  case 29: /* compound_term ::= QUOTE phrase QUOTE */
2562 { yy_destructor(yypParser,19,&yymsp[-2].minor);
2563 #line 2155 "queryparser/queryparser.lemony"
2564 { yymsp[-2].minor.yy39 = yymsp[-1].minor.yy32->as_phrase_query(); }
2565 #line 2566 "queryparser/queryparser_internal.cc"
2566  yy_destructor(yypParser,19,&yymsp[0].minor);
2567 }
2568  break;
2569  case 30: /* compound_term ::= phrased_term */
2570 #line 2158 "queryparser/queryparser.lemony"
2571 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_phrase_query(); /*T-overwrites-P*/ }
2572 #line 2573 "queryparser/queryparser_internal.cc"
2573  break;
2574  case 31: /* compound_term ::= group */
2575 #line 2161 "queryparser/queryparser.lemony"
2576 { yymsp[0].minor.yy39 = yymsp[0].minor.yy14->as_group(state); /*T-overwrites-P*/ }
2577 #line 2578 "queryparser/queryparser_internal.cc"
2578  break;
2579  case 32: /* compound_term ::= near_expr */
2580 #line 2164 "queryparser/queryparser.lemony"
2581 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_near_query(); /*T-overwrites-P*/ }
2582 #line 2583 "queryparser/queryparser_internal.cc"
2583  break;
2584  case 33: /* compound_term ::= adj_expr */
2585 #line 2167 "queryparser/queryparser.lemony"
2586 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_adj_query(); /*T-overwrites-P*/ }
2587 #line 2588 "queryparser/queryparser_internal.cc"
2588  break;
2589  case 34: /* compound_term ::= BRA expr KET */
2590 { yy_destructor(yypParser,20,&yymsp[-2].minor);
2591 #line 2170 "queryparser/queryparser.lemony"
2592 { yymsp[-2].minor.yy39 = yymsp[-1].minor.yy39; }
2593 #line 2594 "queryparser/queryparser_internal.cc"
2594  yy_destructor(yypParser,21,&yymsp[0].minor);
2595 }
2596  break;
2597  case 35: /* compound_term ::= SYNONYM TERM */
2598 { yy_destructor(yypParser,11,&yymsp[-1].minor);
2599 #line 2172 "queryparser/queryparser.lemony"
2600 {
2601  yymsp[-1].minor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_synonyms());
2602  delete yymsp[0].minor.yy0;
2603 }
2604 #line 2605 "queryparser/queryparser_internal.cc"
2605 }
2606  break;
2607  case 36: /* compound_term ::= UNBROKEN_WORDS */
2608 #line 2177 "queryparser/queryparser.lemony"
2609 {
2610  { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_unbroken_query(); /*T-overwrites-U*/ }
2611 }
2612 #line 2613 "queryparser/queryparser_internal.cc"
2613  break;
2614  case 37: /* phrase ::= TERM */
2615 #line 2187 "queryparser/queryparser.lemony"
2616 {
2617  yylhsminor.yy32 = Terms::create(state);
2618  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2619 }
2620 #line 2621 "queryparser/queryparser_internal.cc"
2621  yymsp[0].minor.yy32 = yylhsminor.yy32;
2622  break;
2623  case 38: /* phrase ::= UNBROKEN_WORDS */
2624 #line 2192 "queryparser/queryparser.lemony"
2625 {
2626  yylhsminor.yy32 = Terms::create(state);
2627  yymsp[0].minor.yy0->as_positional_unbroken(yylhsminor.yy32);
2628 }
2629 #line 2630 "queryparser/queryparser_internal.cc"
2630  yymsp[0].minor.yy32 = yylhsminor.yy32;
2631  break;
2632  case 39: /* phrase ::= phrase TERM */
2633  case 42: /* phrased_term ::= phrased_term PHR_TERM */ yytestcase(yyruleno==42);
2634 #line 2197 "queryparser/queryparser.lemony"
2635 {
2636  yymsp[-1].minor.yy32->add_positional_term(yymsp[0].minor.yy0);
2637 }
2638 #line 2639 "queryparser/queryparser_internal.cc"
2639  break;
2640  case 40: /* phrase ::= phrase UNBROKEN_WORDS */
2641 #line 2201 "queryparser/queryparser.lemony"
2642 {
2643  yymsp[0].minor.yy0->as_positional_unbroken(yymsp[-1].minor.yy32);
2644 }
2645 #line 2646 "queryparser/queryparser_internal.cc"
2646  break;
2647  case 41: /* phrased_term ::= TERM PHR_TERM */
2648 #line 2212 "queryparser/queryparser.lemony"
2649 {
2650  yylhsminor.yy32 = Terms::create(state);
2651  yylhsminor.yy32->add_positional_term(yymsp[-1].minor.yy0);
2652  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2653 }
2654 #line 2655 "queryparser/queryparser_internal.cc"
2655  yymsp[-1].minor.yy32 = yylhsminor.yy32;
2656  break;
2657  case 43: /* group ::= TERM GROUP_TERM */
2658 #line 2228 "queryparser/queryparser.lemony"
2659 {
2660  yymsp[-1].minor.yy14 = TermGroup::create(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); /*P-overwrites-T*/
2661 }
2662 #line 2663 "queryparser/queryparser_internal.cc"
2663  break;
2664  case 44: /* group ::= group GROUP_TERM */
2665 #line 2232 "queryparser/queryparser.lemony"
2666 {
2667  yymsp[-1].minor.yy14->add_term(yymsp[0].minor.yy0);
2668 }
2669 #line 2670 "queryparser/queryparser_internal.cc"
2670  break;
2671  case 45: /* group ::= group EMPTY_GROUP_OK */
2672 #line 2236 "queryparser/queryparser.lemony"
2673 {
2674  yymsp[-1].minor.yy14->set_empty_ok();
2675 }
2676 #line 2677 "queryparser/queryparser_internal.cc"
2677  yy_destructor(yypParser,23,&yymsp[0].minor);
2678  break;
2679  case 46: /* near_expr ::= TERM NEAR TERM */
2680  case 48: /* adj_expr ::= TERM ADJ TERM */ yytestcase(yyruleno==48);
2681 #line 2246 "queryparser/queryparser.lemony"
2682 {
2683  yylhsminor.yy32 = Terms::create(state);
2684  yylhsminor.yy32->add_positional_term(yymsp[-2].minor.yy0);
2685  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2686  if (yymsp[-1].minor.yy0) {
2687  yylhsminor.yy32->adjust_window(yymsp[-1].minor.yy0->get_termpos());
2688  delete yymsp[-1].minor.yy0;
2689  }
2690 }
2691 #line 2692 "queryparser/queryparser_internal.cc"
2692  yymsp[-2].minor.yy32 = yylhsminor.yy32;
2693  break;
2694  case 47: /* near_expr ::= near_expr NEAR TERM */
2695  case 49: /* adj_expr ::= adj_expr ADJ TERM */ yytestcase(yyruleno==49);
2696 #line 2256 "queryparser/queryparser.lemony"
2697 {
2698  yymsp[-2].minor.yy32->add_positional_term(yymsp[0].minor.yy0);
2699  if (yymsp[-1].minor.yy0) {
2700  yymsp[-2].minor.yy32->adjust_window(yymsp[-1].minor.yy0->get_termpos());
2701  delete yymsp[-1].minor.yy0;
2702  }
2703 }
2704 #line 2705 "queryparser/queryparser_internal.cc"
2705  break;
2706  default:
2707  /* (50) expr ::= prob_expr (OPTIMIZED OUT) */ Assert(yyruleno!=50);
2708  /* (51) bool_arg ::= expr */ yytestcase(yyruleno==51);
2709  /* (52) prob_expr ::= term (OPTIMIZED OUT) */ Assert(yyruleno!=52);
2710  /* (53) stop_prob ::= prob */ yytestcase(yyruleno==53);
2711  /* (54) stop_term ::= compound_term */ yytestcase(yyruleno==54);
2712  /* (55) term ::= compound_term */ yytestcase(yyruleno==55);
2713  break;
2714 /********** End reduce actions ************************************************/
2715  }
2716  Assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
2717  yygoto = yyRuleInfo[yyruleno].lhs;
2718  yysize = yyRuleInfo[yyruleno].nrhs;
2719  yyact = yy_find_reduce_action(yymsp[yysize].stateno,static_cast<YYCODETYPE>(yygoto));
2720 
2721  /* There are no SHIFTREDUCE actions on nonterminals because the table
2722  ** generator has simplified them to pure REDUCE actions. */
2723  Assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );
2724 
2725  /* It is not possible for a REDUCE to be followed by an error */
2726  Assert( yyact!=YY_ERROR_ACTION );
2727 
 /* yysize is zero or negative (nrhs is stored negated). Step yymsp to the
 ** entry which will hold the LHS, and for a non-empty RHS resize the
 ** stack so that entry becomes the top. For an empty RHS the stack was
 ** already grown by one entry before the switch. */
2728  yymsp += yysize+1;
2729  if (yysize) {
2730  yypParser->yystack.resize(yypParser->yystack.size() + yysize+1);
2731  }
2732  yymsp->stateno = static_cast<YYACTIONTYPE>(yyact);
2733  yymsp->major = static_cast<YYCODETYPE>(yygoto);
2734  yyTraceShift(yypParser, yyact, "... then shift");
2735 }
2736 
2737 /*
2738 ** The following code executes when the parse fails
**
** It logs "Fail!", pops entries with yy_pop_parser_stack() until only one
** entry is left on the parser's stack, and then runs the grammar's
** %parse_failure code, which sets state->error to "parse error" unless an
** error message has already been set.
**
** The whole routine is compiled only when YYNOERRORRECOVERY is not defined.
2739 */
2740 #ifndef YYNOERRORRECOVERY
2741 static void yy_parse_failed(
2742  yyParser *yypParser /* The parser */
2743 ){
2745  LOGLINE(QUERYPARSER, "Fail!");
2746  while( yypParser->yystack.size() > 1 ) yy_pop_parser_stack(yypParser);
2747  /* Here code is inserted which will be executed whenever the
2748  ** parser fails */
2749 /************ Begin %parse_failure code ***************************************/
2750 #line 1814 "queryparser/queryparser.lemony"
2751 
2752  // If we've not already set an error message, set a default one.
2753  if (!state->error) state->error = "parse error";
2754 #line 2755 "queryparser/queryparser_internal.cc"
2755 /************ End %parse_failure code *****************************************/
2756  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2757 }
2758 #endif /* YYNOERRORRECOVERY */
2759 
2760 /*
2761 ** The following code executes when a syntax error first occurs.
**
** The grammar's %syntax_error code just calls yy_parse_failed(), so neither
** yymajor nor yyminor is otherwise used here; both are cast to void to
** avoid unused-parameter warnings. TOKEN is defined as an alias for yyminor
** for the benefit of %syntax_error code, and is not #undef'd within this
** function.
2762 */
2763 static void yy_syntax_error(
2764  yyParser *yypParser, /* The parser */
2765  int yymajor, /* The major type of the error token */
2766  ParseTOKENTYPE yyminor /* The minor type of the error token */
2767 ){
2769  (void)yymajor;
2770  (void)yyminor;
2771 #define TOKEN yyminor
2772 /************ Begin %syntax_error code ****************************************/
2773 #line 1819 "queryparser/queryparser.lemony"
2774 
2775  yy_parse_failed(yypParser);
2776 #line 2777 "queryparser/queryparser_internal.cc"
2777 /************ End %syntax_error code ******************************************/
2778  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2779 }
2780 
2781 /*
2782 ** The following is executed when the parser accepts
**
** It logs "Accept!", sets yyerrcnt to -1 (unless YYNOERRORRECOVERY is
** defined) and asserts that exactly one entry remains on the parser's
** stack. The grammar supplies no %parse_accept code, so nothing else
** happens here.
2783 */
2784 static void yy_accept(
2785  yyParser *yypParser /* The parser */
2786 ){
2788  LOGLINE(QUERYPARSER, "Accept!");
2789 #ifndef YYNOERRORRECOVERY
2790  yypParser->yyerrcnt = -1;
2791 #endif
2792  AssertEq( yypParser->yystack.size(), 1 );
2793  /* Here code is inserted which will be executed whenever the
2794  ** parser accepts */
2795 /*********** Begin %parse_accept code *****************************************/
2796 /*********** End %parse_accept code *******************************************/
2797  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2798 }
2799 
2800 /* The main parser program.
2801 ** The first argument is a pointer to a structure obtained from
2802 ** "ParseAlloc" which describes the current state of the parser.
2803 ** The second argument is the major token number. The third is
2804 ** the minor token. The fourth optional argument is whatever the
2805 ** user wants (and specified in the grammar) and is available for
2806 ** use by the action routines.
2807 **
2808 ** Inputs:
2809 ** <ul>
2810 ** <li> A pointer to the parser (an opaque structure.)
2811 ** <li> The major token number.
2812 ** <li> The minor token number.
2813 ** <li> An option argument of a grammar-specified type.
2814 ** </ul>
2815 **
2816 ** Outputs:
2817 ** None.
2818 */
2819 static
2820 void Parse(
2821  yyParser *yypParser, /* The parser */
2822  int yymajor, /* The major token code number */
2823  ParseTOKENTYPE yyminor /* The value for the token */
2824  ParseARG_PDECL /* Optional %extra_argument parameter */
2825 ){
2826  YYMINORTYPE yyminorunion;
2827  unsigned int yyact; /* The parser action. */
2828 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
2829  int yyendofinput; /* True if we are at the end of input */
2830 #endif
2831 #ifdef YYERRORSYMBOL
2832  int yyerrorhit = 0; /* True if yymajor has invoked an error */
2833 #endif
2834 
2835 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
2836  yyendofinput = (yymajor==0);
2837 #endif
2839 
2840 #ifdef XAPIAN_DEBUG_LOG
2841  {
2842  int stateno = yypParser->yystack.back().stateno;
2843  if( stateno < YY_MIN_REDUCE ){
2844  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
2845  "'," << (yyminor ? yyminor->name : "<<null>>") <<
2846  "in state " << stateno);
2847  }else{
2848  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
2849  "'," << (yyminor ? yyminor->name : "<<null>>") <<
2850  "with pending reduce " << stateno-YY_MIN_REDUCE);
2851  }
2852  }
2853 #endif
2854 
2855  do{
2856  yyact = yy_find_shift_action(yypParser,static_cast<YYCODETYPE>(yymajor));
2857  if( yyact >= YY_MIN_REDUCE ){
2858  yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,yyminor);
2859  }else if( yyact <= YY_MAX_SHIFTREDUCE ){
2860  yy_shift(yypParser,yyact,yymajor,yyminor);
2861 #ifndef YYNOERRORRECOVERY
2862  yypParser->yyerrcnt--;
2863 #endif
2864  yymajor = YYNOCODE;
2865  }else if( yyact==YY_ACCEPT_ACTION ){
2866  yypParser->yystack.pop_back();
2867  yy_accept(yypParser);
2868  return;
2869  }else{
2870  Assert( yyact == YY_ERROR_ACTION );
2871  yyminorunion.yy0 = yyminor;
2872 #ifdef YYERRORSYMBOL
2873  int yymx;
2874 #endif
2875  LOGLINE(QUERYPARSER, "Syntax Error!");
2876 #ifdef YYERRORSYMBOL
2877  /* A syntax error has occurred.
2878  ** The response to an error depends upon whether or not the
2879  ** grammar defines an error token "ERROR".
2880  **
2881  ** This is what we do if the grammar does define ERROR:
2882  **
2883  ** * Call the %syntax_error function.
2884  **
2885  ** * Begin popping the stack until we enter a state where
2886  ** it is legal to shift the error symbol, then shift
2887  ** the error symbol.
2888  **
2889  ** * Set the error count to three.
2890  **
2891  ** * Begin accepting and shifting new tokens. No new error
2892  ** processing will occur until three tokens have been
2893  ** shifted successfully.
2894  **
2895  */
2896  if( yypParser->yyerrcnt<0 ){
2897  yy_syntax_error(yypParser,yymajor,yyminor);
2898  }
2899  yymx = yypParser->yystack.back().major;
2900  if( yymx==YYERRORSYMBOL || yyerrorhit ){
2901  LOGLINE(QUERYPARSER, "Discard input token " << ParseTokenName(yymajor));
2902  yy_destructor(yypParser, static_cast<YYCODETYPE>(yymajor), &yyminorunion);
2903  yymajor = YYNOCODE;
2904  }else{
2905  while( !yypParser->yystack.empty()
2906  && yymx != YYERRORSYMBOL
2907  && (yyact = yy_find_reduce_action(
2908  yypParser->yystack.back().stateno,
2909  YYERRORSYMBOL)) >= YY_MIN_REDUCE
2910  ){
2911  yy_pop_parser_stack(yypParser);
2912  }
2913  if( yypParser->yystack.empty() || yymajor==0 ){
2914  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2915  yy_parse_failed(yypParser);
2916 #ifndef YYNOERRORRECOVERY
2917  yypParser->yyerrcnt = -1;
2918 #endif
2919  yymajor = YYNOCODE;
2920  }else if( yymx!=YYERRORSYMBOL ){
2921  yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor);
2922  }
2923  }
2924  yypParser->yyerrcnt = 3;
2925  yyerrorhit = 1;
2926 #elif defined(YYNOERRORRECOVERY)
2927  /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
2928  ** do any kind of error recovery. Instead, simply invoke the syntax
2929  ** error routine and continue going as if nothing had happened.
2930  **
2931  ** Applications can set this macro (for example inside %include) if
2932  ** they intend to abandon the parse upon the first syntax error seen.
2933  */
2934  yy_syntax_error(yypParser,yymajor, yyminor);
2935  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2936  yymajor = YYNOCODE;
2937 
2938 #else /* YYERRORSYMBOL is not defined */
2939  /* This is what we do if the grammar does not define ERROR:
2940  **
2941  ** * Report an error message, and throw away the input token.
2942  **
2943  ** * If the input token is $, then fail the parse.
2944  **
2945  ** As before, subsequent error messages are suppressed until
2946  ** three input tokens have been successfully shifted.
2947  */
2948  if( yypParser->yyerrcnt<=0 ){
2949  yy_syntax_error(yypParser,yymajor, yyminor);
2950  }
2951  yypParser->yyerrcnt = 3;
2952  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2953  if( yyendofinput ){
2954  yy_parse_failed(yypParser);
2955 #ifndef YYNOERRORRECOVERY
2956  yypParser->yyerrcnt = -1;
2957 #endif
2958  }
2959  yymajor = YYNOCODE;
2960 #endif
2961  }
2962  }while( yymajor!=YYNOCODE && yypParser->yystack.size() > 1 );
2963 #ifdef XAPIAN_DEBUG_LOG
2964  {
2965  int i;
2966  LOGLINE(QUERYPARSER, "Return. Stack=");
2967  for(i=1; i<=(int)yypParser->yystack.size(); i++)
2968  LOGLINE(QUERYPARSER, yyTokenName[yypParser->yystack[i].major]);
2969  }
2970 #endif
2971  return;
2972 }
2973 
2974 // Select C++ syntax highlighting in vim editor: vim: syntax=cpp
2975 #line 804 "queryparser/queryparser.lemony"
2976 
2977 
/** Lex a query string and drive the generated parser with the tokens.
 *
 *  The string is walked with a Utf8Iterator; each recognised token is
 *  passed to Parse() together with the shared State object.  After the
 *  loop any unclosed quote and unclosed brackets are closed implicitly and
 *  the end-of-input token (0) is sent, unless state.error has been set.
 *
 *  Side effects visible in this function: corrected_query is reset and,
 *  when FLAG_SPELLING_CORRECTION is set and a suggestion is found, filled
 *  with a corrected copy of @a qs; errmsg is set from state.error.
 *
 *  @param qs              The query string to parse.
 *  @param flags           Bitmask of FLAG_* values selecting which syntax
 *                         features are recognised.
 *  @param default_prefix  If non-empty, the prefix used for terms which have
 *                         no explicit field; otherwise the field_map entry
 *                         for the empty field name is used when present.
 *
 *  @return state.query as left by the parser actions.
 */
2978 Query
2979 QueryParser::Internal::parse_query(const string &qs, unsigned flags,
2980  const string &default_prefix)
2981 {
2982  bool try_word_break = (flags & FLAG_NGRAMS) || is_ngram_enabled();
2983 
2984  // Set ranges if we may have to handle ranges in the query.
2985  bool ranges = !rangeprocs.empty() && (qs.find("..") != string::npos);
2986 
2987  termpos term_pos = 1;
2988  Utf8Iterator it(qs), end;
2989 
2990  State state(this, flags);
2991 
2992  // To successfully apply more than one spelling correction to a query
2993  // string, we must keep track of the offset due to previous corrections.
2994  int correction_offset = 0;
2995  corrected_query.resize(0);
2996 
2997  // Stack of prefixes, used for phrases and subexpressions.
2998  list<const FieldInfo *> prefix_stack;
2999 
3000  // If default_prefix is specified, use it. Otherwise, use any list
3001  // that has been set for the empty prefix.
3002  const FieldInfo def_pfx(NON_BOOLEAN, default_prefix);
3003  {
3004  const FieldInfo * default_field_info = &def_pfx;
3005  if (default_prefix.empty()) {
3006  auto f = field_map.find(string());
3007  if (f != field_map.end()) default_field_info = &(f->second);
3008  }
3009 
3010  // We always have the current prefix on the top of the stack.
3011  prefix_stack.push_back(default_field_info);
3012  }
3013 
3014  yyParser parser;
3015 
  // The previous character seen by the lexer (' ' stands for "whitespace or
  // start of input"); used below to decide whether +, -, ( and ~ are
  // operators.
3016  unsigned newprev = ' ';
  // Re-entered via `goto main_lex_loop` after a range has been lexed.  The
  // jump goes back past the declaration of `mode`, so `mode` starts at
  // DEFAULT again.
3017 main_lex_loop:
3018  enum {
3019  DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP,
3020  IN_GROUP2, EXPLICIT_SYNONYM
3021  } mode = DEFAULT;
3022  while (it != end && !state.error) {
3023  bool last_was_operator = false;
3024  bool last_was_operator_needing_term = false;
3025  if (mode == EXPLICIT_SYNONYM) mode = DEFAULT;
  // The two `if (false)` bodies below are never entered by falling through;
  // they are only reached by a goto to the label inside them.
3026  if (false) {
3027 just_had_operator:
3028  if (it == end) break;
3029  mode = DEFAULT;
3030  last_was_operator_needing_term = false;
3031  last_was_operator = true;
3032  }
3033  if (false) {
3034 just_had_operator_needing_term:
3035  last_was_operator_needing_term = true;
3036  last_was_operator = true;
3037  }
3038  if (mode == IN_PHRASED_TERM) mode = DEFAULT;
3039  if (is_whitespace(*it)) {
3040  newprev = ' ';
3041  ++it;
3042  it = find_if(it, end, is_not_whitespace);
3043  if (it == end) break;
3044  }
3045 
3046  if (ranges &&
3047  (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2)) {
3048  // Scan forward to see if this could be the "start of range"
3049  // token. Sadly this has O(n²) tendencies, though at least
3050  // "n" is the number of words in a query which is likely to
3051  // remain fairly small. FIXME: can we tokenise more elegantly?
3052  Utf8Iterator it_initial = it;
3053  Utf8Iterator p = it;
  // `ch` holds the character before *p (0 before the first character).
3054  unsigned ch = 0;
3055  while (p != end) {
3056  if (ch == '.' && *p == '.') {
3057  string a;
3058  while (it != p) {
3059  Unicode::append_utf8(a, *it++);
3060  }
3061  // Trim off the trailing ".".
3062  a.resize(a.size() - 1);
3063  ++p;
3064  // Either end of the range can be empty (for an open-ended
3065  // range) but both can't be empty.
3066  if (!a.empty() || (p != end && *p > ' ' && *p != ')')) {
3067  string b;
3068  // Allow any character except whitespace and ')' in the
3069  // upper bound.
3070  while (p != end && *p > ' ' && *p != ')') {
3071  Unicode::append_utf8(b, *p++);
3072  }
3073  Term * range = state.range(a, b);
3074  if (!range) {
3075  state.error = "Unknown range operation";
3076  if (a.find(':', 1) == string::npos) {
3077  goto done;
3078  }
3079  // Might be a boolean filter with ".." in. Leave
3080  // state.error in case it isn't.
3081  it = it_initial;
3082  break;
3083  }
3084  Parse(&parser, RANGE, range, &state);
3085  }
3086  it = p;
3087  goto main_lex_loop;
3088  }
3089  ch = *p;
3090  // Allow any character except whitespace and '(' in the lower
3091  // bound.
3092  if (ch <= ' ' || ch == '(') break;
3093  ++p;
3094  }
3095  }
3096 
3097  if (!is_wordchar(*it)) {
3098  unsigned prev = newprev;
3099  unsigned ch = *it++;
3100  newprev = ch;
3101  // Drop out of IN_GROUP mode.
3102  if (mode == IN_GROUP || mode == IN_GROUP2)
3103  mode = DEFAULT;
3104  switch (ch) {
3105  case '"':
3106  case 0x201c: // Left curly double quote.
3107  case 0x201d: // Right curly double quote.
3108  // Quoted phrase.
3109  if (mode == DEFAULT) {
3110  // Skip whitespace.
3111  it = find_if(it, end, is_not_whitespace);
3112  if (it == end) {
3113  // Ignore an unmatched " at the end of the query to
3114  // avoid generating an empty pair of QUOTEs which will
3115  // cause a parse error.
3116  goto done;
3117  }
3118  if (is_double_quote(*it)) {
3119  // Ignore empty "" (but only if we're not already
3120  // IN_QUOTES as we don't merge two adjacent quoted
3121  // phrases!)
3122  newprev = *it++;
3123  break;
3124  }
3125  }
3126  if (flags & QueryParser::FLAG_PHRASE) {
3127  if (ch == '"' && it != end && *it == '"') {
3128  ++it;
3129  // Handle "" inside a quoted phrase as an escaped " for
3130  // consistency with quoted boolean terms.
3131  break;
3132  }
3133  Parse(&parser, QUOTE, NULL, &state);
3134  if (mode == DEFAULT) {
3135  mode = IN_QUOTES;
3136  } else {
3137  // Remove the prefix we pushed for this phrase.
3138  if (mode == IN_PREFIXED_QUOTES)
3139  prefix_stack.pop_back();
3140  mode = DEFAULT;
3141  }
3142  }
3143  break;
3144 
3145  case '+': case '-': // Loved or hated term/phrase/subexpression.
3146  // Ignore + or - at the end of the query string.
3147  if (it == end) goto done;
3148  if (prev > ' ' && prev != '(') {
3149  // Or if not after whitespace or an open bracket.
3150  break;
3151  }
3152  if (is_whitespace(*it) || *it == '+' || *it == '-') {
3153  // Ignore + or - followed by a space, or further + or -.
3154  // Postfix + (such as in C++ and H+) is handled as part of
3155  // the term lexing code in parse_term().
3156  newprev = *it++;
3157  break;
3158  }
3159  if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
3160  int token;
3161  if (ch == '+') {
3162  token = LOVE;
3163  } else if (last_was_operator) {
3164  token = HATE_AFTER_AND;
3165  } else {
3166  token = HATE;
3167  }
3168  Parse(&parser, token, NULL, &state);
3169  goto just_had_operator_needing_term;
3170  }
3171  // Need to prevent the term after a LOVE or HATE starting a
3172  // term group...
3173  break;
3174 
3175  case '(': // Bracketed subexpression.
3176  // Skip whitespace.
3177  it = find_if(it, end, is_not_whitespace);
3178  // Ignore ( at the end of the query string.
3179  if (it == end) goto done;
3180  if (prev > ' ' && strchr("()+-", prev) == NULL) {
3181  // Or if not after whitespace or a bracket or '+' or '-'.
3182  break;
3183  }
3184  if (*it == ')') {
3185  // Ignore empty ().
3186  newprev = *it++;
3187  break;
3188  }
3189  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
  // The subexpression starts with the same prefix as its enclosing scope.
3190  prefix_stack.push_back(prefix_stack.back());
3191  Parse(&parser, BRA, NULL, &state);
3192  }
3193  break;
3194 
3195  case ')': // End of bracketed subexpression.
3196  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3197  // Remove the prefix we pushed for the corresponding BRA.
3198  // If brackets are unmatched, it's a syntax error, but
3199  // that's no excuse to SEGV!
3200  if (prefix_stack.size() > 1) prefix_stack.pop_back();
3201  Parse(&parser, KET, NULL, &state);
3202  }
3203  break;
3204 
3205  case '~': // Synonym expansion.
3206  // Ignore at the end of the query string.
3207  if (it == end) goto done;
3208  if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
3209  if (prev > ' ' && strchr("+-(", prev) == NULL) {
3210  // Or if not after whitespace, +, -, or an open bracket.
3211  break;
3212  }
3213  if (!is_wordchar(*it)) {
3214  // Ignore if not followed by a word character.
3215  break;
3216  }
3217  Parse(&parser, SYNONYM, NULL, &state);
3218  mode = EXPLICIT_SYNONYM;
3219  goto just_had_operator_needing_term;
3220  }
3221  break;
3222  }
3223  // Skip any other characters.
3224  continue;
3225  }
3226 
3227  Assert(is_wordchar(*it));
3228 
  // Byte offset of the term within qs; used by the spelling correction
  // code further down to locate the text to replace.
3229  size_t term_start_index = it.raw() - qs.data();
3230 
3231  newprev = 'A'; // Any letter will do...
3232 
3233  // A term, a prefix, or a boolean operator.
3234  const FieldInfo * field_info = NULL;
3235  if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2 || mode == EXPLICIT_SYNONYM) &&
3236  !field_map.empty()) {
3237  // Check for a fieldname prefix (e.g. title:historical).
3238  Utf8Iterator p = find_if(it, end, is_not_wordchar);
3239  if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
3240  string field;
3241  p = it;
3242  while (*p != ':')
3243  Unicode::append_utf8(field, *p++);
3244  map<string, FieldInfo>::const_iterator f;
3245  f = field_map.find(field);
3246  if (f != field_map.end()) {
3247  // Special handling for prefixed fields, depending on the
3248  // type of the prefix.
  // p is on the ':' here, so ch is the first character after it.
3249  unsigned ch = *++p;
3250  field_info = &(f->second);
3251 
3252  if (field_info->type != NON_BOOLEAN) {
3253  // Drop out of IN_GROUP if we're in it.
3254  if (mode == IN_GROUP || mode == IN_GROUP2)
3255  mode = DEFAULT;
3256  it = p;
3257  string name;
3258  if (it != end && is_double_quote(*it)) {
3259  // Quoted boolean term (can contain any character).
3260  bool fancy = (*it != '"');
3261  ++it;
3262  while (it != end) {
3263  if (*it == '"') {
3264  // Interpret "" as an escaped ".
3265  if (++it == end || *it != '"')
3266  break;
3267  } else if (fancy && is_double_quote(*it)) {
3268  // If the opening quote was ASCII, then the
3269  // closing one must be too - otherwise
3270  // the user can't protect non-ASCII double
3271  // quote characters by quoting or escaping.
3272  ++it;
3273  break;
3274  }
3275  Unicode::append_utf8(name, *it++);
3276  }
3277  } else {
3278  // Can't boolean filter prefix a subexpression, so
3279  // just use anything following the prefix until the
3280  // next space or ')' as part of the boolean filter
3281  // term.
3282  while (it != end && *it > ' ' && *it != ')')
3283  Unicode::append_utf8(name, *it++);
3284  }
3285  // Build the unstemmed form in field.
3286  field += ':';
3287  field += name;
3288  // Clear any pending range error.
3289  state.error = NULL;
3290  Term * token = new Term(&state, name, field_info, field);
3291  Parse(&parser, BOOLEAN_FILTER, token, &state);
3292  continue;
3293  }
3294 
3295  if ((flags & FLAG_PHRASE) && is_double_quote(ch)) {
3296  // Prefixed phrase, e.g.: subject:"space flight"
3297  mode = IN_PREFIXED_QUOTES;
3298  Parse(&parser, QUOTE, NULL, &state);
3299  it = p;
3300  newprev = ch;
3301  ++it;
3302  prefix_stack.push_back(field_info);
3303  continue;
3304  }
3305 
3306  if (ch == '(' && (flags & FLAG_BOOLEAN)) {
3307  // Prefixed subexpression, e.g.: title:(fast NEAR food)
3308  mode = DEFAULT;
3309  Parse(&parser, BRA, NULL, &state);
3310  it = p;
3311  newprev = ch;
3312  ++it;
3313  prefix_stack.push_back(field_info);
3314  continue;
3315  }
3316 
3317  if (ch != ':') {
3318  // Allow 'path:/usr/local' but not 'foo::bar::baz'.
3319  while (is_phrase_generator(ch)) {
3320  if (++p == end)
3321  goto not_prefix;
3322  ch = *p;
3323  }
3324  }
3325 
3326  if (is_wordchar(ch)) {
3327  // Prefixed term.
3328  it = p;
3329  } else {
3330 not_prefix:
3331  // It looks like a prefix but isn't, so parse it as
3332  // text instead.
3333  field_info = NULL;
3334  }
3335  }
3336  }
3337  }
3338 
  // Re-entered via `goto phrased_term` (with mode == IN_PHRASED_TERM) for
  // each further term joined to the previous one by phrase generators.
3339 phrased_term:
3340  bool was_acronym;
3341  bool needs_word_break = false;
3342  string term = parse_term(it, end, try_word_break,
3343  needs_word_break, was_acronym);
3344 
3345  if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) &&
3346  (flags & FLAG_BOOLEAN) &&
3347  // Don't want to interpret A.N.D. as an AND operator.
3348  !was_acronym &&
3349  !field_info &&
3350  term.size() >= 2 && term.size() <= 4 && U_isalpha(term[0])) {
3351  // Boolean operators.
3352  string op = term;
3353  if (flags & FLAG_BOOLEAN_ANY_CASE) {
3354  for (string::iterator i = op.begin(); i != op.end(); ++i) {
3355  *i = C_toupper(*i);
3356  }
3357  }
3358  if (op.size() == 3) {
3359  if (op == "AND") {
3360  Parse(&parser, AND, NULL, &state);
3361  goto just_had_operator;
3362  }
3363  if (op == "NOT") {
3364  Parse(&parser, NOT, NULL, &state);
3365  goto just_had_operator;
3366  }
3367  if (op == "XOR") {
3368  Parse(&parser, XOR, NULL, &state);
3369  goto just_had_operator;
3370  }
3371  if (op == "ADJ") {
  // "ADJ/<digits>": the window is only accepted when it is non-zero and
  // followed by whitespace or the end of the query.  Otherwise no token is
  // emitted here and the word falls through to ordinary term handling.
3372  if (it != end && *it == '/') {
3373  size_t width = 0;
3374  Utf8Iterator p = it;
3375  while (++p != end && U_isdigit(*p)) {
3376  width = (width * 10) + (*p - '0');
3377  }
3378  if (width && (p == end || is_whitespace(*p))) {
3379  it = p;
3380  Parse(&parser, ADJ, new Term(width), &state);
3381  goto just_had_operator;
3382  }
3383  } else {
3384  Parse(&parser, ADJ, NULL, &state);
3385  goto just_had_operator;
3386  }
3387  }
3388  } else if (op.size() == 2) {
3389  if (op == "OR") {
3390  Parse(&parser, OR, NULL, &state);
3391  goto just_had_operator;
3392  }
3393  } else if (op.size() == 4) {
3394  if (op == "NEAR") {
  // "NEAR/<digits>" is handled the same way as "ADJ/<digits>" above.
3395  if (it != end && *it == '/') {
3396  size_t width = 0;
3397  Utf8Iterator p = it;
3398  while (++p != end && U_isdigit(*p)) {
3399  width = (width * 10) + (*p - '0');
3400  }
3401  if (width && (p == end || is_whitespace(*p))) {
3402  it = p;
3403  Parse(&parser, NEAR, new Term(width), &state);
3404  goto just_had_operator;
3405  }
3406  } else {
3407  Parse(&parser, NEAR, NULL, &state);
3408  goto just_had_operator;
3409  }
3410  }
3411  }
3412  }
3413 
3414  // If no prefix is set, use the default one.
3415  if (!field_info) field_info = prefix_stack.back();
3416 
3417  Assert(field_info->type == NON_BOOLEAN);
3418 
3419  {
3420  string unstemmed_term(term);
3421  term = Unicode::tolower(term);
3422 
3423  // Reuse stem_strategy - STEM_SOME here means "stem terms except
3424  // when used with positional operators".
3425  stem_strategy stem_term = stem_action;
3426  if (stem_term != STEM_NONE) {
3427  if (stemmer.is_none()) {
3428  stem_term = STEM_NONE;
3429  } else if (stem_term == STEM_SOME ||
3430  stem_term == STEM_SOME_FULL_POS) {
3431  if (!should_stem(unstemmed_term) ||
3432  (it != end && is_stem_preventer(*it))) {
3433  // Don't stem this particular term.
3434  stem_term = STEM_NONE;
3435  }
3436  }
3437  }
3438 
  // Every path below passes term_obj to exactly one Parse() call.
3439  Term * term_obj = new Term(&state, term, field_info,
3440  unstemmed_term, stem_term, term_pos++);
3441 
3442  if (needs_word_break) {
3443  Parse(&parser, UNBROKEN_WORDS, term_obj, &state);
3444  // Drop out of IN_GROUP mode.
3445  if (mode == IN_GROUP || mode == IN_GROUP2)
3446  mode = DEFAULT;
3447  if (it == end) break;
3448  continue;
3449  }
3450 
3451  if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3452  if (it != end) {
3453  if ((flags & FLAG_WILDCARD) && *it == '*') {
3454  Utf8Iterator p(it);
3455  ++p;
3456  if (p == end || !is_wordchar(*p)) {
3457  it = p;
3458  if (mode == IN_GROUP || mode == IN_GROUP2) {
3459  // Drop out of IN_GROUP and flag that the group
3460  // can be empty if all members are stopwords.
3461  if (mode == IN_GROUP2)
3462  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3463  mode = DEFAULT;
3464  }
3465  // Wildcard at end of term (also known as
3466  // "right truncation").
3467  Parse(&parser, WILD_TERM, term_obj, &state);
3468  continue;
3469  }
3470  }
3471  } else {
3472  if (flags & FLAG_PARTIAL) {
3473  if (mode == IN_GROUP || mode == IN_GROUP2) {
3474  // Drop out of IN_GROUP and flag that the group
3475  // can be empty if all members are stopwords.
3476  if (mode == IN_GROUP2)
3477  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3478  mode = DEFAULT;
3479  }
3480  // Final term of a partial match query, with no
3481  // following characters - treat as a wildcard.
3482  Parse(&parser, PARTIAL_TERM, term_obj, &state);
3483  continue;
3484  }
3485  }
3486  }
3487 
3488  // Check spelling, if we're a normal term, and any of the prefixes
3489  // are empty.
3490  if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
3491  const auto& prefixes = field_info->prefixes;
3492  for (const string& prefix : prefixes) {
3493  if (!prefix.empty())
3494  continue;
3495  const string & suggest = db.get_spelling_suggestion(term);
3496  if (!suggest.empty()) {
3497  if (corrected_query.empty()) corrected_query = qs;
3498  size_t term_end_index = it.raw() - qs.data();
3499  size_t n = term_end_index - term_start_index;
  // Offsets into qs are shifted by the net length change of the
  // corrections already applied to corrected_query.
3500  size_t pos = term_start_index + correction_offset;
3501  corrected_query.replace(pos, n, suggest);
3502  correction_offset += suggest.size();
3503  correction_offset -= n;
3504  }
  // Stop at the first empty prefix: the term is checked at most once.
3505  break;
3506  }
3507  }
3508 
3509  if (mode == IN_PHRASED_TERM) {
3510  Parse(&parser, PHR_TERM, term_obj, &state);
3511  } else {
3512  // See if the next token will be PHR_TERM - if so, this one
3513  // needs to be TERM not GROUP_TERM.
3514  if ((mode == IN_GROUP || mode == IN_GROUP2) &&
3515  is_phrase_generator(*it)) {
3516  // FIXME: can we clean this up?
3517  Utf8Iterator p = it;
3518  do {
3519  ++p;
3520  } while (p != end && is_phrase_generator(*p));
3521  // Don't generate a phrase unless the phrase generators are
3522  // immediately followed by another term.
3523  if (p != end && is_wordchar(*p)) {
3524  mode = DEFAULT;
3525  }
3526  }
3527 
3528  int token = TERM;
3529  if (mode == IN_GROUP || mode == IN_GROUP2) {
3530  mode = IN_GROUP2;
3531  token = GROUP_TERM;
3532  }
3533  Parse(&parser, token, term_obj, &state);
  // A TERM emitted in a non-DEFAULT mode skips the phrase/group detection
  // below.
3534  if (token == TERM && mode != DEFAULT)
3535  continue;
3536  }
3537  }
3538 
3539  if (it == end) break;
3540 
3541  if (is_phrase_generator(*it)) {
3542  // Skip multiple phrase generators.
3543  do {
3544  ++it;
3545  } while (it != end && is_phrase_generator(*it));
3546  // Don't generate a phrase unless the phrase generators are
3547  // immediately followed by another term.
3548  if (it != end && is_wordchar(*it)) {
3549  mode = IN_PHRASED_TERM;
3550  term_start_index = it.raw() - qs.data();
3551  goto phrased_term;
3552  }
3553  } else if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3554  int old_mode = mode;
3555  mode = DEFAULT;
3556  if (!last_was_operator_needing_term && is_whitespace(*it)) {
3557  newprev = ' ';
3558  // Skip multiple whitespace.
3559  do {
3560  ++it;
3561  } while (it != end && is_whitespace(*it));
3562  // Don't generate a group unless the terms are only separated
3563  // by whitespace.
3564  if (it != end && is_wordchar(*it)) {
3565  if (old_mode == IN_GROUP || old_mode == IN_GROUP2) {
3566  mode = IN_GROUP2;
3567  } else {
3568  mode = IN_GROUP;
3569  }
3570  }
3571  }
3572  }
3573  }
3574 done:
3575  if (!state.error) {
3576  // Implicitly close any unclosed quotes.
3577  if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
3578  Parse(&parser, QUOTE, NULL, &state);
3579 
3580  // Implicitly close all unclosed brackets.
3581  while (prefix_stack.size() > 1) {
3582  Parse(&parser, KET, NULL, &state);
3583  prefix_stack.pop_back();
3584  }
  // Major token 0 tells the parser that the input has ended.
3585  Parse(&parser, 0, NULL, &state);
3586  }
3587 
3588  errmsg = state.error;
3589  return state.query;
3590 }
3591 
3592 #line 3593 "queryparser/queryparser_internal.cc"
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
char name[9]
Definition: dbcheck.cc:55
Iterator returning unigrams and bigrams.
Definition: word-breaker.h:52
Parser State shared between the lexer and the parser.
unsigned flags
State(QueryParser::Internal *qpi_, unsigned flags_)
Query::op default_op() const
Xapian::termcount get_max_partial_expansion() const
Term * range(const string &a, const string &b)
Database get_database() const
int get_max_partial_type() const
void add_to_stoplist(const Term *term)
void stoplist_resize(size_t s)
int get_max_wildcard_type() const
const Stopper * get_stopper() const
Query::op effective_default_op
QueryParser::Internal * qpi
Xapian::termcount get_max_wildcard_expansion() const
void add_to_unstem(const string &term, const string &unstemmed)
const char * error
size_t stoplist_size() const
string stem_term(const string &term)
bool is_stopword(const Term *term) const
Xapian::Query value_type
bool operator!=(const SynonymIterator &o) const
bool operator==(const SynonymIterator &o) const
const Xapian::Query operator*() const
SynonymIterator(const Xapian::TermIterator &i_, Xapian::termpos pos_=0, const Xapian::Query *first_=NULL)
SynonymIterator & operator++()
const Xapian::Query * first
Xapian::TermIterator i
std::input_iterator_tag iterator_category
Xapian::termcount_diff difference_type
Xapian::Query * pointer
Xapian::Query & reference
A group of terms separated only by whitespace.
Query * as_group(State *state) const
Convert to a Xapian::Query * using default_op.
TermGroup(Term *t1, Term *t2)
static TermGroup * create(Term *t1, Term *t2)
Factory function - ensures heap allocation.
void add_term(Term *term)
Add a Term object to this TermGroup object.
bool empty_ok
Controls how to handle a group where all terms are stopwords.
vector< Term * > terms
void set_empty_ok()
Set the empty_ok flag.
Class used to pass information about a token from lexer to parser.
string get_grouping() const
Term(const string &name_, termpos pos_)
QueryParser::stem_strategy stem
const FieldInfo * field_info
Term(const string &name_)
string make_term(const string &prefix) const
void need_positions()
Query get_query_with_synonyms() const
void as_positional_unbroken(Terms *terms) const
Handle text without explicit word breaks in a positional context.
Query get_query() const
Term(const string &name_, const FieldInfo *field_info_)
Term(const Xapian::Query &q, const string &grouping)
Query * as_partial_query(State *state_) const
Build a query for a term at the very end of the query string when FLAG_PARTIAL is in use.
Query * as_wildcarded_query(State *state) const
Term(termpos pos_)
Query get_query_with_auto_synonyms() const
Query as_range_query() const
Range query.
string unstemmed
Query * as_unbroken_query() const
Build a query for a string of words without explicit word breaks.
termpos get_termpos() const
Term(State *state_, const string &name_, const FieldInfo *field_info_, const string &unstemmed_, QueryParser::stem_strategy stem_=QueryParser::STEM_NONE, termpos pos_=0)
Some terms which form a positional sub-query.
Query * as_adj_query() const
Convert to a Xapian::Query * using OP_PHRASE to implement ADJ.
Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const
Convert to a query using the given operator and window size.
size_t window
Window size.
static Terms * create(State *state)
Factory function - ensures heap allocation.
bool uniform_prefixes
Keep track of whether the terms added all have the same list of prefixes.
const vector< string > * prefixes
The list of prefixes of the terms added.
Query * as_near_query() const
Convert to a Xapian::Query * using OP_NEAR.
Terms(bool no_pos)
void adjust_window(size_t alternative_window)
Query * as_phrase_query() const
Convert to a Xapian::Query * using adjacent OP_PHRASE.
Query opwindow_subq(Query::op op, const vector< Query > &v, Xapian::termcount w) const
vector< Term * > terms
void add_positional_term(Term *term)
Add an unstemmed Term object to this Terms object.
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::TermIterator synonyms_end(const std::string &) const
Corresponding end iterator to synonyms_begin(term).
Definition: database.h:452
Xapian::TermIterator synonym_keys_begin(const std::string &prefix=std::string()) const
An iterator which returns all terms which have synonyms.
Definition: omdatabase.cc:731
Xapian::TermIterator synonym_keys_end(const std::string &=std::string()) const
Corresponding end iterator to synonym_keys_begin(prefix).
Definition: database.h:464
Xapian::TermIterator synonyms_begin(const std::string &term) const
An iterator which returns all the synonyms for a given term.
Definition: omdatabase.cc:713
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:719
Base class for field processors.
Definition: queryparser.h:749
Xapian::valueno get_slot() const
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
Xapian::Internal::opt_intrusive_ptr< const Stopper > stopper
multimap< string, string > unstem
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:797
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Definition: queryparser.h:962
Class representing a query.
Definition: query.h:56
bool empty() const
Check if this query is Xapian::Query::MatchNothing.
Definition: query.h:537
op
Query operators.
Definition: query.h:88
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
Definition: query.h:168
@ OP_NEAR
Match only documents where all subqueries match near each other.
Definition: query.h:150
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Definition: query.h:162
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
Definition: query.h:241
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Definition: query.h:249
@ LEAF_TERM
Value returned by get_type() for a term.
Definition: query.h:276
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
Definition: query.h:233
@ OP_INVALID
Construct an invalid query.
Definition: query.h:273
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:59
op get_type() const
Get the type of the top level of the query.
Definition: query.cc:212
bool is_none() const
Return true if this is a no-op stemmer.
Definition: stem.h:170
Abstract base class for stop-word decision functor.
Definition: queryparser.h:51
Class for iterating over a list of terms.
Definition: termiterator.h:41
void skip_to(const std::string &term)
Advance the iterator to term term.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
An iterator which returns Unicode character values from a UTF-8 encoded string.
Definition: unicode.h:38
const char * raw() const
Return the raw const char* pointer for the current position.
Definition: unicode.h:54
Debug logging macros.
#define LOGLINE(a, b)
Definition: debuglog.h:494
Hierarchy of classes which Xapian can throw as exceptions.
#define true
Definition: header.h:8
#define false
Definition: header.h:9
string str(int value)
Convert int to std::string.
Definition: str.cc:90
category get_category(int info)
Definition: unicode.h:271
void append_utf8(std::string &s, unsigned ch)
Append the UTF-8 representation of a single Unicode character to a std::string.
Definition: unicode.h:332
unsigned tolower(unsigned ch)
Convert a Unicode character to lowercase.
Definition: unicode.h:376
@ LOWERCASE_LETTER
Letter, lowercase (Ll)
Definition: unicode.h:223
@ MODIFIER_LETTER
Letter, modifier (Lm)
Definition: unicode.h:225
@ OTHER_LETTER
Letter, other (Lo)
Definition: unicode.h:226
@ DECIMAL_DIGIT_NUMBER
Number, decimal digit (Nd)
Definition: unicode.h:230
@ TITLECASE_LETTER
Letter, titlecase (Lt)
Definition: unicode.h:224
bool is_wordchar(unsigned ch)
Test if a given Unicode character is "word character".
Definition: unicode.h:343
bool is_currency(unsigned ch)
Test if a given Unicode character is a currency symbol.
Definition: unicode.h:371
bool is_whitespace(unsigned ch)
Test if a given Unicode character is a whitespace character.
Definition: unicode.h:361
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
XAPIAN_TERMCOUNT_BASE_TYPE termcount_diff
A signed difference between two counts of terms.
Definition: types.h:79
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
Various assertion macros.
#define AssertEq(A, B)
Definition: omassert.h:124
#define Assert(COND)
Definition: omassert.h:122
Xapian::Query internals.
static void yy_pop_parser_stack(yyParser *pParser)
bool is_not_whitespace(unsigned ch)
const unsigned UNICODE_IGNORE
Value representing "ignore this" when returned by check_infix() or check_infix_digit().
YYCODETYPE lhs
bool should_stem(const string &term)
#define YYCODETYPE
#define VET_BOOL_ARGS(A, B, OP_TXT)
static const YYCODETYPE yy_lookahead[]
static unsigned int yy_find_shift_action(yyParser *pParser, YYCODETYPE iLookAhead)
#define YY_SHIFT_MIN
#define ParseARG_FETCH
static const YYACTIONTYPE yy_action[]
#define ParseARG_PDECL
static void yy_syntax_error(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor)
static const YYACTIONTYPE yy_default[]
static void add_to_query(Query *&q, Query::op op, Query *term)
bool U_isupper(unsigned ch)
struct yyParser yyParser
#define YYACTIONTYPE
#define YYNOCODE
#define YY_MIN_SHIFTREDUCE
#define YY_ERROR_ACTION
static const unsigned short int yy_shift_ofst[]
#define YYNSTATE
#define YY_ACTTAB_COUNT
#define YY_MIN_REDUCE
bool is_not_wordchar(unsigned ch)
bool is_digit(unsigned ch)
static void yy_shift(yyParser *yypParser, int yyNewState, int yyMajor, ParseTOKENTYPE yyMinor)
unsigned check_infix_digit(unsigned ch)
bool is_suffix(unsigned ch)
#define YY_MAX_SHIFT
static const short yy_reduce_ofst[]
static void yy_accept(yyParser *)
static int yy_find_reduce_action(int stateno, YYCODETYPE iLookAhead)
#define ParseARG_SDECL
#define YY_SHIFT_COUNT
bool U_isdigit(unsigned ch)
#define YY_REDUCE_COUNT
#define YY_ACCEPT_ACTION
bool is_stem_preventer(unsigned ch)
#define yytestcase(X)
bool is_positional(Xapian::Query::op op)
#define YYNTOKEN
static void ParseFinalize(yyParser *pParser)
bool is_phrase_generator(unsigned ch)
unsigned check_infix(unsigned ch)
#define ParseARG_STORE
#define YY_MAX_SHIFTREDUCE
#define yyTraceShift(X, Y, Z)
static const struct @13 yyRuleInfo[]
#define ParseTOKENTYPE
bool U_isalpha(unsigned ch)
#define YYSTACKDEPTH
bool prefix_needs_colon(const string &prefix, unsigned ch)
signed char nrhs
#define YY_SHIFT_MAX
static void yy_parse_failed(yyParser *)
bool is_double_quote(unsigned ch)
static void ParseInit(yyParser *pParser)
static void Parse(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor ParseARG_PDECL)
static void yy_reduce(yyParser *yypParser, unsigned int yyruleno, int yyLookahead, ParseTOKENTYPE yyLookaheadToken)
static void yy_destructor(yyParser *yypParser, YYCODETYPE yymajor, YYMINORTYPE *yypminor)
The non-lemon-generated parts of the QueryParser class.
@ NON_BOOLEAN
@ BOOLEAN_EXCLUSIVE
#define HATE_AFTER_AND
#define OR
#define SYNONYM
#define XOR
#define BRA
#define EMPTY_GROUP_OK
#define HATE
#define QUOTE
#define NEAR
#define LOVE
#define TERM
#define UNBROKEN_WORDS
#define PARTIAL_TERM
#define RANGE
#define KET
#define AND
#define NOT
#define ADJ
#define WILD_TERM
#define GROUP_TERM
#define BOOLEAN_FILTER
#define PHR_TERM
static Xapian::Stem stemmer
Definition: stemtest.cc:42
Convert types to std::string.
Various handy helpers which std::string really should provide.
char C_toupper(char ch)
Definition: stringutils.h:226
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
Information about how to handle a field prefix in the query string.
filter_type type
The type of this field.
vector< string > prefixes
Field prefix strings.
void append_filter(const string &grouping, const Query &qnew)
Query merge_filters() const
void add_filter_range(const string &grouping, const Query &range)
void append_filter_range(const string &grouping, const Query &range)
ProbQuery(Query *query_)
map< string, Query > filter
void add_filter(const string &grouping, const Query &q)
Definition: header.h:151
ParseARG_SDECL vector< yyStackEntry > yystack
yyStackEntry(YYACTIONTYPE stateno_, YYCODETYPE major_, ParseTOKENTYPE minor_)
Unicode and UTF-8 related classes and functions.
ParseTOKENTYPE yy0
bool is_unbroken_script(unsigned p)
Definition: word-breaker.cc:71
void get_unbroken(Xapian::Utf8Iterator &it)
Definition: word-breaker.cc:86
bool is_ngram_enabled()
Should we use the n-gram code?
Definition: word-breaker.cc:41
Handle text without explicit word breaks.