xapian-core  2.0.0
queryparser_internal.cc
Go to the documentation of this file.
1 /*
2 ** 2000-05-29
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** Driver template for the LEMON parser generator.
13 **
14 ** Synced with upstream:
15 ** https://www.sqlite.org/src/artifact/468a155e8729cfbccfe1d85bf60d064f1dab76167a51149ec5c7928a2de63953
16 **
17 ** The "lemon" program processes an LALR(1) input grammar file, then uses
18 ** this template to construct a parser. The "lemon" program inserts text
19 ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
20 ** interstitial "-" characters) contained in this template is changed into
21 ** the value of the %name directive from the grammar. Otherwise, the content
22 ** of this template is copied straight through into the generated parser
23 ** source file.
24 **
25 ** The following is the concatenation of all %include directives from the
26 ** input grammar file:
27 */
28 /************ Begin %include sections from the grammar ************************/
29 #line 1 "queryparser/queryparser.lemony"
30 
34 /* Copyright (C) 2004-2026 Olly Betts
35  * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
36  * Copyright (C) 2010 Adam Sjøgren
37  *
38  * This program is free software; you can redistribute it and/or
39  * modify it under the terms of the GNU General Public License as
40  * published by the Free Software Foundation; either version 2 of the
41  * License, or (at your option) any later version.
42  *
43  * This program is distributed in the hope that it will be useful,
44  * but WITHOUT ANY WARRANTY; without even the implied warranty of
45  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46  * GNU General Public License for more details.
47  *
48  * You should have received a copy of the GNU General Public License
49  * along with this program; if not, see
50  * <https://www.gnu.org/licenses/>.
51  */
52 
53 #include <config.h>
54 
55 #include "queryparser_internal.h"
56 
57 #include "api/queryinternal.h"
58 #include "omassert.h"
59 #include "str.h"
60 #include "stringutils.h"
61 #include "xapian/error.h"
62 #include "xapian/unicode.h"
63 
64 // Include the list of token values lemon generates.
65 #include "queryparser_token.h"
66 
67 #include "word-breaker.h"
68 
69 #include <algorithm>
70 #include <cstring>
71 #include <limits>
72 #include <list>
73 #include <string>
74 #include <string_view>
75 #include <vector>
76 
77 // We create the yyParser on the stack.
78 #define Parse_ENGINEALWAYSONSTACK
79 
80 using namespace std;
81 
82 using namespace Xapian;
83 
84 static constexpr unsigned NO_EDIT_DISTANCE = unsigned(-1);
85 static constexpr unsigned DEFAULT_EDIT_DISTANCE = 2;
86 
87 inline bool
88 U_isupper(unsigned ch) {
89  return ch < 128 && C_isupper(static_cast<unsigned char>(ch));
90 }
91 
92 inline bool
93 U_isdigit(unsigned ch) {
94  return ch < 128 && C_isdigit(static_cast<unsigned char>(ch));
95 }
96 
97 inline bool
98 U_isalpha(unsigned ch) {
99  return ch < 128 && C_isalpha(static_cast<unsigned char>(ch));
100 }
101 
103 
104 inline bool
105 is_not_whitespace(unsigned ch) {
106  return !is_whitespace(ch);
107 }
108 
110 
111 inline bool
112 is_not_wordchar(unsigned ch) {
113  return !is_wordchar(ch);
114 }
115 
116 inline bool
117 is_digit(unsigned ch) {
119 }
120 
121 // FIXME: we used to keep trailing "-" (e.g. Cl-) but it's of dubious utility
122 // and there's the risk of hyphens getting stuck onto the end of terms...
123 //
124 // There are currently assumptions below that this only matches ASCII
125 // characters.
inline bool
is_suffix(unsigned ch) {
    // Characters we allow as a trailing suffix on a term, so that e.g.
    // "C++", "Na+" and "C#" survive as single terms.
    //
    // There are currently assumptions elsewhere that this only matches
    // ASCII characters.
    switch (ch) {
        case '+':
        case '#':
            return true;
        default:
            return false;
    }
}
130 
inline bool
is_double_quote(unsigned ch) {
    // Treat the ASCII double quote and the Unicode curly double quotes as
    // interchangeable:
    //
    //   0x201c is the Unicode opening double quote.
    //   0x201d is the Unicode closing double quote.
    //
    // This is a bit crude, but it isn't clear that requiring quotes to
    // match up exactly would actually be better.
    switch (ch) {
        case '"':
        case 0x201c:
        case 0x201d:
            return true;
    }
    return false;
}
141 
142 inline bool
143 prefix_needs_colon(const string & prefix, unsigned ch)
144 {
145  if (!U_isupper(ch) && ch != ':') return false;
146  string::size_type len = prefix.length();
147  return (len > 1 && prefix[len - 1] != ':');
148 }
149 
151 
152 inline bool
154 {
155  return (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR);
156 }
157 
158 class Terms;
159 
166 class Term {
168 
169  public:
170  string name;
172  string unstemmed;
176  unsigned edit_distance;
177 
178  Term(const string &name_, termpos pos_)
179  : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { }
180  explicit Term(const string &name_)
181  : name(name_), stem(QueryParser::STEM_NONE), pos(0) { }
182  Term(const string &name_, const FieldInfo * field_info_)
183  : name(name_), field_info(field_info_),
184  stem(QueryParser::STEM_NONE), pos(0) { }
185  explicit Term(termpos pos_) : stem(QueryParser::STEM_NONE), pos(pos_) { }
186  Term(State * state_, const string &name_, const FieldInfo * field_info_,
187  const string &unstemmed_,
188  QueryParser::stem_strategy stem_ = QueryParser::STEM_NONE,
189  termpos pos_ = 0,
190  unsigned edit_distance_ = NO_EDIT_DISTANCE)
191  : state(state_), name(name_), field_info(field_info_),
192  unstemmed(unstemmed_), stem(stem_), pos(pos_),
193  edit_distance(edit_distance_) { }
194  // For RANGE tokens.
195  Term(const Xapian::Query & q, const string & grouping)
196  : name(grouping), query(q) { }
197 
198  string make_term(const string & prefix) const;
199 
200  void need_positions() {
201  if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
202  }
203 
204  termpos get_termpos() const { return pos; }
205 
206  string get_grouping() const {
207  return field_info->grouping;
208  }
209 
210  Query * as_fuzzy_query(State * state) const;
211 
212  Query * as_wildcarded_query(State * state) const;
213 
222  Query * as_partial_query(State * state_) const;
223 
225  Query* as_unbroken_query() const;
226 
228  void as_positional_unbroken(Terms* terms) const;
229 
231  Query as_range_query() const;
232 
233  Query get_query() const;
234 
235  Query get_query_with_synonyms() const;
236 
237  Query get_query_with_auto_synonyms() const;
238 };
239 
241 class State {
243 
244  public:
246  const char* error = NULL;
247  unsigned flags;
248  unsigned int should_stem_mask =
251  (1 << Unicode::MODIFIER_LETTER) |
252  (1 << Unicode::OTHER_LETTER);
254 
255  State(QueryParser::Internal * qpi_, unsigned flags_)
256  : qpi(qpi_), flags(flags_), effective_default_op(qpi_->default_op)
257  {
258  if ((flags & QueryParser::FLAG_NO_PROPER_NOUN_HEURISTIC) ||
259  qpi->stemmer.is_none() ||
260  !qpi->stemmer.internal->use_proper_noun_heuristic()) {
261  should_stem_mask |= (1 << Unicode::UPPERCASE_LETTER);
262  }
263  if ((flags & QueryParser::FLAG_NO_POSITIONS)) {
264  if (is_positional(effective_default_op)) {
265  effective_default_op = Query::OP_AND;
266  }
267  }
268  }
269 
270  string stem_term(const string &term) {
271  return qpi->stemmer(term);
272  }
273 
274  void add_to_stoplist(const Term * term) {
275  qpi->stoplist.push_back(term->name);
276  }
277 
278  void add_to_unstem(const string & term, const string & unstemmed) {
279  qpi->unstem.insert(make_pair(term, unstemmed));
280  }
281 
282  Term * range(const string &a, const string &b) {
283  for (auto i : qpi->rangeprocs) {
284  Xapian::Query range_query = (i.proc)->check_range(a, b);
285  Xapian::Query::op op = range_query.get_type();
286  switch (op) {
288  break;
292  if (i.default_grouping) {
294  static_cast<Xapian::Internal::QueryValueBase*>(
295  range_query.internal.get());
296  Xapian::valueno slot = base->get_slot();
297  return new Term(range_query, str(slot));
298  }
299  // FALLTHRU
301  return new Term(range_query, i.grouping);
302  default:
303  return new Term(range_query, string());
304  }
305  }
306  return NULL;
307  }
308 
310  return effective_default_op;
311  }
312 
313  bool is_stopword(const Term *term) const {
314  return qpi->stopper && (*qpi->stopper)(term->name);
315  }
316 
318  return qpi->db;
319  }
320 
321  const Stopper * get_stopper() const {
322  return qpi->stopper.get();
323  }
324 
326  return qpi->stop_mode;
327  }
328 
329  size_t stoplist_size() const {
330  return qpi->stoplist.size();
331  }
332 
333  void stoplist_resize(size_t s) {
334  qpi->stoplist.resize(s);
335  }
336 
338  return qpi->max_wildcard_expansion;
339  }
340 
341  int get_max_wildcard_type() const {
342  return qpi->max_wildcard_type;
343  }
344 
345  unsigned get_min_wildcard_prefix_len() const {
346  return qpi->min_wildcard_prefix_len;
347  }
348 
350  return qpi->max_partial_expansion;
351  }
352 
353  int get_max_partial_type() const {
354  return qpi->max_partial_type;
355  }
356 
357  unsigned get_min_partial_prefix_len() const {
358  return qpi->min_partial_prefix_len;
359  }
360 
362  return qpi->max_fuzzy_expansion;
363  }
364 
365  int get_max_fuzzy_type() const {
366  return qpi->max_fuzzy_type;
367  }
368 };
369 
370 string
371 Term::make_term(const string & prefix) const
372 {
373  if (state->get_stopper_strategy() == QueryParser::STOP_ALL) {
374  const Xapian::Stopper* stopper = state->get_stopper();
375  if (stopper && (*stopper)(name)) {
376  return string();
377  }
378  }
379 
380  string term;
381  if (stem != QueryParser::STEM_NONE && stem != QueryParser::STEM_ALL)
382  term += 'Z';
383  if (!prefix.empty()) {
384  term += prefix;
385  if (prefix_needs_colon(prefix, name[0])) term += ':';
386  }
387  if (stem != QueryParser::STEM_NONE) {
388  term += state->stem_term(name);
389  } else {
390  term += name;
391  }
392 
393  if (!unstemmed.empty())
394  state->add_to_unstem(term, unstemmed);
395  return term;
396 }
397 
398 // Iterator shim to allow building a synonym query from a TermIterator pair.
401 
403 
405 
406  public:
408  Xapian::termpos pos_ = 0,
409  const Xapian::Query * first_ = NULL)
410  : i(i_), pos(pos_), first(first_) { }
411 
413  if (first)
414  first = NULL;
415  else
416  ++i;
417  return *this;
418  }
419 
420  const Xapian::Query operator*() const {
421  if (first) return *first;
422  return Xapian::Query(*i, 1, pos);
423  }
424 
425  bool operator==(const SynonymIterator & o) const {
426  return i == o.i && first == o.first;
427  }
428 
429  bool operator!=(const SynonymIterator & o) const {
430  return !(*this == o);
431  }
432 
433  typedef std::input_iterator_tag iterator_category;
438 };
439 
440 Query
442 {
443  // Handle single-word synonyms with each prefix.
444  const auto& prefixes = field_info->prefixes;
445  if (prefixes.empty()) {
446  Assert(field_info->proc);
447  return (*field_info->proc)(name);
448  }
449 
450  Query q = get_query();
451 
452  for (auto&& prefix : prefixes) {
453  // First try the unstemmed term:
454  string term;
455  if (!prefix.empty()) {
456  term += prefix;
457  if (prefix_needs_colon(prefix, name[0])) term += ':';
458  }
459  term += name;
460 
461  Xapian::Database db = state->get_database();
464  if (syn == end && stem != QueryParser::STEM_NONE) {
465  // If that has no synonyms, try the stemmed form:
466  term = 'Z';
467  if (!prefix.empty()) {
468  term += prefix;
469  if (prefix_needs_colon(prefix, name[0])) term += ':';
470  }
471  term += state->stem_term(name);
472  syn = db.synonyms_begin(term);
473  end = db.synonyms_end(term);
474  }
475  q = Query(q.OP_SYNONYM,
476  SynonymIterator(syn, pos, &q),
477  SynonymIterator(end));
478  }
479  return q;
480 }
481 
482 Query
484 {
485  const unsigned MASK_ENABLE_AUTO_SYNONYMS =
486  QueryParser::FLAG_AUTO_SYNONYMS |
487  QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS;
488  if (state->flags & MASK_ENABLE_AUTO_SYNONYMS)
489  return get_query_with_synonyms();
490 
491  return get_query();
492 }
493 
494 static void
496 {
497  Assert(term);
498  if (q) {
499  if (op == Query::OP_OR) {
500  *q |= *term;
501  } else if (op == Query::OP_AND) {
502  *q &= *term;
503  } else {
504  *q = Query(op, *q, *term);
505  }
506  delete term;
507  } else {
508  q = term;
509  }
510 }
511 
512 static void
514 {
515  if (q) {
516  if (op == Query::OP_OR) {
517  *q |= term;
518  } else if (op == Query::OP_AND) {
519  *q &= term;
520  } else {
521  *q = Query(op, *q, term);
522  }
523  } else {
524  q = new Query(term);
525  }
526 }
527 
528 Query
530 {
531  const auto& prefixes = field_info->prefixes;
532  if (prefixes.empty()) {
533  Assert(field_info->proc);
534  return (*field_info->proc)(name);
535  }
536  auto piter = prefixes.begin();
537  const string& term = make_term(*piter);
538  if (term.empty()) return Query();
539  Query q(term, 1, pos);
540  while (++piter != prefixes.end()) {
541  q |= Query(make_term(*piter), 1, pos);
542  }
543  return q;
544 }
545 
546 Query *
548 {
549  const auto& prefixes = field_info->prefixes;
551  int query_flags = state_->get_max_fuzzy_type();
552  vector<Query> subqs;
553  subqs.reserve(prefixes.size());
554  for (auto&& prefix : prefixes) {
555  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
556  subqs.emplace_back(Query::OP_EDIT_DISTANCE,
557  prefix + name,
558  max,
559  query_flags,
560  Query::OP_OR,
561  edit_distance,
562  prefix.size());
563  }
564  Query* q = new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
565  delete this;
566  return q;
567 }
568 
569 Query *
571 {
572  const auto& prefixes = field_info->prefixes;
574  int query_flags = state_->get_max_wildcard_type();
575  if (state_->flags & QueryParser::FLAG_WILDCARD_SINGLE)
576  query_flags |= Query::WILDCARD_PATTERN_SINGLE;
577  if (state_->flags & QueryParser::FLAG_WILDCARD_MULTI)
578  query_flags |= Query::WILDCARD_PATTERN_MULTI;
579  vector<Query> subqs;
580  subqs.reserve(prefixes.size());
581  for (string root : prefixes) {
582  root += name;
583  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
584  subqs.push_back(Query(Query::OP_WILDCARD, root, max, query_flags,
585  Query::OP_OR));
586  }
587  Query * q = new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
588  delete this;
589  return q;
590 }
591 
592 Query *
594 {
596  int max_type = state_->get_max_partial_type();
597  vector<Query> subqs_partial; // A synonym of all the partial terms.
598  vector<Query> subqs_full; // A synonym of all the full terms.
599 
600  for (const string& prefix : field_info->prefixes) {
601  string root = prefix;
602  root += name;
603  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
604  subqs_partial.push_back(Query(Query::OP_WILDCARD, root, max, max_type,
605  Query::OP_OR));
606  if (!state->is_stopword(this)) {
607  // Add the term, as it would normally be handled, as an alternative
608  // (unless it is a stopword).
609  subqs_full.push_back(Query(make_term(prefix), 1, pos));
610  }
611  }
612  Query * q = new Query(Query::OP_OR,
613  Query(Query::OP_SYNONYM,
614  subqs_partial.begin(), subqs_partial.end()),
615  Query(Query::OP_SYNONYM,
616  subqs_full.begin(), subqs_full.end()));
617  delete this;
618  return q;
619 }
620 
621 Query *
623 {
624  const auto& prefixes = field_info->prefixes;
625  Query *q;
626  vector<Query> prefix_subqs;
627 
628 #ifdef USE_ICU
629  if (state->flags & QueryParser::FLAG_WORD_BREAKS) {
630  for (WordIterator tk(name); tk != WordIterator(); ++tk) {
631  const string& token = *tk;
632  for (const string& prefix : prefixes) {
633  prefix_subqs.push_back(Query(prefix + token, 1, pos));
634  }
635  }
636 
637  q = new Query(Query::OP_AND, prefix_subqs.begin(), prefix_subqs.end());
638 
639  delete this;
640  return q;
641  }
642 #endif
643 
644  vector<Query> ngram_subqs;
645 
646  for (const string& prefix : prefixes) {
647  for (NgramIterator tk(name); tk != NgramIterator(); ++tk) {
648  ngram_subqs.push_back(Query(prefix + *tk, 1, pos));
649  }
650  prefix_subqs.push_back(Query(Query::OP_AND,
651  ngram_subqs.begin(), ngram_subqs.end()));
652  ngram_subqs.clear();
653  }
654  q = new Query(Query::OP_OR, prefix_subqs.begin(), prefix_subqs.end());
655 
656  delete this;
657  return q;
658 }
659 
660 Query
662 {
663  Query q = query;
664  delete this;
665  return q;
666 }
667 
668 inline bool
670 {
671  // These characters generate a phrase search.
672  // Ordered mostly by frequency of calls to this function done when
673  // running the testcases in api_queryparser.cc.
674  return (ch && ch < 128 && strchr(".-/:\\@", ch) != NULL);
675 }
676 
inline bool
is_stem_preventer(unsigned ch)
{
    // A term followed by one of these characters shouldn't be stemmed.
    // Guard against ch == 0, which strchr() would match (the terminating
    // NUL), and restrict to ASCII.
    if (ch == 0 || ch >= 128) return false;
    return strchr("(/\\@<>=*[{\"", static_cast<int>(ch)) != NULL;
}
682 
683 inline bool
684 should_stem(const string& term, const State& state)
685 {
686  Utf8Iterator u(term);
687  return ((state.should_stem_mask >> Unicode::get_category(*u)) & 1);
688 }
689 
693 const unsigned UNICODE_IGNORE = numeric_limits<unsigned>::max();
694 
695 inline unsigned check_infix(unsigned ch) {
696  if (ch == '\'' || ch == '&' || ch == 0xb7 || ch == 0x5f4 || ch == 0x2027) {
697  // Unicode includes all these except '&' in its word boundary rules,
698  // as well as 0x2019 (which we handle below) and ':' (for Swedish
699  // apparently, but we ignore this for now as it's problematic in
700  // real world cases:
701  // https://en.wikipedia.org/wiki/Colon_(punctuation)#Usage_in_other_languages
702  // ).
703  return ch;
704  }
705  if (ch >= 0x200c) {
706  // 0x2019 is Unicode apostrophe and single closing quote.
707  // 0x201b is Unicode single opening quote with the tail rising.
708  if (ch == 0x2019 || ch == 0x201b)
709  return '\'';
710  // 0x200c and 0x200d are zero width non-joiner and joiner respectively.
711  // 0x2060 and 0xfeff are word joiners (0xfeff deprecated since Unicode
712  // 3.2).
713  if (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff)
714  return UNICODE_IGNORE;
715  }
716  // 0xad is SOFT HYPHEN which marks a potential hyphenation point in a word.
717  if (ch == 0xad)
718  return UNICODE_IGNORE;
719  return 0;
720 }
721 
722 inline unsigned check_infix_digit(unsigned ch) {
723  // This list of characters comes from Unicode's word identifying algorithm.
724  switch (ch) {
725  case ',':
726  case '.':
727  case ';':
728  case 0x037e: // GREEK QUESTION MARK
729  case 0x0589: // ARMENIAN FULL STOP
730  case 0x060D: // ARABIC DATE SEPARATOR
731  case 0x07F8: // NKO COMMA
732  case 0x2044: // FRACTION SLASH
733  case 0xFE10: // PRESENTATION FORM FOR VERTICAL COMMA
734  case 0xFE13: // PRESENTATION FORM FOR VERTICAL COLON
735  case 0xFE14: // PRESENTATION FORM FOR VERTICAL SEMICOLON
736  return ch;
737  }
738  if (ch >= 0x200b && (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff))
739  return UNICODE_IGNORE;
740  return 0;
741 }
742 
743 // Prototype a function lemon generates, but which we want to call before that
744 // in the generated source code file.
745 struct yyParser;
746 static void yy_parse_failed(yyParser *);
747 
void
QueryParser::Internal::add_prefix(string_view field, string_view prefix)
{
    // Map FIELD to the term PREFIX as a probabilistic (NON_BOOLEAN) prefix.
    // One field may be mapped to several prefixes by repeated calls.
    //
    // Allow for optional trailing `:` for consistency with how range prefixes
    // are specified.
    if (!field.empty() && field.back() == ':') {
        field = field.substr(0, field.size() - 1);
    }
#ifdef __cpp_lib_associative_heterogeneous_insertion // C++26
    auto [it, inserted] = field_map.try_emplace(field, NON_BOOLEAN);
#else
    // Pre-C++26 the map can't try_emplace directly from a string_view key,
    // so materialise a string.
    auto [it, inserted] = field_map.try_emplace(string(field), NON_BOOLEAN);
#endif
    auto&& p = it->second;
    if (inserted) {
        // New field: just record the prefix.
        p.append(prefix);
        return;
    }

    // Check that this is the same type of filter as the existing one(s).
    if (p.type != NON_BOOLEAN) {
        throw Xapian::InvalidOperationError("Can't use add_prefix() and "
                                            "add_boolean_prefix() on the "
                                            "same field name, or "
                                            "add_boolean_prefix() with "
                                            "different values of the "
                                            "'exclusive' parameter");
    }
    if (p.proc)
        throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects "
                                              "and string prefixes currently "
                                              "not supported");
    // Only add if it's not already there as duplicate entries just result
    // in redundant query terms. This is a linear scan so makes calling
    // add_prefix() n times for the same field value with different prefix
    // values O(n²), but you wouldn't realistically want to map one field
    // to more than a handful of prefixes.
    auto& prefixes = p.prefixes;
    if (find(prefixes.begin(), prefixes.end(), prefix) == prefixes.end()) {
        p.append(prefix);
    }
}
790 
void
QueryParser::Internal::add_prefix(string_view field, FieldProcessor* proc)
{
    // Map FIELD to a FieldProcessor as a probabilistic (NON_BOOLEAN) field.
    // Unlike string prefixes, only one FieldProcessor per field is allowed,
    // so this throws if FIELD already has any mapping.
    //
    // Allow for optional trailing `:` for consistency with how range prefixes
    // are specified.
    if (!field.empty() && field.back() == ':') {
        field = field.substr(0, field.size() - 1);
    }
#ifdef __cpp_lib_associative_heterogeneous_insertion // C++26
    auto [it, inserted] = field_map.try_emplace(field,
                                                NON_BOOLEAN, proc);
#else
    // Pre-C++26 the map can't try_emplace directly from a string_view key,
    // so materialise a string.
    auto [it, inserted] = field_map.try_emplace(string(field),
                                                NON_BOOLEAN, proc);
#endif
    if (inserted)
        return;

    auto&& p = it->second;
    // Check that this is the same type of filter as the existing one(s).
    if (p.type != NON_BOOLEAN) {
        throw Xapian::InvalidOperationError("Can't use add_prefix() and "
                                            "add_boolean_prefix() on the "
                                            "same field name, or "
                                            "add_boolean_prefix() with "
                                            "different values of the "
                                            "'exclusive' parameter");
    }
    if (!p.prefixes.empty())
        throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects "
                                              "and string prefixes currently "
                                              "not supported");
    // Same filter type with no string prefixes means an existing
    // FieldProcessor is already registered for this field.
    throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects "
                                          "for the same prefix currently not "
                                          "supported");
}
827 
void
QueryParser::Internal::add_boolean_prefix(string_view field,
                                          string_view prefix,
                                          const string* grouping)
{
    // Map FIELD to the term PREFIX as a boolean filter prefix.  GROUPING
    // controls how multiple filters on this field combine: an empty
    // grouping string means non-exclusive (BOOLEAN, combined with AND),
    // anything else (including the nullptr default, which groups by FIELD)
    // means exclusive (BOOLEAN_EXCLUSIVE, combined with OR).
    //
    // Allow for optional trailing `:` for consistency with how range prefixes
    // are specified.
    if (!field.empty() && field.back() == ':') {
        field = field.substr(0, field.size() - 1);
    }
    // Don't allow the empty prefix to be set as boolean as it doesn't
    // really make sense.
    if (field.empty())
        throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
    // If grouping == nullptr then it defaults to field which is not empty().
    bool inclusive = grouping && grouping->empty();
    filter_type type = inclusive ? BOOLEAN : BOOLEAN_EXCLUSIVE;
#ifdef __cpp_lib_associative_heterogeneous_insertion // C++26
    auto [it, inserted] = field_map.try_emplace(field, type,
                                                grouping ? *grouping : field);
#else
    // Pre-C++26 the map can't try_emplace directly from a string_view key,
    // so materialise a string.
    auto [it, inserted] = field_map.try_emplace(string(field), type,
                                                grouping ? *grouping : field);
#endif
    auto&& p = it->second;
    if (inserted) {
        // New field: just record the prefix.
        p.append(prefix);
        return;
    }

    // Check that this is the same type of filter as the existing one(s).
    if (p.type != type) {
        throw Xapian::InvalidOperationError("Can't use add_prefix() and "
                                            "add_boolean_prefix() on the "
                                            "same field name, or "
                                            "add_boolean_prefix() with "
                                            "different values of the "
                                            "'exclusive' parameter"); // FIXME
    }
    if (p.proc)
        throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects "
                                              "and string prefixes currently "
                                              "not supported");
    // Only add if it's not already there as duplicate entries just result
    // in redundant query terms. This is a linear scan so makes calling
    // add_prefix() n times for the same field value with different prefix
    // values O(n²), but you wouldn't realistically want to map one field
    // to more than a handful of prefixes.
    auto& prefixes = p.prefixes;
    if (find(prefixes.begin(), prefixes.end(), prefix) == prefixes.end()) {
        // NOTE(review): add_prefix() calls p.append() here instead -
        // presumably equivalent aside from grouping handling; confirm
        // against the FieldInfo definition.
        prefixes.emplace_back(prefix); // FIXME grouping
    }
}
881 
void
QueryParser::Internal::add_boolean_prefix(string_view field,
                                          FieldProcessor *proc,
                                          const string* grouping)
{
    // Map FIELD to a FieldProcessor as a boolean filter.  GROUPING works as
    // for the string-prefix overload above.  Only one FieldProcessor per
    // field is allowed, so this throws if FIELD already has any mapping.
    //
    // Allow for optional trailing `:` for consistency with how range prefixes
    // are specified.
    if (!field.empty() && field.back() == ':') {
        field = field.substr(0, field.size() - 1);
    }
    // Don't allow the empty prefix to be set as boolean as it doesn't
    // really make sense.
    if (field.empty())
        throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
    // If grouping == nullptr then it defaults to field which is not empty().
    bool inclusive = grouping && grouping->empty();
    filter_type type = inclusive ? BOOLEAN : BOOLEAN_EXCLUSIVE;
#ifdef __cpp_lib_associative_heterogeneous_insertion // C++26
    auto [it, inserted] = field_map.try_emplace(field, type, proc,
                                                grouping ? *grouping : field);
#else
    // Pre-C++26 the map can't try_emplace directly from a string_view key,
    // so materialise a string.
    auto [it, inserted] = field_map.try_emplace(string(field), type, proc,
                                                grouping ? *grouping : field);
#endif
    if (inserted)
        return;

    auto&& p = it->second;
    // Check that this is the same type of filter as the existing one(s).
    if (p.type != type) {
        throw Xapian::InvalidOperationError("Can't use add_prefix() and "
                                            "add_boolean_prefix() on the "
                                            "same field name, or "
                                            "add_boolean_prefix() with "
                                            "different values of the "
                                            "'exclusive' parameter"); // FIXME
    }
    if (!p.prefixes.empty())
        throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects "
                                              "and string prefixes currently "
                                              "not supported");
    // Same filter type with no string prefixes means an existing
    // FieldProcessor is already registered for this field.
    throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects "
                                          "for the same prefix currently not "
                                          "supported");
}
927 
928 inline bool
929 is_extended_wildcard(unsigned ch, unsigned flags)
930 {
931  if (ch == '*') return (flags & QueryParser::FLAG_WILDCARD_MULTI);
932  if (ch == '?') return (flags & QueryParser::FLAG_WILDCARD_SINGLE);
933  return false;
934 }
935 
936 string
937 QueryParser::Internal::parse_term(Utf8Iterator& it, const Utf8Iterator& end,
938  bool try_word_break, unsigned flags,
939  bool& needs_word_break, bool& was_acronym,
940  size_t& first_wildcard,
941  size_t& char_count,
942  unsigned& edit_distance)
943 {
944  string term;
945  char_count = 0;
946  // Look for initials separated by '.' (e.g. P.T.O., U.N.C.L.E).
947  // Don't worry if there's a trailing '.' or not.
948  if (U_isupper(*it)) {
949  string t;
950  Utf8Iterator p = it;
951  do {
952  Unicode::append_utf8(t, *p++);
953  ++char_count;
954  } while (p != end && *p == '.' && ++p != end && U_isupper(*p));
955  // One letter does not make an acronym! If we handled a single
956  // uppercase letter here, we wouldn't catch M&S below.
957  if (t.length() > 1) {
958  // Check there's not a (lower case) letter or digit
959  // immediately after it.
960  // FIXME: should I.B.M..P.T.O be a range search?
961  if (p == end || !is_wordchar(*p)) {
962  it = p;
963  swap(term, t);
964  } else {
965  char_count = 0;
966  }
967  }
968  }
969  was_acronym = !term.empty();
970 
971  if (try_word_break && term.empty() && is_unbroken_script(*it)) {
972  const char* start = it.raw();
973  char_count = get_unbroken(it);
974  term.assign(start, it.raw() - start);
975  needs_word_break = true;
976  }
977 
978  if (term.empty()) {
979  unsigned prevch = *it;
980  if (first_wildcard == term.npos &&
981  is_extended_wildcard(prevch, flags)) {
982  // Leading wildcard.
983  first_wildcard = 0;
984  }
985  Unicode::append_utf8(term, prevch);
986  char_count = 1;
987  while (++it != end) {
988  if (try_word_break && is_unbroken_script(*it)) break;
989  unsigned ch = *it;
990  if (is_extended_wildcard(ch, flags)) {
991  if (first_wildcard == term.npos) {
992  first_wildcard = char_count;
993  }
994  } else if (!is_wordchar(ch)) {
995  // Treat a single embedded '&' or "'" or similar as a word
996  // character (e.g. AT&T, Fred's). Also, normalise
997  // apostrophes to ASCII apostrophe.
998  Utf8Iterator p = it;
999  ++p;
1000  if (p == end) break;
1001  unsigned nextch = *p;
1002  if (is_extended_wildcard(nextch, flags)) {
1003  // A wildcard follows, which could expand to a digit or a non-digit.
1004  unsigned ch_orig = ch;
1005  ch = check_infix(ch);
1006  if (!ch && is_digit(prevch))
1007  ch = check_infix_digit(ch_orig);
1008  if (!ch)
1009  break;
1010  } else {
1011  if (!is_wordchar(nextch)) break;
1012  }
1013  if (is_digit(prevch) && is_digit(nextch)) {
1014  ch = check_infix_digit(ch);
1015  } else {
1016  ch = check_infix(ch);
1017  }
1018  if (!ch) break;
1019  if (ch == UNICODE_IGNORE)
1020  continue;
1021  }
1023  ++char_count;
1024  prevch = ch;
1025  }
1026  if (it != end && is_suffix(*it)) {
1027  string suff_term = term;
1028  Utf8Iterator p = it;
1029  // Keep trailing + (e.g. C++, Na+) or # (e.g. C#).
1030  do {
1031  // Assumes is_suffix() only matches ASCII.
1032  if (suff_term.size() - term.size() == 3) {
1033  suff_term.resize(0);
1034  break;
1035  }
1036  suff_term += *p;
1037  } while (is_suffix(*++p));
1038  if (!suff_term.empty() && (p == end || !is_wordchar(*p))) {
1039  // If the suffixed term doesn't exist, check that the
1040  // non-suffixed term does. This also takes care of
1041  // the case when QueryParser::set_database() hasn't
1042  // been called.
1043  bool use_suff_term = false;
1044  string lc = Unicode::tolower(suff_term);
1045  if (db.term_exists(lc)) {
1046  use_suff_term = true;
1047  } else {
1048  lc = Unicode::tolower(term);
1049  if (!db.term_exists(lc)) use_suff_term = true;
1050  }
1051  if (use_suff_term) {
1052  // Assumes is_suffix() only matches ASCII.
1053  char_count += (suff_term.size() - term.size());
1054  term = suff_term;
1055  it = p;
1056  }
1057  }
1058  }
1059  if (first_wildcard == term.npos &&
1060  (flags & QueryParser::FLAG_WILDCARD)) {
1061  // Check for right-truncation.
1062  if (it != end && *it == '*') {
1063  ++it;
1064  first_wildcard = char_count;
1065  }
1066  }
1067  if (it != end &&
1068  (flags & QueryParser::FLAG_FUZZY) &&
1069  // Not a wildcard.
1070  first_wildcard == string::npos &&
1071  *it == '~') {
1072  Utf8Iterator p = it;
1073  ++p;
1074  unsigned ch = *p;
1075  if (p == end || is_whitespace(ch) || ch == ')') {
1076  it = p;
1077  edit_distance = DEFAULT_EDIT_DISTANCE;
1078  } else if (U_isdigit(ch)) {
1079  unsigned distance = ch - '0';
1080  while (++p != end && U_isdigit(*p)) {
1081  distance = distance * 10 + (*p - '0');
1082  }
1083  if (p != end && *p == '.') {
1084  if (distance == 0) goto fractional;
1085  // Ignore the fractional part on e.g. foo~12.5
1086  while (++p != end && U_isdigit(*p)) { }
1087  }
1088  if (p == end || is_whitespace(ch) || ch == ')') {
1089  it = p;
1090  edit_distance = distance;
1091  }
1092  } else if (ch == '.') {
1093 fractional:
1094  double fraction = 0.0;
1095  double digit = 0.1;
1096  while (++p != end && U_isdigit(*p)) {
1097  fraction += digit * (*p - '0');
1098  digit *= 0.1;
1099  }
1100  if (p == end || is_whitespace(ch) || ch == ')') {
1101  it = p;
1102  unsigned codepoints = 0;
1103  for (Utf8Iterator u8(term); u8 != Utf8Iterator(); ++u8) {
1104  ++codepoints;
1105  }
1106  edit_distance = unsigned(codepoints * fraction);
1107  }
1108  }
1109  }
1110  }
1111  return term;
1112 }
1113 
1114 #line 1771 "queryparser/queryparser.lemony"
1115 
1116 
1117 struct ProbQuery {
1118  Query* query = NULL;
1119  Query* love = NULL;
1120  Query* hate = NULL;
1121  // filter is a map from prefix to a query for that prefix. Queries with
1122  // the same prefix are combined with OR, and the results of this are
1123  // combined with AND to get the full filter.
1124  map<string, Query> filter;
1125 
1127 
1128  explicit
1129  ProbQuery(Query* query_) : query(query_) {}
1130 
1132  delete query;
1133  delete love;
1134  delete hate;
1135  }
1136 
1137  void add_filter(const string& grouping, const Query & q) {
1138  filter[grouping] = q;
1139  }
1140 
1141  void append_filter(const string& grouping, const Query & qnew) {
1142  auto it = filter.find(grouping);
1143  if (it == filter.end()) {
1144  filter.insert(make_pair(grouping, qnew));
1145  } else {
1146  Query & q = it->second;
1147  // We OR multiple filters with the same prefix if they're
1148  // exclusive, otherwise we AND them.
1149  bool exclusive = !grouping.empty();
1150  if (exclusive) {
1151  q |= qnew;
1152  } else {
1153  q &= qnew;
1154  }
1155  }
1156  }
1157 
1158  void add_filter_range(const string& grouping, const Query & range) {
1159  filter[grouping] = range;
1160  }
1161 
1162  void append_filter_range(const string& grouping, const Query & range) {
1163  Query & q = filter[grouping];
1164  q |= range;
1165  }
1166 
1168  auto i = filter.begin();
1169  Assert(i != filter.end());
1170  Query q = i->second;
1171  while (++i != filter.end()) {
1172  q &= i->second;
1173  }
1174  return q;
1175  }
1176 };
1177 
1179 class TermGroup {
1180  vector<Term *> terms;
1181 
1187  bool empty_ok;
1188 
1189  TermGroup(Term* t1, Term* t2) : empty_ok(false) {
1190  add_term(t1);
1191  add_term(t2);
1192  }
1193 
1194  public:
1196  static TermGroup* create(Term* t1, Term* t2) {
1197  return new TermGroup(t1, t2);
1198  }
1199 
1201  for (auto&& t : terms) {
1202  delete t;
1203  }
1204  }
1205 
1207  void add_term(Term * term) {
1208  terms.push_back(term);
1209  }
1210 
1212  void set_empty_ok() { empty_ok = true; }
1213 
1215  Query * as_group(State *state) const;
1216 };
1217 
1218 Query *
1220 {
1221  const Xapian::Stopper * stopper = state->get_stopper();
1222  size_t stoplist_size = state->stoplist_size();
1223  bool default_op_is_positional = is_positional(state->default_op());
1224 reprocess:
1225  Query::op default_op = state->default_op();
1226  vector<Query> subqs;
1227  subqs.reserve(terms.size());
1228  if (state->flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
1229  // Check for multi-word synonyms.
1230  Database db = state->get_database();
1231 
1232  string key;
1233  vector<Term*>::size_type begin = 0;
1234  vector<Term*>::size_type i = begin;
1235  while (terms.size() - i > 0) {
1236  size_t longest_match = 0;
1237  // This value is never used, but GCC 4.8 warns with
1238  // -Wmaybe-uninitialized (GCC 5.4 doesn't).
1239  vector<Term*>::size_type longest_match_end = 0;
1240  if (terms.size() - i >= 2) {
1241  // Greedily try to match as many consecutive words as possible.
1242  key = terms[i]->name;
1243  key += ' ';
1244  key += terms[i + 1]->name;
1245  TermIterator synkey(db.synonym_keys_begin(key));
1246  TermIterator synend(db.synonym_keys_end(key));
1247  if (synkey != synend) {
1248  longest_match = key.size();
1249  longest_match_end = i + 2;
1250  for (auto j = i + 2; j < terms.size(); ++j) {
1251  key += ' ';
1252  key += terms[j]->name;
1253  synkey.skip_to(key);
1254  if (synkey == synend)
1255  break;
1256  const string& found = *synkey;
1257  if (!startswith(found, key))
1258  break;
1259  if (found.size() == key.size()) {
1260  longest_match = key.size();
1261  longest_match_end = j + 1;
1262  }
1263  }
1264  }
1265  }
1266  if (longest_match == 0) {
1267  // No multi-synonym matches at position i.
1268  if (stopper && (*stopper)(terms[i]->name)) {
1269  state->add_to_stoplist(terms[i]);
1270  } else {
1271  if (default_op_is_positional)
1272  terms[i]->need_positions();
1273  subqs.push_back(terms[i]->get_query_with_auto_synonyms());
1274  }
1275  begin = ++i;
1276  continue;
1277  }
1278  i = longest_match_end;
1279  key.resize(longest_match);
1280 
1281  vector<Query> subqs2;
1282  for (auto j = begin; j != i; ++j) {
1283  if (stopper && (*stopper)(terms[j]->name)) {
1284  state->add_to_stoplist(terms[j]);
1285  } else {
1286  if (default_op_is_positional)
1287  terms[i]->need_positions();
1288  subqs2.push_back(terms[j]->get_query());
1289  }
1290  }
1291  Query q_original_terms;
1292  if (default_op_is_positional) {
1293  q_original_terms = Query(default_op,
1294  subqs2.begin(), subqs2.end(),
1295  subqs2.size() + 9);
1296  } else {
1297  q_original_terms = Query(default_op,
1298  subqs2.begin(), subqs2.end());
1299  }
1300  subqs2.clear();
1301 
1302  // Use the position of the first term for the synonyms.
1303  TermIterator syn = db.synonyms_begin(key);
1304  Query q(Query::OP_SYNONYM,
1305  SynonymIterator(syn, terms[begin]->pos, &q_original_terms),
1306  SynonymIterator(db.synonyms_end(key)));
1307  subqs.push_back(q);
1308 
1309  begin = i;
1310  }
1311  } else {
1312  vector<Term*>::const_iterator i;
1313  for (i = terms.begin(); i != terms.end(); ++i) {
1314  if (stopper && (*stopper)((*i)->name)) {
1315  state->add_to_stoplist(*i);
1316  } else {
1317  if (default_op_is_positional)
1318  (*i)->need_positions();
1319  subqs.push_back((*i)->get_query_with_auto_synonyms());
1320  }
1321  }
1322  }
1323 
1324  if (!empty_ok && stopper &&
1325  state->get_stopper_strategy() != QueryParser::STOP_ALL &&
1326  subqs.empty() &&
1327  stoplist_size < state->stoplist_size()) {
1328  // This group is all stopwords, so roll-back, disable stopper
1329  // temporarily, and reprocess this group.
1330  state->stoplist_resize(stoplist_size);
1331  stopper = NULL;
1332  goto reprocess;
1333  }
1334 
1335  Query * q = NULL;
1336  if (!subqs.empty()) {
1337  if (default_op_is_positional) {
1338  q = new Query(default_op, subqs.begin(), subqs.end(),
1339  subqs.size() + 9);
1340  } else {
1341  q = new Query(default_op, subqs.begin(), subqs.end());
1342  }
1343  }
1344  delete this;
1345  return q;
1346 }
1347 
// NOTE(review): this extracted listing dropped a number of original lines
// from this class (doc comments and member/method signature lines: orig.
// 1352-1356, 1359-1368, 1370-1372, 1375, 1384-1385, 1424, 1429, 1435-1436,
// 1454-1455, 1459-1460, 1482-1483).  In particular the declaration of the
// uniform_prefixes flag used below and several method signatures are
// missing.  Reconstruct from upstream queryparser.lemony before editing.
1349 class Terms {
1350  vector<Term *> terms;
1351 
// window is the positional window size; from as_opwindow_query() below,
// size_t(-1) means "don't use positional info" (an OP_AND query is built
// instead), and 0 means "use the default for the operator".
1357  size_t window;
1358 
1369 
// The prefixes shared by all added terms; set to NULL by
// add_positional_term() when the terms' prefixes differ.
1373  const vector<string>* prefixes;
1374 
// NOTE(review): missing line 1375 is the start of this private helper's
// signature - from the parameters and callers it is
// `Query opwindow_subq(Query::op op,` - confirm upstream.
1376  const vector<Query>& v,
1377  Xapian::termcount w) const {
1378  if (op == Query::OP_AND) {
1379  return Query(op, v.begin(), v.end());
1380  }
1381  return Query(op, v.begin(), v.end(), w);
1382  }
1383 
// NOTE(review): missing lines 1384-1385 held this method's signature -
// from the body and the calls below it is
// `Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta)` and
// it does `delete this;` - confirm upstream.
1386  if (window == size_t(-1)) op = Query::OP_AND;
1387  Query * q = NULL;
1388  size_t n_terms = terms.size();
1389  Xapian::termcount w = w_delta + terms.size();
1390  if (uniform_prefixes) {
1391  if (prefixes) {
// One subquery per prefix, OR-ed together; terms which map to an empty
// term string are skipped.
1392  for (auto&& prefix : *prefixes) {
1393  vector<Query> subqs;
1394  subqs.reserve(n_terms);
1395  for (Term* t : terms) {
1396  const string& term = t->make_term(prefix);
1397  if (term.empty()) continue;
1398  subqs.push_back(Query(term, 1, t->pos));
1399  }
1400  add_to_query(q, Query::OP_OR, opwindow_subq(op, subqs, w));
1401  }
1402  }
1403  } else {
// Mixed prefixes: fall back to each term's own query, dropping any
// MatchAll leaves.
1404  vector<Query> subqs;
1405  subqs.reserve(n_terms);
1406  for (Term* t : terms) {
1407  Query query = t->get_query();
1408  if (query.get_type() == query.LEAF_MATCH_ALL) continue;
1409  subqs.push_back(query);
1410  }
1411  q = new Query(opwindow_subq(op, subqs, w));
1412  }
1413 
1414  delete this;
1415  return q;
1416  }
1417 
// Private constructor: no_pos disables positional information entirely.
1418  explicit Terms(bool no_pos)
1419  : window(no_pos ? size_t(-1) : 0),
1420  uniform_prefixes(true),
1421  prefixes(NULL) { }
1422 
1423  public:
// Factory - heap allocation is required since the conversion methods do
// `delete this`.
1425  static Terms* create(State* state) {
1426  return new Terms(state->flags & QueryParser::FLAG_NO_POSITIONS);
1427  }
1428 
// NOTE(review): missing line 1429 is the destructor header `~Terms() {` -
// the body frees the owned terms.
1430  for (auto&& t : terms) {
1431  delete t;
1432  }
1433  }
1434 
// NOTE(review): missing lines 1435-1436 held this method's signature -
// from the body and the call sites (terms->add_positional_term(c) below at
// orig. lines 1528/1539) it is `void add_positional_term(Term* term)`;
// it takes ownership of term and tracks whether all terms share the same
// prefix list.
1437  const auto& term_prefixes = term->field_info->prefixes;
1438  if (terms.empty()) {
1439  prefixes = &term_prefixes;
1440  } else if (uniform_prefixes && prefixes != &term_prefixes) {
1441  if (*prefixes != term_prefixes) {
1442  prefixes = NULL;
1443  uniform_prefixes = false;
1444  }
1445  }
1446  term->need_positions();
1447  terms.push_back(term);
1448  }
1449 
// Widen the window if the caller's alternative is larger.
1450  void adjust_window(size_t alternative_window) {
1451  if (alternative_window > window) window = alternative_window;
1452  }
1453 
// NOTE(review): missing lines 1454-1455 held this method's signature -
// from the body it builds a phrase query (presumably
// `Query * as_phrase_query()`; confirm upstream).
1456  return as_opwindow_query(Query::OP_PHRASE, 0);
1457  }
1458 
// NOTE(review): missing lines 1459-1460 held this method's signature -
// from the body it joins the terms into a space-separated name and looks
// up synonyms for it (a State* is in scope, see orig. line 1481); the
// exact name is not visible here - confirm upstream.
1461  string name;
1462  termpos pos = 0;
1463  for (Term* t : terms) {
1464  if (!name.empty()) {
1465  name += ' ';
1466  } else {
1467  pos = t->pos;
1468  }
1469  name += t->name;
1470  }
1471 
1472  for (auto&& prefix : *prefixes) {
1473  // Only try unstemmed for multi-word.
1474  string term;
1475  if (!prefix.empty()) {
1476  term += prefix;
1477  if (prefix_needs_colon(prefix, name[0])) term += ':';
1478  }
1479  term += name;
1480 
1481  Xapian::Database db = state->get_database();
// NOTE(review): missing lines 1482-1483 presumably initialise the `syn`
// and `end` TermIterators used below (synonym iteration for `term`) -
// confirm upstream.
1484 
1485  // Caution: this does `delete this;`!
1486  Query* q = as_opwindow_query(Query::OP_PHRASE, 0);
1487  // FIXME: Is this right when there's more than one entry in
1488  // prefixes?
1489  Query* q2 = new Query(q->OP_SYNONYM,
1490  SynonymIterator(syn, pos, q),
1491  SynonymIterator(end));
1492  delete q;
1493  return q2;
1494  // FIXME: Handle multiple prefixes properly...
1495  }
1496  return new Query();
1497  }
1498 
1500  Query * as_near_query() const {
1501  // The common meaning of 'a NEAR b' is "a within 10 terms of b", which
1502  // means a window size of 11. For more than 2 terms, we just add one
1503  // to the window size for each extra term.
1504  size_t w = window;
1505  if (w == 0) w = 10;
1506  return as_opwindow_query(Query::OP_NEAR, w - 1);
1507  }
1508 
1510  Query * as_adj_query() const {
1511  // The common meaning of 'a ADJ b' is "a at most 10 terms before b",
1512  // which means a window size of 11. For more than 2 terms, we just add
1513  // one to the window size for each extra term.
1514  size_t w = window;
1515  if (w == 0) w = 10;
1516  return as_opwindow_query(Query::OP_PHRASE, w - 1);
1517  }
1518 };
1519 
1520 void
// NOTE(review): the signature line (orig. line 1521) was dropped when this
// listing was extracted.  From the body this is a Term member function
// taking a `Terms* terms` parameter (it reads name, field_info, unstemmed,
// stem and pos, and calls terms->add_positional_term()) - confirm the
// exact name against upstream queryparser.lemony.
1522 {
1523 #ifdef USE_ICU
// With ICU available and FLAG_WORD_BREAKS enabled, split `name` into words
// and add each word as its own positional term; this Term is then freed.
1524  if (state->flags & QueryParser::FLAG_WORD_BREAKS) {
1525  for (WordIterator tk(name); tk != WordIterator(); ++tk) {
1526  const string& t = *tk;
1527  Term * c = new Term(state, t, field_info, unstemmed, stem, pos);
1528  terms->add_positional_term(c);
1529  }
1530  delete this;
1531  return;
1532  }
1533 #endif
1534  // Add each individual character to the phrase.
1535  string t;
// Iterate Unicode code points (not bytes) so multi-byte UTF-8 characters
// become single-character terms.
1536  for (Utf8Iterator it(name); it != Utf8Iterator(); ++it) {
1537  Unicode::append_utf8(t, *it);
1538  Term * c = new Term(state, t, field_info, unstemmed, stem, pos);
1539  terms->add_positional_term(c);
1540  t.resize(0);
1541  }
1542 
1543  // FIXME: we want to add the n-grams as filters too for efficiency.
1544 
// This Term has been expanded into the individual terms above, so free it.
1545  delete this;
1546 }
1547 
1548 // Helper macro to check for missing arguments to a boolean operator.
// If either operand A or B is NULL, record a syntax error message in the
// parser state, mark the parse as failed, and return from the enclosing
// reduce action.  Wrapped in do { } while (0) so it expands to a single
// statement (safe after a bare `if`).
1549 #define VET_BOOL_ARGS(A, B, OP_TXT) \
1550  do {\
1551  if (!A || !B) {\
1552  state->error = "Syntax: <expression> " OP_TXT " <expression>";\
1553  yy_parse_failed(yypParser);\
1554  return;\
1555  }\
1556  } while (0)
1557 
1558 #line 1559 "queryparser/queryparser_internal.cc"
1559 /**************** End of %include directives **********************************/
1560 /* These constants specify the various numeric values for terminal symbols
1561 ** in a format understandable to "makeheaders". This section is blank unless
1562 ** "lemon" is run with the "-m" command-line option.
1563 ***************** Begin makeheaders token definitions *************************/
1564 /**************** End makeheaders token definitions ***************************/
1565 
1566 /* The next section is a series of control #defines which control
1567 ** various aspects of the generated parser.
1568 ** YYCODETYPE is the data type used to store the integer codes
1569 ** that represent terminal and non-terminal symbols.
1570 ** "unsigned char" is used if there are fewer than
1571 ** 256 symbols. Larger types otherwise.
1572 ** YYNOCODE is a number of type YYCODETYPE that is not used for
1573 ** any terminal or nonterminal symbol.
1574 ** YYFALLBACK If defined, this indicates that one or more tokens
1575 ** (also known as: "terminal symbols") have fall-back
1576 ** values which should be used if the original symbol
1577 ** would not parse. This permits keywords to sometimes
1578 ** be used as identifiers, for example.
1579 ** YYACTIONTYPE is the data type used for "action codes" - numbers
1580 ** that indicate what to do in response to the next
1581 ** token.
1582 ** ParseTOKENTYPE is the data type used for minor type for terminal
1583 ** symbols. Background: A "minor type" is a semantic
1584 ** value associated with a terminal or non-terminal
1585 ** symbols. For example, for an "ID" terminal symbol,
1586 ** the minor type might be the name of the identifier.
1587 ** Each non-terminal can have a different minor type.
1588 ** Terminal symbols all have the same minor type, though.
1589 ** This macros defines the minor type for terminal
1590 ** symbols.
1591 ** YYMINORTYPE is the data type used for all minor types.
1592 ** This is typically a union of many types, one of
1593 ** which is ParseTOKENTYPE. The entry in the union
1594 ** for terminal symbols is called "yy0".
1595 ** YYSTACKDEPTH is the maximum depth of the parser's stack. If
1596 ** zero the stack is dynamically sized using realloc()
1597 ** ParseARG_SDECL A static variable declaration for the %extra_argument
1598 ** ParseARG_PDECL A parameter declaration for the %extra_argument
1599 ** ParseARG_STORE Code to store %extra_argument into yypParser
1600 ** ParseARG_FETCH Code to extract %extra_argument from yypParser
1601 ** YYERRORSYMBOL is the code number of the error symbol. If not
1602 ** defined, then do no error processing.
1603 ** YYNSTATE the combined number of states.
1604 ** YYNRULE the number of rules in the grammar
1605 ** YYNTOKEN Number of terminal symbols
1606 ** YY_MAX_SHIFT Maximum value for shift actions
1607 ** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1608 ** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1609 ** YY_ERROR_ACTION The yy_action[] code for syntax error
1610 ** YY_ACCEPT_ACTION The yy_action[] code for accept
1611 ** YY_NO_ACTION The yy_action[] code for no-op
1612 ** YY_MIN_REDUCE Minimum value for reduce actions
1613 ** YY_MAX_REDUCE Maximum value for reduce actions
1614 */
1615 #ifndef INTERFACE
1616 # define INTERFACE 1
1617 #endif
1618 /************* Begin control #defines *****************************************/
1619 #define YYCODETYPE unsigned char
1620 #define YYNOCODE 42
1621 #define YYACTIONTYPE unsigned char
1622 #define ParseTOKENTYPE Term *
1623 typedef union {
1624  int yyinit;
1629  int yy34;
1631 } YYMINORTYPE;
1632 #ifndef YYSTACKDEPTH
1633 #define YYSTACKDEPTH 100
1634 #endif
1635 #define ParseARG_SDECL State * state;
1636 #define ParseARG_PDECL ,State * state
1637 #define ParseARG_FETCH State * state = yypParser->state
1638 #define ParseARG_STORE yypParser->state = state
1639 #define YYNSTATE 42
1640 #define YYNRULE 59
1641 #define YYNTOKEN 26
1642 #define YY_MAX_SHIFT 41
1643 #define YY_MIN_SHIFTREDUCE 83
1644 #define YY_MAX_SHIFTREDUCE 141
1645 #define YY_ERROR_ACTION 142
1646 #define YY_ACCEPT_ACTION 143
1647 #define YY_NO_ACTION 144
1648 #define YY_MIN_REDUCE 145
1649 #define YY_MAX_REDUCE 203
1650 /************* End control #defines *******************************************/
1651 
1652 /* Define the yytestcase() macro to be a no-op if is not already defined
1653 ** otherwise.
1654 **
1655 ** Applications can choose to define yytestcase() in the %include section
1656 ** to a macro that can assist in verifying code coverage. For production
1657 ** code the yytestcase() macro should be turned off. But it is useful
1658 ** for testing.
1659 */
1660 #ifndef yytestcase
1661 # define yytestcase(X)
1662 #endif
1663 
1664 
1665 /* Next are the tables used to determine what action to take based on the
1666 ** current state and lookahead token. These tables are used to implement
1667 ** functions that take a state number and lookahead value and return an
1668 ** action integer.
1669 **
1670 ** Suppose the action integer is N. Then the action is determined as
1671 ** follows
1672 **
1673 ** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead
1674 ** token onto the stack and goto state N.
1675 **
1676 ** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
1677 ** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE.
1678 **
1679 ** N == YY_ERROR_ACTION A syntax error has occurred.
1680 **
1681 ** N == YY_ACCEPT_ACTION The parser accepts its input.
1682 **
1683 ** N == YY_NO_ACTION No such action. Denotes unused
1684 ** slots in the yy_action[] table.
1685 **
1686 ** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE
1687 ** and YY_MAX_REDUCE
1688 **
1689 ** The action table is constructed as a single large table named yy_action[].
1690 ** Given state S and lookahead X, the action is computed as either:
1691 **
1692 ** (A) N = yy_action[ yy_shift_ofst[S] + X ]
1693 ** (B) N = yy_default[S]
1694 **
1695 ** The (A) formula is preferred. The B formula is used instead if
1696 ** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
1697 **
1698 ** The formulas above are for computing the action when the lookahead is
1699 ** a terminal symbol. If the lookahead is a non-terminal (as occurs after
1700 ** a reduce action) then the yy_reduce_ofst[] array is used in place of
1701 ** the yy_shift_ofst[] array.
1702 **
1703 ** The following are the tables generated in this section:
1704 **
1705 ** yy_action[] A single table containing all actions.
1706 ** yy_lookahead[] A table containing the lookahead for each entry in
1707 ** yy_action. Used to detect hash collisions.
1708 ** yy_shift_ofst[] For each state, the offset into yy_action for
1709 ** shifting terminals.
1710 ** yy_reduce_ofst[] For each state, the offset into yy_action for
1711 ** shifting non-terminals after a reduce.
1712 ** yy_default[] Default action for each state.
1713 **
1714 *********** Begin parsing tables **********************************************/
1715 #define YY_ACTTAB_COUNT (352)
1716 static const YYACTIONTYPE yy_action[] = {
1717  /* 0 */ 24, 25, 145, 144, 144, 3, 144, 34, 11, 10,
1718  /* 10 */ 2, 27, 144, 17, 13, 12, 111, 112, 113, 104,
1719  /* 20 */ 94, 16, 4, 146, 122, 105, 95, 7, 6, 1,
1720  /* 30 */ 8, 11, 10, 119, 27, 123, 17, 5, 5, 111,
1721  /* 40 */ 112, 113, 104, 94, 16, 4, 124, 122, 143, 41,
1722  /* 50 */ 41, 19, 9, 41, 21, 14, 18, 135, 36, 28,
1723  /* 60 */ 35, 33, 32, 40, 40, 40, 9, 40, 21, 14,
1724  /* 70 */ 18, 130, 36, 28, 35, 33, 11, 10, 128, 27,
1725  /* 80 */ 133, 17, 131, 120, 111, 112, 113, 104, 94, 16,
1726  /* 90 */ 4, 15, 122, 29, 29, 29, 9, 29, 21, 14,
1727  /* 100 */ 18, 134, 36, 28, 35, 33, 30, 30, 30, 9,
1728  /* 110 */ 30, 21, 14, 18, 132, 36, 28, 35, 33, 31,
1729  /* 120 */ 31, 19, 9, 31, 21, 14, 18, 144, 36, 28,
1730  /* 130 */ 35, 33, 153, 153, 153, 9, 153, 21, 14, 18,
1731  /* 140 */ 144, 36, 28, 35, 33, 26, 26, 26, 9, 26,
1732  /* 150 */ 21, 14, 18, 144, 36, 28, 35, 33, 23, 23,
1733  /* 160 */ 23, 9, 23, 21, 14, 18, 144, 36, 28, 35,
1734  /* 170 */ 33, 39, 39, 39, 9, 39, 21, 14, 18, 144,
1735  /* 180 */ 36, 28, 35, 33, 201, 201, 144, 27, 144, 22,
1736  /* 190 */ 144, 144, 111, 112, 113, 201, 201, 16, 4, 172,
1737  /* 200 */ 122, 172, 172, 172, 172, 38, 37, 38, 37, 1,
1738  /* 210 */ 8, 172, 144, 129, 127, 129, 127, 5, 27, 144,
1739  /* 220 */ 20, 144, 172, 111, 112, 113, 102, 144, 16, 4,
1740  /* 230 */ 27, 122, 20, 144, 144, 111, 112, 113, 106, 144,
1741  /* 240 */ 16, 4, 27, 122, 20, 144, 144, 111, 112, 113,
1742  /* 250 */ 103, 144, 16, 4, 27, 122, 20, 144, 144, 111,
1743  /* 260 */ 112, 113, 107, 144, 16, 4, 27, 122, 22, 144,
1744  /* 270 */ 144, 111, 112, 113, 144, 144, 16, 4, 203, 122,
1745  /* 280 */ 203, 203, 203, 203, 144, 144, 144, 144, 159, 159,
1746  /* 290 */ 203, 36, 28, 35, 33, 144, 144, 162, 144, 144,
1747  /* 300 */ 162, 203, 36, 28, 35, 33, 160, 144, 125, 160,
1748  /* 310 */ 144, 36, 28, 35, 33, 163, 121, 125, 163, 126,
1749  /* 320 */ 36, 28, 35, 33, 161, 114, 144, 161, 126, 36,
1750  /* 330 */ 28, 35, 33, 144, 144, 158, 158, 144, 36, 28,
1751  /* 340 */ 35, 33, 6, 1, 8, 144, 144, 144, 144, 144,
1752  /* 350 */ 144, 5,
1753 };
1754 static const YYCODETYPE yy_lookahead[] = {
1755  /* 0 */ 36, 36, 0, 41, 41, 5, 41, 6, 8, 9,
1756  /* 10 */ 10, 11, 41, 13, 8, 9, 16, 17, 18, 19,
1757  /* 20 */ 20, 21, 22, 0, 24, 19, 20, 2, 3, 4,
1758  /* 30 */ 5, 8, 9, 23, 11, 13, 13, 12, 12, 16,
1759  /* 40 */ 17, 18, 19, 20, 21, 22, 24, 24, 27, 28,
1760  /* 50 */ 29, 30, 31, 32, 33, 34, 35, 13, 37, 38,
1761  /* 60 */ 39, 40, 7, 28, 29, 30, 31, 32, 33, 34,
1762  /* 70 */ 35, 14, 37, 38, 39, 40, 8, 9, 15, 11,
1763  /* 80 */ 13, 13, 25, 13, 16, 17, 18, 19, 20, 21,
1764  /* 90 */ 22, 21, 24, 28, 29, 30, 31, 32, 33, 34,
1765  /* 100 */ 35, 13, 37, 38, 39, 40, 28, 29, 30, 31,
1766  /* 110 */ 32, 33, 34, 35, 13, 37, 38, 39, 40, 28,
1767  /* 120 */ 29, 30, 31, 32, 33, 34, 35, 41, 37, 38,
1768  /* 130 */ 39, 40, 28, 29, 30, 31, 32, 33, 34, 35,
1769  /* 140 */ 41, 37, 38, 39, 40, 28, 29, 30, 31, 32,
1770  /* 150 */ 33, 34, 35, 41, 37, 38, 39, 40, 28, 29,
1771  /* 160 */ 30, 31, 32, 33, 34, 35, 41, 37, 38, 39,
1772  /* 170 */ 40, 28, 29, 30, 31, 32, 33, 34, 35, 41,
1773  /* 180 */ 37, 38, 39, 40, 8, 9, 41, 11, 41, 13,
1774  /* 190 */ 41, 41, 16, 17, 18, 19, 20, 21, 22, 0,
1775  /* 200 */ 24, 2, 3, 4, 5, 6, 7, 6, 7, 4,
1776  /* 210 */ 5, 12, 41, 14, 15, 14, 15, 12, 11, 41,
1777  /* 220 */ 13, 41, 23, 16, 17, 18, 19, 41, 21, 22,
1778  /* 230 */ 11, 24, 13, 41, 41, 16, 17, 18, 19, 41,
1779  /* 240 */ 21, 22, 11, 24, 13, 41, 41, 16, 17, 18,
1780  /* 250 */ 19, 41, 21, 22, 11, 24, 13, 41, 41, 16,
1781  /* 260 */ 17, 18, 19, 41, 21, 22, 11, 24, 13, 41,
1782  /* 270 */ 41, 16, 17, 18, 41, 41, 21, 22, 0, 24,
1783  /* 280 */ 2, 3, 4, 5, 41, 41, 41, 41, 34, 35,
1784  /* 290 */ 12, 37, 38, 39, 40, 41, 41, 32, 41, 41,
1785  /* 300 */ 35, 23, 37, 38, 39, 40, 32, 41, 13, 35,
1786  /* 310 */ 41, 37, 38, 39, 40, 32, 21, 13, 35, 24,
1787  /* 320 */ 37, 38, 39, 40, 32, 21, 41, 35, 24, 37,
1788  /* 330 */ 38, 39, 40, 41, 41, 34, 35, 41, 37, 38,
1789  /* 340 */ 39, 40, 3, 4, 5, 41, 41, 41, 41, 41,
1790  /* 350 */ 41, 12, 41, 41, 41, 41, 41, 41, 41, 41,
1791  /* 360 */ 41, 41, 41, 41, 41, 41, 41, 41, 41,
1792 };
1793 #define YY_SHIFT_COUNT (41)
1794 #define YY_SHIFT_MIN (0)
1795 #define YY_SHIFT_MAX (339)
1796 static const unsigned short int yy_shift_ofst[] = {
1797  /* 0 */ 23, 0, 68, 68, 68, 68, 68, 68, 68, 176,
1798  /* 10 */ 207, 219, 231, 243, 255, 22, 22, 199, 278, 25,
1799  /* 20 */ 201, 6, 201, 339, 295, 304, 205, 70, 57, 26,
1800  /* 30 */ 26, 10, 44, 55, 67, 1, 63, 88, 101, 26,
1801  /* 40 */ 26, 2,
1802 };
1803 #define YY_REDUCE_COUNT (16)
1804 #define YY_REDUCE_MIN (-36)
1805 #define YY_REDUCE_MAX (301)
1806 static const short yy_reduce_ofst[] = {
1807  /* 0 */ 21, 35, 65, 78, 91, 104, 117, 130, 143, 254,
1808  /* 10 */ 265, 274, 283, 292, 301, -36, -35,
1809 };
1810 static const YYACTIONTYPE yy_default[] = {
1811  /* 0 */ 154, 154, 154, 154, 154, 154, 154, 154, 154, 155,
1812  /* 10 */ 142, 142, 142, 142, 170, 142, 142, 171, 202, 142,
1813  /* 20 */ 172, 142, 171, 151, 142, 142, 152, 142, 178, 150,
1814  /* 30 */ 149, 199, 142, 180, 142, 179, 177, 142, 142, 148,
1815  /* 40 */ 147, 199,
1816 };
1817 /********** End of lemon-generated parsing tables *****************************/
1818 
1819 /* The next table maps tokens (terminal symbols) into fallback tokens.
1820 ** If a construct like the following:
1821 **
1822 ** %fallback ID X Y Z.
1823 **
1824 ** appears in the grammar, then ID becomes a fallback token for X, Y,
1825 ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser
1826 ** but it does not parse, the type of the token is changed to ID and
1827 ** the parse is retried before an error is thrown.
1828 **
1829 ** This feature can be used, for example, to cause some keywords in a language
1830 ** to revert to identifiers if the keyword does not apply in the context where
1831 ** it appears.
1832 */
1833 #ifdef YYFALLBACK
1834 static const YYCODETYPE yyFallback[] = {
1835 };
1836 #endif /* YYFALLBACK */
1837 
1838 /* The following structure represents a single element of the
1839 ** parser's stack. Information stored includes:
1840 **
1841 ** + The state number for the parser at this level of the stack.
1842 **
1843 ** + The value of the token stored at this level of the stack.
1844 ** (In other words, the "major" token.)
1845 **
1846 ** + The semantic value stored at this level of the stack. This is
1847 ** the information used by the action routines in the grammar.
1848 ** It is sometimes called the "minor" token.
1849 **
1850 ** After the "shift" half of a SHIFTREDUCE action, the stateno field
1851 ** actually contains the reduce action for the second half of the
1852 ** SHIFTREDUCE.
1853 */
// NOTE(review): the struct header and constructor signature lines (orig.
// lines 1854-1855 and 1859) were dropped when this listing was extracted.
// From the bodies below these are the yyStackEntry default constructor
// (zero-initialising state and major token) and a
// (stateno_, major_, minor_) constructor - confirm against upstream.
1856  stateno = 0;
1857  major = 0;
1858  }
1860  stateno = stateno_;
1861  major = major_;
1862  minor.yy0 = minor_;
1863  }
1864  YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */
1865  YYCODETYPE major; /* The major token value. This is the code
1866  ** number for the token at this stack level */
1867  YYMINORTYPE minor; /* The user-supplied minor token value. This
1868  ** is the value of the token */
1869 };
1870 
1871 static void ParseInit(yyParser *pParser);
1872 static void ParseFinalize(yyParser *pParser);
1873 
1874 /* The state of the parser is completely contained in an instance of
1875 ** the following structure */
1876 struct yyParser {
1877 #ifdef YYTRACKMAXSTACKDEPTH
1878  int yyhwm; /* High-water mark of the stack */
1879 #endif
1880 #ifndef YYNOERRORRECOVERY
1881  int yyerrcnt; /* Shifts left before out of the error */
1882 #endif
1883  ParseARG_SDECL /* A place to hold %extra_argument */
// This C++ port uses a std::vector for the parser stack instead of
// lempar.c's manually managed array.
1884  vector<yyStackEntry> yystack; /* The parser's stack */
// NOTE(review): the constructor and destructor header lines (orig. lines
// 1885 and 1888) were dropped when this listing was extracted; the bodies
// delegate to ParseInit() / ParseFinalize() respectively - confirm
// against upstream.
1886  ParseInit(this);
1887  }
1889  ParseFinalize(this);
1890  }
1891 };
1892 typedef struct yyParser yyParser;
1893 
1894 #include "omassert.h"
1895 #include "debuglog.h"
1896 
1897 #if defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG)
1898 /* For tracing shifts, the names of all terminals and nonterminals
1899 ** are required. The following table supplies these names */
1900 static const char *const yyTokenName[] = {
1901  /* 0 */ "$",
1902  /* 1 */ "ERROR",
1903  /* 2 */ "OR",
1904  /* 3 */ "XOR",
1905  /* 4 */ "AND",
1906  /* 5 */ "NOT",
1907  /* 6 */ "NEAR",
1908  /* 7 */ "ADJ",
1909  /* 8 */ "LOVE",
1910  /* 9 */ "HATE",
1911  /* 10 */ "HATE_AFTER_AND",
1912  /* 11 */ "SYNONYM",
1913  /* 12 */ "SYN",
1914  /* 13 */ "TERM",
1915  /* 14 */ "GROUP_TERM",
1916  /* 15 */ "PHR_TERM",
1917  /* 16 */ "EDIT_TERM",
1918  /* 17 */ "WILD_TERM",
1919  /* 18 */ "PARTIAL_TERM",
1920  /* 19 */ "BOOLEAN_FILTER",
1921  /* 20 */ "RANGE",
1922  /* 21 */ "QUOTE",
1923  /* 22 */ "BRA",
1924  /* 23 */ "KET",
1925  /* 24 */ "UNBROKEN_WORDS",
1926  /* 25 */ "EMPTY_GROUP_OK",
1927  /* 26 */ "error",
1928  /* 27 */ "query",
1929  /* 28 */ "expr",
1930  /* 29 */ "prob_expr",
1931  /* 30 */ "bool_arg",
1932  /* 31 */ "prob",
1933  /* 32 */ "term",
1934  /* 33 */ "stop_prob",
1935  /* 34 */ "stop_term",
1936  /* 35 */ "compound_term",
1937  /* 36 */ "phrase",
1938  /* 37 */ "phrased_term",
1939  /* 38 */ "group",
1940  /* 39 */ "near_expr",
1941  /* 40 */ "adj_expr",
1942 };
1943 
1944 /* For tracing reduce actions, the names of all rules are required.
1945 */
1946 static const char *const yyRuleName[] = {
1947  /* 0 */ "query ::= expr",
1948  /* 1 */ "query ::=",
1949  /* 2 */ "expr ::= bool_arg AND bool_arg",
1950  /* 3 */ "expr ::= bool_arg NOT bool_arg",
1951  /* 4 */ "expr ::= bool_arg AND NOT bool_arg",
1952  /* 5 */ "expr ::= bool_arg AND HATE_AFTER_AND bool_arg",
1953  /* 6 */ "expr ::= bool_arg OR bool_arg",
1954  /* 7 */ "expr ::= bool_arg XOR bool_arg",
1955  /* 8 */ "expr ::= bool_arg SYN bool_arg",
1956  /* 9 */ "bool_arg ::=",
1957  /* 10 */ "prob_expr ::= prob",
1958  /* 11 */ "prob ::= RANGE",
1959  /* 12 */ "prob ::= stop_prob RANGE",
1960  /* 13 */ "prob ::= stop_term stop_term",
1961  /* 14 */ "prob ::= prob stop_term",
1962  /* 15 */ "prob ::= LOVE term",
1963  /* 16 */ "prob ::= stop_prob LOVE term",
1964  /* 17 */ "prob ::= HATE term",
1965  /* 18 */ "prob ::= stop_prob HATE term",
1966  /* 19 */ "prob ::= HATE BOOLEAN_FILTER",
1967  /* 20 */ "prob ::= stop_prob HATE BOOLEAN_FILTER",
1968  /* 21 */ "prob ::= BOOLEAN_FILTER",
1969  /* 22 */ "prob ::= stop_prob BOOLEAN_FILTER",
1970  /* 23 */ "prob ::= LOVE BOOLEAN_FILTER",
1971  /* 24 */ "prob ::= stop_prob LOVE BOOLEAN_FILTER",
1972  /* 25 */ "stop_prob ::= stop_term",
1973  /* 26 */ "stop_term ::= TERM",
1974  /* 27 */ "term ::= TERM",
1975  /* 28 */ "compound_term ::= EDIT_TERM",
1976  /* 29 */ "compound_term ::= WILD_TERM",
1977  /* 30 */ "compound_term ::= PARTIAL_TERM",
1978  /* 31 */ "compound_term ::= QUOTE phrase QUOTE",
1979  /* 32 */ "compound_term ::= phrased_term",
1980  /* 33 */ "compound_term ::= group",
1981  /* 34 */ "compound_term ::= near_expr",
1982  /* 35 */ "compound_term ::= adj_expr",
1983  /* 36 */ "compound_term ::= BRA expr KET",
1984  /* 37 */ "compound_term ::= SYNONYM TERM",
1985  /* 38 */ "compound_term ::= SYNONYM QUOTE phrase QUOTE",
1986  /* 39 */ "compound_term ::= UNBROKEN_WORDS",
1987  /* 40 */ "phrase ::= TERM",
1988  /* 41 */ "phrase ::= UNBROKEN_WORDS",
1989  /* 42 */ "phrase ::= phrase TERM",
1990  /* 43 */ "phrase ::= phrase UNBROKEN_WORDS",
1991  /* 44 */ "phrased_term ::= TERM PHR_TERM",
1992  /* 45 */ "phrased_term ::= phrased_term PHR_TERM",
1993  /* 46 */ "group ::= TERM GROUP_TERM",
1994  /* 47 */ "group ::= group GROUP_TERM",
1995  /* 48 */ "group ::= group EMPTY_GROUP_OK",
1996  /* 49 */ "near_expr ::= TERM NEAR TERM",
1997  /* 50 */ "near_expr ::= near_expr NEAR TERM",
1998  /* 51 */ "adj_expr ::= TERM ADJ TERM",
1999  /* 52 */ "adj_expr ::= adj_expr ADJ TERM",
2000  /* 53 */ "expr ::= prob_expr",
2001  /* 54 */ "bool_arg ::= expr",
2002  /* 55 */ "prob_expr ::= term",
2003  /* 56 */ "stop_prob ::= prob",
2004  /* 57 */ "stop_term ::= compound_term",
2005  /* 58 */ "term ::= compound_term",
2006 };
2007 
2008 /*
2009 ** This function returns the symbolic name associated with a token
2010 ** value.
2011 */
2012 static const char *ParseTokenName(int tokenType){
2013  if( tokenType>=0 && tokenType<(int)(sizeof(yyTokenName)/sizeof(yyTokenName[0])) ){
2014  return yyTokenName[tokenType];
2015  }
2016  return "Unknown";
2017 }
2018 
2019 /*
2020 ** This function returns the symbolic name associated with a rule
2021 ** value.
2022 */
2023 static const char *ParseRuleName(int ruleNum){
2024  if( ruleNum>=0 && ruleNum<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
2025  return yyRuleName[ruleNum];
2026  }
2027  return "Unknown";
2028 }
2029 #endif /* defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG) */
2030 
2031 /* Datatype of the argument to the memory allocator passed as the
2032 ** second argument to ParseAlloc() below. This can be changed by
2033 ** putting an appropriate #define in the %include section of the input
2034 ** grammar.
2035 */
2036 #ifndef YYMALLOCARGTYPE
2037 # define YYMALLOCARGTYPE size_t
2038 #endif
2039 
2040 /* Initialize a new parser that has already been allocated.
2041 */
2042 static
2043 void ParseInit(yyParser *pParser){
2044 #ifdef YYTRACKMAXSTACKDEPTH
2045  pParser->yyhwm = 0;
2046 #endif
// The `#if 0` regions below preserve upstream lempar.c's manual stack
// management for easier syncing; this port stores the stack in a
// std::vector (yyParser::yystack), so only the push_back() at the end is
// live.
2047 #if 0
2048 #if YYSTACKDEPTH<=0
2049  pParser->yytos = NULL;
2050  pParser->yystack = NULL;
2051  pParser->yystksz = 0;
2052  if( yyGrowStack(pParser) ){
2053  pParser->yystack = &pParser->yystk0;
2054  pParser->yystksz = 1;
2055  }
2056 #endif
2057 #endif
2058 #ifndef YYNOERRORRECOVERY
2059  pParser->yyerrcnt = -1;
2060 #endif
2061 #if 0
2062  pParser->yytos = pParser->yystack;
2063  pParser->yystack[0].stateno = 0;
2064  pParser->yystack[0].major = 0;
2065 #if YYSTACKDEPTH>0
2066  pParser->yystackEnd = &pParser->yystack[YYSTACKDEPTH-1];
2067 #endif
2068 #else
// Seed the stack with a single default-constructed entry (state 0).
2069  pParser->yystack.push_back(yyStackEntry());
2070 #endif
2071 }
2072 
2073 #ifndef Parse_ENGINEALWAYSONSTACK
2074 /*
2075 ** This function allocates a new parser.
2076 **
2077 ** Inputs:
2078 ** None.
2079 **
2080 ** Outputs:
2081 ** A pointer to a parser. This pointer is used in subsequent calls
2082 ** to Parse and ParseFree.
2083 */
2084 static yyParser *ParseAlloc(void){
2085  return new yyParser;
2086 }
2087 #endif /* Parse_ENGINEALWAYSONSTACK */
2088 
2089 
2090 /* The following function deletes the "minor type" or semantic value
2091 ** associated with a symbol. The symbol can be either a terminal
2092 ** or nonterminal. "yymajor" is the symbol code, and "yypminor" is
2093 ** a pointer to the value to be deleted. The code used to do the
2094 ** deletions is derived from the %destructor and/or %token_destructor
2095 ** directives of the input grammar.
2096 */
static void yy_destructor(
  yyParser *yypParser, /* The parser */
  YYCODETYPE yymajor, /* Type code for object to destroy */
  YYMINORTYPE *yypminor /* The object to be destroyed */
){
  switch( yymajor ){
    /* Here is inserted the actions which take place when a
    ** terminal or non-terminal is destroyed. This can happen
    ** when the symbol is popped from the stack during a
    ** reduce or during error processing or when a parser is
    ** being destroyed before it is finished parsing.
    **
    ** Note: during a reduce, the only symbols destroyed are those
    ** which appear on the RHS of the rule, but which are *not* used
    ** inside the C code.
    */
/********* Begin destructor definitions ***************************************/
      /* TERMINAL Destructor */
      /* All terminal symbols carry their semantic value in union
      ** member yy0, deleted here. */
    case 1: /* ERROR */
    case 2: /* OR */
    case 3: /* XOR */
    case 4: /* AND */
    case 5: /* NOT */
    case 6: /* NEAR */
    case 7: /* ADJ */
    case 8: /* LOVE */
    case 9: /* HATE */
    case 10: /* HATE_AFTER_AND */
    case 11: /* SYNONYM */
    case 12: /* SYN */
    case 13: /* TERM */
    case 14: /* GROUP_TERM */
    case 15: /* PHR_TERM */
    case 16: /* EDIT_TERM */
    case 17: /* WILD_TERM */
    case 18: /* PARTIAL_TERM */
    case 19: /* BOOLEAN_FILTER */
    case 20: /* RANGE */
    case 21: /* QUOTE */
    case 22: /* BRA */
    case 23: /* KET */
    case 24: /* UNBROKEN_WORDS */
    case 25: /* EMPTY_GROUP_OK */
{
#line 2217 "queryparser/queryparser.lemony"
 delete (yypminor->yy0);
#line 2145 "queryparser/queryparser_internal.cc"
}
      break;
      /* Nonterminals whose semantic value lives in union member yy1. */
    case 28: /* expr */
    case 29: /* prob_expr */
    case 30: /* bool_arg */
    case 32: /* term */
    case 34: /* stop_term */
    case 35: /* compound_term */
{
#line 2295 "queryparser/queryparser.lemony"
 delete (yypminor->yy1);
#line 2157 "queryparser/queryparser_internal.cc"
}
      break;
      /* Nonterminals whose semantic value lives in union member yy18. */
    case 31: /* prob */
    case 33: /* stop_prob */
{
#line 2415 "queryparser/queryparser.lemony"
 delete (yypminor->yy18);
#line 2165 "queryparser/queryparser_internal.cc"
}
      break;
      /* Nonterminals whose semantic value lives in union member yy36. */
    case 36: /* phrase */
    case 37: /* phrased_term */
    case 39: /* near_expr */
    case 40: /* adj_expr */
{
#line 2612 "queryparser/queryparser.lemony"
 delete (yypminor->yy36);
#line 2175 "queryparser/queryparser_internal.cc"
}
      break;
      /* Nonterminal whose semantic value lives in union member yy32. */
    case 38: /* group */
{
#line 2653 "queryparser/queryparser.lemony"
 delete (yypminor->yy32);
#line 2182 "queryparser/queryparser_internal.cc"
}
      break;
/********* End destructor definitions *****************************************/
    default:  break;   /* If no destructor action specified: do nothing */
  }
  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
}
2189 
2190 /*
2191 ** Pop the parser's stack once.
2192 **
2193 ** If there is a destructor routine associated with the token which
2194 ** is popped from the stack, then call it.
2195 */
2196 static void yy_pop_parser_stack(yyParser *pParser){
2197  Assert( pParser->yystack.size() > 1 );
2198  yyStackEntry *yytos = &pParser->yystack.back();
2199 
2200  LOGLINE(QUERYPARSER, "Popping " << ParseTokenName(yytos->major));
2201  yy_destructor(pParser, yytos->major, &yytos->minor);
2202  pParser->yystack.pop_back();
2203 }
2204 
2205 /*
2206 ** Clear all secondary memory allocations from the parser
2207 */
2208 static
2209 void ParseFinalize(yyParser *pParser){
2210  while( pParser->yystack.size() > 1 ) yy_pop_parser_stack(pParser);
2211 }
2212 
2213 #ifndef Parse_ENGINEALWAYSONSTACK
2214 /*
2215 ** Deallocate and destroy a parser. Destructors are called for
2216 ** all stack elements before shutting the parser down.
2217 **
2218 ** If the YYPARSEFREENEVERNULL macro exists (for example because it
2219 ** is defined in a %include section of the input grammar) then it is
2220 ** assumed that the input pointer is never NULL.
2221 */
2222 static
2223 void ParseFree(
2224  yyParser *pParser /* The parser to be deleted */
2225 ){
2226  delete pParser;
2227 }
2228 #endif /* Parse_ENGINEALWAYSONSTACK */
2229 
2230 /*
2231 ** Return the peak depth of the stack for a parser.
2232 */
2233 #ifdef YYTRACKMAXSTACKDEPTH
2234 int ParseStackPeak(yyParser *pParser){
2235  return pParser->yyhwm;
2236 }
2237 #endif
2238 
2239 /* This array of booleans keeps track of the parser statement
2240 ** coverage. The element yycoverage[X][Y] is set when the parser
2241 ** is in state X and has a lookahead token Y. In a well-tested
2242 ** systems, every element of this matrix should end up being set.
2243 */
#if defined(YYCOVERAGE)
/* Indexed [state][lookahead token]; an element is set to 1 the first time
** yy_find_shift_action() sees that state/lookahead combination. */
static unsigned char yycoverage[YYNSTATE][YYNTOKEN];
#endif
2247 
2248 /*
2249 ** Write into out a description of every state/lookahead combination that
2250 **
2251 ** (1) has not been used by the parser, and
2252 ** (2) is not a syntax error.
2253 **
2254 ** Return the number of missed state/lookahead combinations.
2255 */
2256 #if defined(YYCOVERAGE)
2257 int ParseCoverage(FILE *out){
2258  int stateno, iLookAhead, i;
2259  int nMissed = 0;
2260  for(stateno=0; stateno<YYNSTATE; stateno++){
2261  i = yy_shift_ofst[stateno];
2262  for(iLookAhead=0; iLookAhead<YYNTOKEN; iLookAhead++){
2263  if( yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
2264  if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
2265  if( out ){
2266  fprintf(out,"State %d lookahead %s %s\n", stateno,
2267  yyTokenName[iLookAhead],
2268  yycoverage[stateno][iLookAhead] ? "ok" : "missed");
2269  }
2270  }
2271  }
2272  return nMissed;
2273 }
2274 #endif
2275 
2276 /*
2277 ** Find the appropriate action for a parser given the terminal
2278 ** look-ahead token iLookAhead.
2279 */
static unsigned int yy_find_shift_action(
  yyParser *pParser, /* The parser */
  YYCODETYPE iLookAhead /* The look-ahead token */
){
  int i;
  int stateno = pParser->yystack.back().stateno;

  /* States above YY_MAX_SHIFT have a single action which is encoded in
  ** the state number itself. */
  if( stateno>YY_MAX_SHIFT ) return stateno;
  Assert( stateno <= YY_SHIFT_COUNT );
#if defined(YYCOVERAGE)
  /* Record this state/lookahead combination for ParseCoverage(). */
  yycoverage[stateno][iLookAhead] = 1;
#endif
  do{
    i = yy_shift_ofst[stateno];
    Assert( i>=0 );
    Assert( i+YYNTOKEN<=(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) );
    Assert( iLookAhead!=YYNOCODE );
    Assert( iLookAhead < YYNTOKEN );
    i += iLookAhead;
    if( yy_lookahead[i]!=iLookAhead ){
      /* No action for this exact lookahead in this state. */
#ifdef YYFALLBACK
      YYCODETYPE iFallback;            /* Fallback token */
      /* Retry the lookup with the token's fallback token, if any. */
      if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0])
             && (iFallback = yyFallback[iLookAhead])!=0 ){
        LOGLINE(QUERYPARSER,
                "FALLBACK " << ParseTokenName(iLookAhead) << " => " <<
                ParseTokenName(iFallback));
        Assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
        iLookAhead = iFallback;
        continue;
      }
#endif
#ifdef YYWILDCARD
      {
        /* Try the wildcard token instead (never matches token code 0). */
        int j = i - iLookAhead + YYWILDCARD;
        if(
#if YY_SHIFT_MIN+YYWILDCARD<0
          j>=0 &&
#endif
#if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT
          j<YY_ACTTAB_COUNT &&
#endif
          yy_lookahead[j]==YYWILDCARD && iLookAhead>0
        ){
          LOGLINE(QUERYPARSER,
                  "WILDCARD " << ParseTokenName(iLookAhead) << " => " <<
                  ParseTokenName(YYWILDCARD));
          return yy_action[j];
        }
      }
#endif /* YYWILDCARD */
      /* Fall back to the state's default action. */
      return yy_default[stateno];
    }else{
      return yy_action[i];
    }
  }while(1);
}
2337 
2338 /*
2339 ** Find the appropriate action for a parser given the non-terminal
2340 ** look-ahead token iLookAhead.
2341 */
  int stateno, /* Current state number */
  YYCODETYPE iLookAhead /* The look-ahead token */
){
  int i;
#ifdef YYERRORSYMBOL
  /* With an error symbol in the grammar, out-of-range states fall back
  ** to the state's default action instead of being asserted against. */
  if( stateno>YY_REDUCE_COUNT ){
    return yy_default[stateno];
  }
#else
  Assert( stateno<=YY_REDUCE_COUNT );
#endif
  i = yy_reduce_ofst[stateno];
  Assert( iLookAhead!=YYNOCODE );
  i += iLookAhead;
#ifdef YYERRORSYMBOL
  if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
    return yy_default[stateno];
  }
#else
  Assert( i>=0 && i<YY_ACTTAB_COUNT );
  Assert( yy_lookahead[i]==iLookAhead );
#endif
  return yy_action[i];
}
2367 
2368 /*
2369 ** The following routine is called if the stack overflows.
2370 ** In Xapian this can never happen as we use std::vector to provide a stack
2371 ** of indefinite size.
2372 */
#if 0
/* NOTE: disabled — retained from the upstream lemon template only; the
** std::vector-backed stack used by this build cannot overflow (see the
** comment above). */
static void yyStackOverflow(yyParser *yypParser){
  yypParser->yyidx--;
#ifndef NDEBUG
  if( yyTraceFILE ){
    fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt);
  }
#endif
  while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
  /* Here code is inserted which will execute if the parser
  ** stack ever overflows */
/******** Begin %stack_overflow code ******************************************/
/******** End %stack_overflow code ********************************************/
  ParseARG_STORE; /* Suppress warning about unused %extra_argument var */
}
#endif
2390 
2391 /*
2392 ** Print tracing information for a SHIFT action
2393 */
#ifdef XAPIAN_DEBUG_LOG
static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){
  if( yyNewState<YYNSTATE ){
    /* An ordinary shift into a parser state. */
    LOGLINE(QUERYPARSER, zTag << " '" <<
            yyTokenName[yypParser->yystack.back().major] <<
            "', go to state " << yyNewState);
  }else{
    /* A shift-reduce action: the value encodes a pending reduce. */
    LOGLINE(QUERYPARSER, zTag << " '" <<
            yyTokenName[yypParser->yystack.back().major] <<
            "', pending reduce " << yyNewState - YY_MIN_REDUCE);
  }
}
#else
/* In non-logging builds the trace call compiles away entirely. */
# define yyTraceShift(X,Y,Z)
#endif
2409 
2410 /*
2411 ** Perform a shift action.
2412 */
2413 static void yy_shift(
2414  yyParser *yypParser, /* The parser to be shifted */
2415  int yyNewState, /* The new state to shift in */
2416  int yyMajor, /* The major token to shift in */
2417  ParseTOKENTYPE yyMinor /* The minor token to shift in */
2418 ){
2419  if( yyNewState > YY_MAX_SHIFT ){
2420  yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
2421  }
2422  yypParser->yystack.push_back(yyStackEntry(yyNewState, yyMajor, yyMinor));
2423 #ifdef YYTRACKMAXSTACKDEPTH
2424  if( (int)(yypParser->yystack.size()>yypParser->yyhwm ){
2425  yypParser->yyhwm++;
2426  Assert( yypParser->yyhwm == (int)(yypParser->yystack.size() );
2427  }
2428 #endif
2429  yyTraceShift(yypParser, yyNewState, "Shift");
2430 }
2431 
/* The following table contains information about every rule that
** is used during the reduce.  There is one entry per grammar rule,
** indexed by rule number; nrhs holds the RHS symbol count negated,
** so the stack is popped by adding nrhs to the stack top index.
*/
static const struct {
  YYCODETYPE lhs;       /* Symbol on the left-hand side of the rule */
  signed char nrhs;     /* Negative of the number of RHS symbols in the rule */
} yyRuleInfo[] = {
  {   27,   -1 }, /* (0) query ::= expr */
  {   27,    0 }, /* (1) query ::= */
  {   28,   -3 }, /* (2) expr ::= bool_arg AND bool_arg */
  {   28,   -3 }, /* (3) expr ::= bool_arg NOT bool_arg */
  {   28,   -4 }, /* (4) expr ::= bool_arg AND NOT bool_arg */
  {   28,   -4 }, /* (5) expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
  {   28,   -3 }, /* (6) expr ::= bool_arg OR bool_arg */
  {   28,   -3 }, /* (7) expr ::= bool_arg XOR bool_arg */
  {   28,   -3 }, /* (8) expr ::= bool_arg SYN bool_arg */
  {   30,    0 }, /* (9) bool_arg ::= */
  {   29,   -1 }, /* (10) prob_expr ::= prob */
  {   31,   -1 }, /* (11) prob ::= RANGE */
  {   31,   -2 }, /* (12) prob ::= stop_prob RANGE */
  {   31,   -2 }, /* (13) prob ::= stop_term stop_term */
  {   31,   -2 }, /* (14) prob ::= prob stop_term */
  {   31,   -2 }, /* (15) prob ::= LOVE term */
  {   31,   -3 }, /* (16) prob ::= stop_prob LOVE term */
  {   31,   -2 }, /* (17) prob ::= HATE term */
  {   31,   -3 }, /* (18) prob ::= stop_prob HATE term */
  {   31,   -2 }, /* (19) prob ::= HATE BOOLEAN_FILTER */
  {   31,   -3 }, /* (20) prob ::= stop_prob HATE BOOLEAN_FILTER */
  {   31,   -1 }, /* (21) prob ::= BOOLEAN_FILTER */
  {   31,   -2 }, /* (22) prob ::= stop_prob BOOLEAN_FILTER */
  {   31,   -2 }, /* (23) prob ::= LOVE BOOLEAN_FILTER */
  {   31,   -3 }, /* (24) prob ::= stop_prob LOVE BOOLEAN_FILTER */
  {   33,   -1 }, /* (25) stop_prob ::= stop_term */
  {   34,   -1 }, /* (26) stop_term ::= TERM */
  {   32,   -1 }, /* (27) term ::= TERM */
  {   35,   -1 }, /* (28) compound_term ::= EDIT_TERM */
  {   35,   -1 }, /* (29) compound_term ::= WILD_TERM */
  {   35,   -1 }, /* (30) compound_term ::= PARTIAL_TERM */
  {   35,   -3 }, /* (31) compound_term ::= QUOTE phrase QUOTE */
  {   35,   -1 }, /* (32) compound_term ::= phrased_term */
  {   35,   -1 }, /* (33) compound_term ::= group */
  {   35,   -1 }, /* (34) compound_term ::= near_expr */
  {   35,   -1 }, /* (35) compound_term ::= adj_expr */
  {   35,   -3 }, /* (36) compound_term ::= BRA expr KET */
  {   35,   -2 }, /* (37) compound_term ::= SYNONYM TERM */
  {   35,   -4 }, /* (38) compound_term ::= SYNONYM QUOTE phrase QUOTE */
  {   35,   -1 }, /* (39) compound_term ::= UNBROKEN_WORDS */
  {   36,   -1 }, /* (40) phrase ::= TERM */
  {   36,   -1 }, /* (41) phrase ::= UNBROKEN_WORDS */
  {   36,   -2 }, /* (42) phrase ::= phrase TERM */
  {   36,   -2 }, /* (43) phrase ::= phrase UNBROKEN_WORDS */
  {   37,   -2 }, /* (44) phrased_term ::= TERM PHR_TERM */
  {   37,   -2 }, /* (45) phrased_term ::= phrased_term PHR_TERM */
  {   38,   -2 }, /* (46) group ::= TERM GROUP_TERM */
  {   38,   -2 }, /* (47) group ::= group GROUP_TERM */
  {   38,   -2 }, /* (48) group ::= group EMPTY_GROUP_OK */
  {   39,   -3 }, /* (49) near_expr ::= TERM NEAR TERM */
  {   39,   -3 }, /* (50) near_expr ::= near_expr NEAR TERM */
  {   40,   -3 }, /* (51) adj_expr ::= TERM ADJ TERM */
  {   40,   -3 }, /* (52) adj_expr ::= adj_expr ADJ TERM */
  {   28,   -1 }, /* (53) expr ::= prob_expr */
  {   30,   -1 }, /* (54) bool_arg ::= expr */
  {   29,   -1 }, /* (55) prob_expr ::= term */
  {   33,   -1 }, /* (56) stop_prob ::= prob */
  {   34,   -1 }, /* (57) stop_term ::= compound_term */
  {   32,   -1 }, /* (58) term ::= compound_term */
};
2499 
2500 static void yy_accept(yyParser*); /* Forward Declaration */
2501 
2502 /*
2503 ** Perform a reduce action and the shift that must immediately
2504 ** follow the reduce.
2505 **
2506 ** The yyLookahead and yyLookaheadToken parameters provide reduce actions
2507 ** access to the lookahead token (if any). The yyLookahead will be YYNOCODE
2508 ** if the lookahead token has already been consumed. As this procedure is
2509 ** only called from one place, optimizing compilers will in-line it, which
2510 ** means that the extra parameters have no performance impact.
2511 */
2512 static void yy_reduce(
2513  yyParser *yypParser, /* The parser */
2514  unsigned int yyruleno, /* Number of the rule by which to reduce */
2515  int yyLookahead, /* Lookahead token, or YYNOCODE if none */
2516  ParseTOKENTYPE yyLookaheadToken /* Value of the lookahead token */
2517 ){
2518  int yygoto; /* The next state */
2519  int yyact; /* The next action */
2520  yyStackEntry *yymsp; /* The top of the parser's stack */
2521  int yysize; /* Amount to pop the stack */
2523  (void)yyLookahead;
2524  (void)yyLookaheadToken;
2525  yymsp = &yypParser->yystack.back();
2526  Assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
2527 #ifdef XAPIAN_DEBUG_LOG
2528  {
2529  yysize = yyRuleInfo[yyruleno].nrhs;
2530  if( yysize ){
2531  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2532  ParseRuleName(yyruleno) << "], go to state " <<
2533  yymsp[yysize].stateno);
2534  } else {
2535  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2536  ParseRuleName(yyruleno) << "].");
2537  }
2538  }
2539 #endif /* XAPIAN_DEBUG_LOG */
2540  /* yygotominor = yyzerominor; */
2541 
2542  /* Check that the stack is large enough to grow by a single entry
2543  ** if the RHS of the rule is empty. This ensures that there is room
2544  ** enough on the stack to push the LHS value without invalidating
2545  ** pointers into the stack. */
2546  if( yyRuleInfo[yyruleno].nrhs==0 ){
2547 #if 1
2548  yypParser->yystack.resize(yypParser->yystack.size() + 1);
2549  yymsp = &(yypParser->yystack.back()) - 1;
2550 #else
2551 #ifdef YYTRACKMAXSTACKDEPTH
2552  if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
2553  yypParser->yyhwm++;
2554  Assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
2555  }
2556 #endif
2557 #if YYSTACKDEPTH>0
2558  if( yypParser->yytos>=yypParser->yystackEnd ){
2559  yyStackOverflow(yypParser);
2560  return;
2561  }
2562 #else
2563  if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){
2564  if( yyGrowStack(yypParser) ){
2565  yyStackOverflow(yypParser);
2566  return;
2567  }
2568  yymsp = yypParser->yytos;
2569  }
2570 #endif
2571 #endif
2572  }
2573 
2574  switch( yyruleno ){
2575  /* Beginning here are the reduction cases. A typical example
2576  ** follows:
2577  ** case 0:
2578  ** #line <lineno> <grammarfile>
2579  ** { ... } // User supplied code
2580  ** #line <lineno> <thisfile>
2581  ** break;
2582  */
2583 /********** Begin reduce actions **********************************************/
2584  YYMINORTYPE yylhsminor;
2585  case 0: /* query ::= expr */
2586 #line 2277 "queryparser/queryparser.lemony"
2587 {
2588  // Save the parsed query in the State structure so we can return it.
2589  if (yymsp[0].minor.yy1) {
2590  state->query = *yymsp[0].minor.yy1;
2591  delete yymsp[0].minor.yy1;
2592  } else {
2593  state->query = Query();
2594  }
2595 }
2596 #line 2597 "queryparser/queryparser_internal.cc"
2597  break;
2598  case 1: /* query ::= */
2599 #line 2287 "queryparser/queryparser.lemony"
2600 {
2601  // Handle a query string with no terms in.
2602  state->query = Query();
2603 }
2604 #line 2605 "queryparser/queryparser_internal.cc"
2605  break;
2606  case 2: /* expr ::= bool_arg AND bool_arg */
2607 #line 2299 "queryparser/queryparser.lemony"
2608 {
2609  VET_BOOL_ARGS(yymsp[-2].minor.yy1, yymsp[0].minor.yy1, "AND");
2610  *yymsp[-2].minor.yy1 &= *yymsp[0].minor.yy1;
2611  delete yymsp[0].minor.yy1;
2612 }
2613 #line 2614 "queryparser/queryparser_internal.cc"
2614  yy_destructor(yypParser,4,&yymsp[-1].minor);
2615  break;
2616  case 3: /* expr ::= bool_arg NOT bool_arg */
2617 #line 2305 "queryparser/queryparser.lemony"
2618 {
2619  if (!yymsp[-2].minor.yy1 && (state->flags & QueryParser::FLAG_PURE_NOT)) {
2620  // 'NOT foo' -> '(0 * <alldocuments>) NOT foo'
2621  //
2622  // We scale the <alldocuments> by 0 so it doesn't count towards the
2623  // number of matching subqueries since that allows the query optimiser
2624  // to eliminate it if other subqueries are combined in an AND-like
2625  // way (e.g. 'bar AND (NOT foo)').
2626  yymsp[-2].minor.yy1 = new Query(0.0, Query(string(), 1, 0));
2627  }
2628  VET_BOOL_ARGS(yymsp[-2].minor.yy1, yymsp[0].minor.yy1, "NOT");
2629  *yymsp[-2].minor.yy1 &= ~*yymsp[0].minor.yy1;
2630  delete yymsp[0].minor.yy1;
2631 }
2632 #line 2633 "queryparser/queryparser_internal.cc"
2633  yy_destructor(yypParser,5,&yymsp[-1].minor);
2634  break;
2635  case 4: /* expr ::= bool_arg AND NOT bool_arg */
2636 #line 2320 "queryparser/queryparser.lemony"
2637 {
2638  VET_BOOL_ARGS(yymsp[-3].minor.yy1, yymsp[0].minor.yy1, "AND NOT");
2639  *yymsp[-3].minor.yy1 &= ~*yymsp[0].minor.yy1;
2640  delete yymsp[0].minor.yy1;
2641 }
2642 #line 2643 "queryparser/queryparser_internal.cc"
2643  yy_destructor(yypParser,4,&yymsp[-2].minor);
2644  yy_destructor(yypParser,5,&yymsp[-1].minor);
2645  break;
2646  case 5: /* expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
2647 #line 2326 "queryparser/queryparser.lemony"
2648 {
2649  VET_BOOL_ARGS(yymsp[-3].minor.yy1, yymsp[0].minor.yy1, "AND");
2650  *yymsp[-3].minor.yy1 &= ~*yymsp[0].minor.yy1;
2651  delete yymsp[0].minor.yy1;
2652 }
2653 #line 2654 "queryparser/queryparser_internal.cc"
2654  yy_destructor(yypParser,4,&yymsp[-2].minor);
2655  yy_destructor(yypParser,10,&yymsp[-1].minor);
2656  break;
2657  case 6: /* expr ::= bool_arg OR bool_arg */
2658 #line 2332 "queryparser/queryparser.lemony"
2659 {
2660  VET_BOOL_ARGS(yymsp[-2].minor.yy1, yymsp[0].minor.yy1, "OR");
2661  *yymsp[-2].minor.yy1 |= *yymsp[0].minor.yy1;
2662  delete yymsp[0].minor.yy1;
2663 }
2664 #line 2665 "queryparser/queryparser_internal.cc"
2665  yy_destructor(yypParser,2,&yymsp[-1].minor);
2666  break;
2667  case 7: /* expr ::= bool_arg XOR bool_arg */
2668 #line 2338 "queryparser/queryparser.lemony"
2669 {
2670  VET_BOOL_ARGS(yymsp[-2].minor.yy1, yymsp[0].minor.yy1, "XOR");
2671  *yymsp[-2].minor.yy1 ^= *yymsp[0].minor.yy1;
2672  delete yymsp[0].minor.yy1;
2673 }
2674 #line 2675 "queryparser/queryparser_internal.cc"
2675  yy_destructor(yypParser,3,&yymsp[-1].minor);
2676  break;
2677  case 8: /* expr ::= bool_arg SYN bool_arg */
2678 #line 2344 "queryparser/queryparser.lemony"
2679 {
2680  VET_BOOL_ARGS(yymsp[-2].minor.yy1, yymsp[0].minor.yy1, "SYN");
2681  *yymsp[-2].minor.yy1 = Query(Query::OP_SYNONYM, *yymsp[-2].minor.yy1, *yymsp[0].minor.yy1);
2682  delete yymsp[0].minor.yy1;
2683 }
2684 #line 2685 "queryparser/queryparser_internal.cc"
2685  yy_destructor(yypParser,12,&yymsp[-1].minor);
2686  break;
2687  case 9: /* bool_arg ::= */
2688 #line 2357 "queryparser/queryparser.lemony"
2689 {
2690  // Set the argument to NULL, which enables the bool_arg-using rules in
2691  // expr above to report uses of AND, OR, etc which don't have two
2692  // arguments.
2693  yymsp[1].minor.yy1 = NULL;
2694 }
2695 #line 2696 "queryparser/queryparser_internal.cc"
2696  break;
2697  case 10: /* prob_expr ::= prob */
2698 #line 2369 "queryparser/queryparser.lemony"
2699 {
2700  yylhsminor.yy1 = yymsp[0].minor.yy18->query;
2701  yymsp[0].minor.yy18->query = NULL;
2702  // Handle any "+ terms".
2703  if (yymsp[0].minor.yy18->love) {
2704  if (yymsp[0].minor.yy18->love->empty()) {
2705  // +<nothing>.
2706  delete yylhsminor.yy1;
2707  yylhsminor.yy1 = yymsp[0].minor.yy18->love;
2708  } else if (yylhsminor.yy1) {
2709  swap(yylhsminor.yy1, yymsp[0].minor.yy18->love);
2710  add_to_query(yylhsminor.yy1, Query::OP_AND_MAYBE, yymsp[0].minor.yy18->love);
2711  } else {
2712  yylhsminor.yy1 = yymsp[0].minor.yy18->love;
2713  }
2714  yymsp[0].minor.yy18->love = NULL;
2715  }
2716  // Handle any boolean filters.
2717  if (!yymsp[0].minor.yy18->filter.empty()) {
2718  if (yylhsminor.yy1) {
2719  add_to_query(yylhsminor.yy1, Query::OP_FILTER, yymsp[0].minor.yy18->merge_filters());
2720  } else {
2721  // Make the query a boolean one.
2722  yylhsminor.yy1 = new Query(Query::OP_SCALE_WEIGHT, yymsp[0].minor.yy18->merge_filters(), 0.0);
2723  }
2724  }
2725  // Handle any "- terms".
2726  if (yymsp[0].minor.yy18->hate && !yymsp[0].minor.yy18->hate->empty()) {
2727  if (!yylhsminor.yy1) {
2728  // Can't just hate!
2729  yy_parse_failed(yypParser);
2730  return;
2731  }
2732  *yylhsminor.yy1 = Query(Query::OP_AND_NOT, *yylhsminor.yy1, *yymsp[0].minor.yy18->hate);
2733  }
2734  delete yymsp[0].minor.yy18;
2735 }
2736 #line 2737 "queryparser/queryparser_internal.cc"
2737  yymsp[0].minor.yy1 = yylhsminor.yy1;
2738  break;
2739  case 11: /* prob ::= RANGE */
2740 #line 2417 "queryparser/queryparser.lemony"
2741 {
2742  string grouping = yymsp[0].minor.yy0->name;
2743  const Query & range = yymsp[0].minor.yy0->as_range_query();
2744  yymsp[0].minor.yy18 = new ProbQuery; /*P-overwrites-R*/
2745  yymsp[0].minor.yy18->add_filter_range(grouping, range);
2746 }
2747 #line 2748 "queryparser/queryparser_internal.cc"
2748  break;
2749  case 12: /* prob ::= stop_prob RANGE */
2750 #line 2424 "queryparser/queryparser.lemony"
2751 {
2752  string grouping = yymsp[0].minor.yy0->name;
2753  const Query & range = yymsp[0].minor.yy0->as_range_query();
2754  yymsp[-1].minor.yy18->append_filter_range(grouping, range);
2755 }
2756 #line 2757 "queryparser/queryparser_internal.cc"
2757  break;
2758  case 13: /* prob ::= stop_term stop_term */
2759 #line 2430 "queryparser/queryparser.lemony"
2760 {
2761  yymsp[-1].minor.yy18 = new ProbQuery(yymsp[-1].minor.yy1); /*P-overwrites-T*/
2762  if (yymsp[0].minor.yy1) {
2763  Query::op op = state->default_op();
2764  if (yymsp[-1].minor.yy18->query && is_positional(op)) {
2765  // If default_op is OP_NEAR or OP_PHRASE, set the window size to
2766  // 11 for the first pair of terms and it will automatically grow
2767  // by one for each subsequent term.
2768  Query * subqs[2] = { yymsp[-1].minor.yy18->query, yymsp[0].minor.yy1 };
2769  *(yymsp[-1].minor.yy18->query) = Query(op, subqs, subqs + 2, 11);
2770  delete yymsp[0].minor.yy1;
2771  } else {
2772  add_to_query(yymsp[-1].minor.yy18->query, op, yymsp[0].minor.yy1);
2773  }
2774  }
2775 }
2776 #line 2777 "queryparser/queryparser_internal.cc"
2777  break;
2778  case 14: /* prob ::= prob stop_term */
2779 #line 2447 "queryparser/queryparser.lemony"
2780 {
2781  // If yymsp[0].minor.yy1 is a stopword, there's nothing to do here.
2782  if (yymsp[0].minor.yy1) add_to_query(yymsp[-1].minor.yy18->query, state->default_op(), yymsp[0].minor.yy1);
2783 }
2784 #line 2785 "queryparser/queryparser_internal.cc"
2785  break;
2786  case 15: /* prob ::= LOVE term */
2787 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2788 #line 2452 "queryparser/queryparser.lemony"
2789 {
2790  yymsp[-1].minor.yy18 = new ProbQuery;
2791  if (state->default_op() == Query::OP_AND) {
2792  yymsp[-1].minor.yy18->query = yymsp[0].minor.yy1;
2793  } else {
2794  yymsp[-1].minor.yy18->love = yymsp[0].minor.yy1;
2795  }
2796 }
2797 #line 2798 "queryparser/queryparser_internal.cc"
2798 }
2799  break;
2800  case 16: /* prob ::= stop_prob LOVE term */
2801 #line 2461 "queryparser/queryparser.lemony"
2802 {
2803  if (state->default_op() == Query::OP_AND) {
2804  /* The default op is AND, so we just put loved terms into the query
2805  * (in this case the only effect of love is to ignore the stopword
2806  * list). */
2807  add_to_query(yymsp[-2].minor.yy18->query, Query::OP_AND, yymsp[0].minor.yy1);
2808  } else {
2809  add_to_query(yymsp[-2].minor.yy18->love, Query::OP_AND, yymsp[0].minor.yy1);
2810  }
2811 }
2812 #line 2813 "queryparser/queryparser_internal.cc"
2813  yy_destructor(yypParser,8,&yymsp[-1].minor);
2814  break;
2815  case 17: /* prob ::= HATE term */
2816 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2817 #line 2472 "queryparser/queryparser.lemony"
2818 {
2819  yymsp[-1].minor.yy18 = new ProbQuery;
2820  yymsp[-1].minor.yy18->hate = yymsp[0].minor.yy1;
2821 }
2822 #line 2823 "queryparser/queryparser_internal.cc"
2823 }
2824  break;
2825  case 18: /* prob ::= stop_prob HATE term */
2826 #line 2477 "queryparser/queryparser.lemony"
2827 {
2828  add_to_query(yymsp[-2].minor.yy18->hate, Query::OP_OR, yymsp[0].minor.yy1);
2829 }
2830 #line 2831 "queryparser/queryparser_internal.cc"
2831  yy_destructor(yypParser,9,&yymsp[-1].minor);
2832  break;
2833  case 19: /* prob ::= HATE BOOLEAN_FILTER */
2834 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2835 #line 2481 "queryparser/queryparser.lemony"
2836 {
2837  yymsp[-1].minor.yy18 = new ProbQuery;
2838  yymsp[-1].minor.yy18->hate = new Query(yymsp[0].minor.yy0->get_query());
2839  delete yymsp[0].minor.yy0;
2840 }
2841 #line 2842 "queryparser/queryparser_internal.cc"
2842 }
2843  break;
2844  case 20: /* prob ::= stop_prob HATE BOOLEAN_FILTER */
2845 #line 2487 "queryparser/queryparser.lemony"
2846 {
2847  add_to_query(yymsp[-2].minor.yy18->hate, Query::OP_OR, yymsp[0].minor.yy0->get_query());
2848  delete yymsp[0].minor.yy0;
2849 }
2850 #line 2851 "queryparser/queryparser_internal.cc"
2851  yy_destructor(yypParser,9,&yymsp[-1].minor);
2852  break;
2853  case 21: /* prob ::= BOOLEAN_FILTER */
2854 #line 2492 "queryparser/queryparser.lemony"
2855 {
2856  yylhsminor.yy18 = new ProbQuery;
2857  yylhsminor.yy18->add_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2858  delete yymsp[0].minor.yy0;
2859 }
2860 #line 2861 "queryparser/queryparser_internal.cc"
2861  yymsp[0].minor.yy18 = yylhsminor.yy18;
2862  break;
2863  case 22: /* prob ::= stop_prob BOOLEAN_FILTER */
2864 #line 2498 "queryparser/queryparser.lemony"
2865 {
2866  yymsp[-1].minor.yy18->append_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2867  delete yymsp[0].minor.yy0;
2868 }
2869 #line 2870 "queryparser/queryparser_internal.cc"
2870  break;
2871  case 23: /* prob ::= LOVE BOOLEAN_FILTER */
2872 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2873 #line 2503 "queryparser/queryparser.lemony"
2874 {
2875  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2876  yymsp[-1].minor.yy18 = new ProbQuery;
2877  yymsp[-1].minor.yy18->filter[yymsp[0].minor.yy0->get_grouping()] = yymsp[0].minor.yy0->get_query();
2878  delete yymsp[0].minor.yy0;
2879 }
2880 #line 2881 "queryparser/queryparser_internal.cc"
2881 }
2882  break;
2883  case 24: /* prob ::= stop_prob LOVE BOOLEAN_FILTER */
2884 #line 2510 "queryparser/queryparser.lemony"
2885 {
2886  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2887  // We OR filters with the same prefix...
2888  Query & q = yymsp[-2].minor.yy18->filter[yymsp[0].minor.yy0->get_grouping()];
2889  q |= yymsp[0].minor.yy0->get_query();
2890  delete yymsp[0].minor.yy0;
2891 }
2892 #line 2893 "queryparser/queryparser_internal.cc"
2893  yy_destructor(yypParser,8,&yymsp[-1].minor);
2894  break;
2895  case 25: /* stop_prob ::= stop_term */
2896 #line 2525 "queryparser/queryparser.lemony"
2897 {
2898  yymsp[0].minor.yy18 = new ProbQuery(yymsp[0].minor.yy1); /*P-overwrites-T*/
2899 }
2900 #line 2901 "queryparser/queryparser_internal.cc"
2901  break;
2902  case 26: /* stop_term ::= TERM */
2903 #line 2538 "queryparser/queryparser.lemony"
2904 {
2905  if (state->is_stopword(yymsp[0].minor.yy0)) {
2906  yylhsminor.yy1 = NULL;
2907  state->add_to_stoplist(yymsp[0].minor.yy0);
2908  } else {
2909  yylhsminor.yy1 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2910  }
2911  delete yymsp[0].minor.yy0;
2912 }
2913 #line 2914 "queryparser/queryparser_internal.cc"
2914  yymsp[0].minor.yy1 = yylhsminor.yy1;
2915  break;
2916  case 27: /* term ::= TERM */
2917 #line 2555 "queryparser/queryparser.lemony"
2918 {
2919  yylhsminor.yy1 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2920  delete yymsp[0].minor.yy0;
2921 }
2922 #line 2923 "queryparser/queryparser_internal.cc"
2923  yymsp[0].minor.yy1 = yylhsminor.yy1;
2924  break;
2925  case 28: /* compound_term ::= EDIT_TERM */
2926 #line 2570 "queryparser/queryparser.lemony"
2927 { yymsp[0].minor.yy1 = yymsp[0].minor.yy0->as_fuzzy_query(state); /*T-overwrites-U*/ }
2928 #line 2929 "queryparser/queryparser_internal.cc"
2929  break;
2930  case 29: /* compound_term ::= WILD_TERM */
2931 #line 2573 "queryparser/queryparser.lemony"
2932 { yymsp[0].minor.yy1 = yymsp[0].minor.yy0->as_wildcarded_query(state); /*T-overwrites-U*/ }
2933 #line 2934 "queryparser/queryparser_internal.cc"
2934  break;
2935  case 30: /* compound_term ::= PARTIAL_TERM */
2936 #line 2576 "queryparser/queryparser.lemony"
2937 { yymsp[0].minor.yy1 = yymsp[0].minor.yy0->as_partial_query(state); /*T-overwrites-U*/ }
2938 #line 2939 "queryparser/queryparser_internal.cc"
2939  break;
2940  case 31: /* compound_term ::= QUOTE phrase QUOTE */
2941 { yy_destructor(yypParser,21,&yymsp[-2].minor);
2942 #line 2579 "queryparser/queryparser.lemony"
2943 { yymsp[-2].minor.yy1 = yymsp[-1].minor.yy36->as_phrase_query(); }
2944 #line 2945 "queryparser/queryparser_internal.cc"
2945  yy_destructor(yypParser,21,&yymsp[0].minor);
2946 }
2947  break;
2948  case 32: /* compound_term ::= phrased_term */
2949 #line 2582 "queryparser/queryparser.lemony"
2950 { yymsp[0].minor.yy1 = yymsp[0].minor.yy36->as_phrase_query(); /*T-overwrites-P*/ }
2951 #line 2952 "queryparser/queryparser_internal.cc"
2952  break;
2953  case 33: /* compound_term ::= group */
2954 #line 2585 "queryparser/queryparser.lemony"
2955 { yymsp[0].minor.yy1 = yymsp[0].minor.yy32->as_group(state); /*T-overwrites-P*/ }
2956 #line 2957 "queryparser/queryparser_internal.cc"
2957  break;
2958  case 34: /* compound_term ::= near_expr */
2959 #line 2588 "queryparser/queryparser.lemony"
2960 { yymsp[0].minor.yy1 = yymsp[0].minor.yy36->as_near_query(); /*T-overwrites-P*/ }
2961 #line 2962 "queryparser/queryparser_internal.cc"
2962  break;
2963  case 35: /* compound_term ::= adj_expr */
2964 #line 2591 "queryparser/queryparser.lemony"
2965 { yymsp[0].minor.yy1 = yymsp[0].minor.yy36->as_adj_query(); /*T-overwrites-P*/ }
2966 #line 2967 "queryparser/queryparser_internal.cc"
2967  break;
2968  case 36: /* compound_term ::= BRA expr KET */
2969 { yy_destructor(yypParser,22,&yymsp[-2].minor);
2970 #line 2594 "queryparser/queryparser.lemony"
2971 { yymsp[-2].minor.yy1 = yymsp[-1].minor.yy1; }
2972 #line 2973 "queryparser/queryparser_internal.cc"
2973  yy_destructor(yypParser,23,&yymsp[0].minor);
2974 }
2975  break;
2976  case 37: /* compound_term ::= SYNONYM TERM */
2977 { yy_destructor(yypParser,11,&yymsp[-1].minor);
2978 #line 2596 "queryparser/queryparser.lemony"
2979 {
2980  yymsp[-1].minor.yy1 = new Query(yymsp[0].minor.yy0->get_query_with_synonyms());
2981  delete yymsp[0].minor.yy0;
2982 }
2983 #line 2984 "queryparser/queryparser_internal.cc"
2984 }
2985  break;
2986  case 38: /* compound_term ::= SYNONYM QUOTE phrase QUOTE */
2987 { yy_destructor(yypParser,11,&yymsp[-3].minor);
2988 #line 2602 "queryparser/queryparser.lemony"
2989 { yymsp[-3].minor.yy1 = yymsp[-1].minor.yy36->as_synonym_phrase_query(state); }
2990 #line 2991 "queryparser/queryparser_internal.cc"
2991  yy_destructor(yypParser,21,&yymsp[-2].minor);
2992  yy_destructor(yypParser,21,&yymsp[0].minor);
2993 }
2994  break;
2995  case 39: /* compound_term ::= UNBROKEN_WORDS */
2996 #line 2604 "queryparser/queryparser.lemony"
2997 {
2998  { yymsp[0].minor.yy1 = yymsp[0].minor.yy0->as_unbroken_query(); /*T-overwrites-U*/ }
2999 }
3000 #line 3001 "queryparser/queryparser_internal.cc"
3001  break;
3002  case 40: /* phrase ::= TERM */
3003 #line 2614 "queryparser/queryparser.lemony"
3004 {
3005  yylhsminor.yy36 = Terms::create(state);
3006  yylhsminor.yy36->add_positional_term(yymsp[0].minor.yy0);
3007 }
3008 #line 3009 "queryparser/queryparser_internal.cc"
3009  yymsp[0].minor.yy36 = yylhsminor.yy36;
3010  break;
3011  case 41: /* phrase ::= UNBROKEN_WORDS */
3012 #line 2619 "queryparser/queryparser.lemony"
3013 {
3014  yylhsminor.yy36 = Terms::create(state);
3015  yymsp[0].minor.yy0->as_positional_unbroken(yylhsminor.yy36);
3016 }
3017 #line 3018 "queryparser/queryparser_internal.cc"
3018  yymsp[0].minor.yy36 = yylhsminor.yy36;
3019  break;
3020  case 42: /* phrase ::= phrase TERM */
3021  case 45: /* phrased_term ::= phrased_term PHR_TERM */ yytestcase(yyruleno==45);
3022 #line 2624 "queryparser/queryparser.lemony"
3023 {
3024  yymsp[-1].minor.yy36->add_positional_term(yymsp[0].minor.yy0);
3025 }
3026 #line 3027 "queryparser/queryparser_internal.cc"
3027  break;
3028  case 43: /* phrase ::= phrase UNBROKEN_WORDS */
3029 #line 2628 "queryparser/queryparser.lemony"
3030 {
3031  yymsp[0].minor.yy0->as_positional_unbroken(yymsp[-1].minor.yy36);
3032 }
3033 #line 3034 "queryparser/queryparser_internal.cc"
3034  break;
3035  case 44: /* phrased_term ::= TERM PHR_TERM */
3036 #line 2639 "queryparser/queryparser.lemony"
3037 {
3038  yylhsminor.yy36 = Terms::create(state);
3039  yylhsminor.yy36->add_positional_term(yymsp[-1].minor.yy0);
3040  yylhsminor.yy36->add_positional_term(yymsp[0].minor.yy0);
3041 }
3042 #line 3043 "queryparser/queryparser_internal.cc"
3043  yymsp[-1].minor.yy36 = yylhsminor.yy36;
3044  break;
3045  case 46: /* group ::= TERM GROUP_TERM */
3046 #line 2655 "queryparser/queryparser.lemony"
3047 {
3048  yymsp[-1].minor.yy32 = TermGroup::create(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); /*P-overwrites-T*/
3049 }
3050 #line 3051 "queryparser/queryparser_internal.cc"
3051  break;
3052  case 47: /* group ::= group GROUP_TERM */
3053 #line 2659 "queryparser/queryparser.lemony"
3054 {
3055  yymsp[-1].minor.yy32->add_term(yymsp[0].minor.yy0);
3056 }
3057 #line 3058 "queryparser/queryparser_internal.cc"
3058  break;
3059  case 48: /* group ::= group EMPTY_GROUP_OK */
3060 #line 2663 "queryparser/queryparser.lemony"
3061 {
3062  yymsp[-1].minor.yy32->set_empty_ok();
3063 }
3064 #line 3065 "queryparser/queryparser_internal.cc"
3065  yy_destructor(yypParser,25,&yymsp[0].minor);
3066  break;
3067  case 49: /* near_expr ::= TERM NEAR TERM */
3068  case 51: /* adj_expr ::= TERM ADJ TERM */ yytestcase(yyruleno==51);
3069 #line 2673 "queryparser/queryparser.lemony"
3070 {
3071  yylhsminor.yy36 = Terms::create(state);
3072  yylhsminor.yy36->add_positional_term(yymsp[-2].minor.yy0);
3073  yylhsminor.yy36->add_positional_term(yymsp[0].minor.yy0);
3074  if (yymsp[-1].minor.yy0) {
3075  yylhsminor.yy36->adjust_window(yymsp[-1].minor.yy0->get_termpos());
3076  delete yymsp[-1].minor.yy0;
3077  }
3078 }
3079 #line 3080 "queryparser/queryparser_internal.cc"
3080  yymsp[-2].minor.yy36 = yylhsminor.yy36;
3081  break;
3082  case 50: /* near_expr ::= near_expr NEAR TERM */
3083  case 52: /* adj_expr ::= adj_expr ADJ TERM */ yytestcase(yyruleno==52);
3084 #line 2683 "queryparser/queryparser.lemony"
3085 {
3086  yymsp[-2].minor.yy36->add_positional_term(yymsp[0].minor.yy0);
3087  if (yymsp[-1].minor.yy0) {
3088  yymsp[-2].minor.yy36->adjust_window(yymsp[-1].minor.yy0->get_termpos());
3089  delete yymsp[-1].minor.yy0;
3090  }
3091 }
3092 #line 3093 "queryparser/queryparser_internal.cc"
3093  break;
3094  default:
3095  /* (53) expr ::= prob_expr (OPTIMIZED OUT) */ Assert(yyruleno!=53);
3096  /* (54) bool_arg ::= expr */ yytestcase(yyruleno==54);
3097  /* (55) prob_expr ::= term (OPTIMIZED OUT) */ Assert(yyruleno!=55);
3098  /* (56) stop_prob ::= prob */ yytestcase(yyruleno==56);
3099  /* (57) stop_term ::= compound_term */ yytestcase(yyruleno==57);
3100  /* (58) term ::= compound_term */ yytestcase(yyruleno==58);
3101  break;
3102 /********** End reduce actions ************************************************/
3103  }
3104  Assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
3105  yygoto = yyRuleInfo[yyruleno].lhs;
3106  yysize = yyRuleInfo[yyruleno].nrhs;
3107  yyact = yy_find_reduce_action(yymsp[yysize].stateno,static_cast<YYCODETYPE>(yygoto));
3108 
3109  /* There are no SHIFTREDUCE actions on nonterminals because the table
3110  ** generator has simplified them to pure REDUCE actions. */
3111  Assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );
3112 
3113  /* It is not possible for a REDUCE to be followed by an error */
3114  Assert( yyact!=YY_ERROR_ACTION );
3115 
3116  yymsp += yysize+1;
3117  if (yysize) {
3118  yypParser->yystack.resize(UNSIGNED_OVERFLOW_OK(yypParser->yystack.size() +
3119  (yysize+1)));
3120  }
3121  yymsp->stateno = static_cast<YYACTIONTYPE>(yyact);
3122  yymsp->major = static_cast<YYCODETYPE>(yygoto);
3123  yyTraceShift(yypParser, yyact, "... then shift");
3124 }
3125 
3126 /*
3127 ** The following code executes when the parse fails
3128 */
3129 #ifndef YYNOERRORRECOVERY
3130 static void yy_parse_failed(
3131  yyParser *yypParser /* The parser */
3132 ){
/* NOTE(review): the original file's line 3133 is missing from this rendering
** (in the upstream lempar.c template it is `ParseARG_FETCH;`) — verify
** against the generated source before editing. */
3134  LOGLINE(QUERYPARSER, "Fail!");
/* Pop every entry above the bottom sentinel; yy_pop_parser_stack presumably
** runs the per-symbol %destructor for each popped entry — confirm in its
** definition (outside this view). */
3135  while( yypParser->yystack.size() > 1 ) yy_pop_parser_stack(yypParser);
3136  /* Here code is inserted which will be executed whenever the
3137  ** parser fails */
3138 /************ Begin %parse_failure code ***************************************/
3139 #line 2221 "queryparser/queryparser.lemony"
3140 
3141  // If we've not already set an error message, set a default one.
3142  if (!state->error) state->error = "parse error";
3143 #line 3144 "queryparser/queryparser_internal.cc"
3144 /************ End %parse_failure code *****************************************/
3145  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
3146 }
3147 #endif /* YYNOERRORRECOVERY */
3148 
3149 /*
3150 ** The following code executes when a syntax error first occurs.
3151 */
3152 static void yy_syntax_error(
3153  yyParser *yypParser, /* The parser */
3154  int yymajor, /* The major type of the error token */
3155  ParseTOKENTYPE yyminor /* The minor type of the error token */
3156 ){
/* NOTE(review): original line 3157 (`ParseARG_FETCH;` in the upstream
** template) is absent from this rendering — confirm against the generated
** file. */
3158  (void)yymajor;
3159  (void)yyminor;
3160 #define TOKEN yyminor
3161 /************ Begin %syntax_error code ****************************************/
3162 #line 2226 "queryparser/queryparser.lemony"
3163 
/* The grammar's %syntax_error action simply delegates to yy_parse_failed,
** which clears the stack and sets a default state->error message. */
3164  yy_parse_failed(yypParser);
3165 #line 3166 "queryparser/queryparser_internal.cc"
3166 /************ End %syntax_error code ******************************************/
3167  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
3168 }
3169 
3170 /*
3171 ** The following is executed when the parser accepts
3172 */
3173 static void yy_accept(
3174  yyParser *yypParser /* The parser */
3175 ){
3177  LOGLINE(QUERYPARSER, "Accept!");
3178 #ifndef YYNOERRORRECOVERY
/* Reset the error-suppression counter so a later reuse of this parser
** reports the first syntax error it sees. */
3179  yypParser->yyerrcnt = -1;
3180 #endif
/* On accept only the bottom sentinel entry should remain on the stack. */
3181  AssertEq( yypParser->yystack.size(), 1 );
3182  /* Here code is inserted which will be executed whenever the
3183  ** parser accepts */
3184 /*********** Begin %parse_accept code *****************************************/
3185 /*********** End %parse_accept code *******************************************/
3186  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
3187 }
3188 
3189 /* The main parser program.
3190 ** The first argument is a pointer to a structure obtained from
3191 ** "ParseAlloc" which describes the current state of the parser.
3192 ** The second argument is the major token number. The third is
3193 ** the minor token. The fourth optional argument is whatever the
3194 ** user wants (and specified in the grammar) and is available for
3195 ** use by the action routines.
3196 **
3197 ** Inputs:
3198 ** <ul>
3199 ** <li> A pointer to the parser (an opaque structure.)
3200 ** <li> The major token number.
3201 ** <li> The minor token number.
3202 ** <li> An option argument of a grammar-specified type.
3203 ** </ul>
3204 **
3205 ** Outputs:
3206 ** None.
3207 */
3208 static
3209 void Parse(
3210  yyParser *yypParser, /* The parser */
3211  int yymajor, /* The major token code number */
3212  ParseTOKENTYPE yyminor /* The value for the token */
3213  ParseARG_PDECL /* Optional %extra_argument parameter */
3214 ){
/* NOTE(review): yyminor is dereferenced as a pointer below
** (yyminor->name in the debug logging), so ParseTOKENTYPE is presumably a
** pointer to the lexer's token object — confirm where ParseTOKENTYPE is
** defined (outside this view). */
3215  YYMINORTYPE yyminorunion;
3216  unsigned int yyact; /* The parser action. */
3217 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
3218  int yyendofinput; /* True if we are at the end of input */
3219 #endif
3220 #ifdef YYERRORSYMBOL
3221  int yyerrorhit = 0; /* True if yymajor has invoked an error */
3222 #endif
3223 
3224 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
/* Token number 0 is the end-of-input marker by lemon convention. */
3225  yyendofinput = (yymajor==0);
3226 #endif
/* NOTE(review): original line 3227 is absent from this rendering (likely
** `ParseARG_STORE;` in the upstream template) — verify against the
** generated file. */
3228 
3229 #ifdef XAPIAN_DEBUG_LOG
3230  {
3231  int stateno = yypParser->yystack.back().stateno;
3232  if( stateno < YY_MIN_REDUCE ){
3233  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
3234  "'," << (yyminor ? yyminor->name : "<<null>>") <<
3235  "in state " << stateno);
3236  }else{
3237  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
3238  "'," << (yyminor ? yyminor->name : "<<null>>") <<
3239  "with pending reduce " << stateno-YY_MIN_REDUCE);
3240  }
3241  }
3242 #endif
3243 
/* Main parse loop: look up the action for the current token in the current
** state, then reduce, shift, accept, or enter error handling. The loop
** repeats while a reduce leaves the input token still unconsumed
** (yymajor != YYNOCODE). */
3244  do{
3245  yyact = yy_find_shift_action(yypParser,static_cast<YYCODETYPE>(yymajor));
3246  if( yyact >= YY_MIN_REDUCE ){
3247  yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,yyminor);
3248  }else if( yyact <= YY_MAX_SHIFTREDUCE ){
3249  yy_shift(yypParser,yyact,yymajor,yyminor);
3250 #ifndef YYNOERRORRECOVERY
3251  yypParser->yyerrcnt--;
3252 #endif
/* Token consumed by the shift; signal the loop to return for more input. */
3253  yymajor = YYNOCODE;
3254  }else if( yyact==YY_ACCEPT_ACTION ){
3255  yypParser->yystack.pop_back();
3256  yy_accept(yypParser);
3257  return;
3258  }else{
3259  Assert( yyact == YY_ERROR_ACTION );
3260  yyminorunion.yy0 = yyminor;
3261 #ifdef YYERRORSYMBOL
3262  int yymx;
3263 #endif
3264  LOGLINE(QUERYPARSER, "Syntax Error!");
3265 #ifdef YYERRORSYMBOL
3266  /* A syntax error has occurred.
3267  ** The response to an error depends upon whether or not the
3268  ** grammar defines an error token "ERROR".
3269  **
3270  ** This is what we do if the grammar does define ERROR:
3271  **
3272  ** * Call the %syntax_error function.
3273  **
3274  ** * Begin popping the stack until we enter a state where
3275  ** it is legal to shift the error symbol, then shift
3276  ** the error symbol.
3277  **
3278  ** * Set the error count to three.
3279  **
3280  ** * Begin accepting and shifting new tokens. No new error
3281  ** processing will occur until three tokens have been
3282  ** shifted successfully.
3283  **
3284  */
3285  if( yypParser->yyerrcnt<0 ){
3286  yy_syntax_error(yypParser,yymajor,yyminor);
3287  }
3288  yymx = yypParser->yystack.back().major;
3289  if( yymx==YYERRORSYMBOL || yyerrorhit ){
3290  LOGLINE(QUERYPARSER, "Discard input token " << ParseTokenName(yymajor));
3291  yy_destructor(yypParser, static_cast<YYCODETYPE>(yymajor), &yyminorunion);
3292  yymajor = YYNOCODE;
3293  }else{
3294  while( !yypParser->yystack.empty()
3295  && yymx != YYERRORSYMBOL
3296  && (yyact = yy_find_reduce_action(
3297  yypParser->yystack.back().stateno,
3298  YYERRORSYMBOL)) >= YY_MIN_REDUCE
3299  ){
3300  yy_pop_parser_stack(yypParser);
3301  }
3302  if( yypParser->yystack.empty() || yymajor==0 ){
3303  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
3304  yy_parse_failed(yypParser);
3305 #ifndef YYNOERRORRECOVERY
3306  yypParser->yyerrcnt = -1;
3307 #endif
3308  yymajor = YYNOCODE;
3309  }else if( yymx!=YYERRORSYMBOL ){
3310  yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor);
3311  }
3312  }
3313  yypParser->yyerrcnt = 3;
3314  yyerrorhit = 1;
3315 #elif defined(YYNOERRORRECOVERY)
3316  /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
3317  ** do any kind of error recovery. Instead, simply invoke the syntax
3318  ** error routine and continue going as if nothing had happened.
3319  **
3320  ** Applications can set this macro (for example inside %include) if
3321  ** they intend to abandon the parse upon the first syntax error seen.
3322  */
3323  yy_syntax_error(yypParser,yymajor, yyminor);
3324  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
3325  yymajor = YYNOCODE;
3326 
3327 #else /* YYERRORSYMBOL is not defined */
3328  /* This is what we do if the grammar does not define ERROR:
3329  **
3330  ** * Report an error message, and throw away the input token.
3331  **
3332  ** * If the input token is $, then fail the parse.
3333  **
3334  ** As before, subsequent error messages are suppressed until
3335  ** three input tokens have been successfully shifted.
3336  */
3337  if( yypParser->yyerrcnt<=0 ){
3338  yy_syntax_error(yypParser,yymajor, yyminor);
3339  }
3340  yypParser->yyerrcnt = 3;
3341  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
3342  if( yyendofinput ){
3343  yy_parse_failed(yypParser);
3344 #ifndef YYNOERRORRECOVERY
3345  yypParser->yyerrcnt = -1;
3346 #endif
3347  }
3348  yymajor = YYNOCODE;
3349 #endif
3350  }
3351  }while( yymajor!=YYNOCODE && yypParser->yystack.size() > 1 );
3352 #ifdef XAPIAN_DEBUG_LOG
3353  {
3354  int i;
3355  LOGLINE(QUERYPARSER, "Return. Stack=");
3356  for(i=1; i<=(int)yypParser->yystack.size(); i++)
3357  LOGLINE(QUERYPARSER, yyTokenName[yypParser->yystack[i].major]);
3358  }
3359 #endif
3360  return;
3361 }
3362 
3363 // Select C++ syntax highlighting in vim editor: vim: syntax=cpp
3364 #line 1088 "queryparser/queryparser.lemony"
3365 
3366 
3367 Query
3368 QueryParser::Internal::parse_query(string_view qs, unsigned flags,
3369  string_view default_prefix)
3370 {
3371 #ifndef USE_ICU
3372  // Overall it seems best to check for this up front - otherwise we create
3373  // the unhelpful situation where a failure to enable ICU in the build could
3374  // be missed because queries in scripts which don't need word splitting
3375  // still work fine.
3376  if (flags & FLAG_WORD_BREAKS) {
3377  throw Xapian::FeatureUnavailableError("FLAG_WORD_BREAKS requires "
3378  "building Xapian to use ICU");
3379  }
3380 #endif
3381  bool try_word_break =
3382  (flags & (FLAG_NGRAMS|FLAG_WORD_BREAKS)) || is_ngram_enabled();
3383 
3384  // Set ranges if we may have to handle ranges in the query.
3385  bool ranges = !rangeprocs.empty() && (qs.find("..") != string::npos);
3386 
3387  termpos term_pos = 1;
3388  Utf8Iterator it(qs), end;
3389 
3390  State state(this, flags);
3391 
3392  // To successfully apply more than one spelling correction to a query
3393  // string, we must keep track of the offset due to previous corrections.
3394  int correction_offset = 0;
3395  corrected_query.resize(0);
3396 
3397  // Stack of prefixes, used for phrases and subexpressions.
3398  list<const FieldInfo *> prefix_stack;
3399 
3400  // If default_prefix is specified, use it. Otherwise, use any list
3401  // that has been set for the empty prefix.
3402  const FieldInfo def_pfx = FieldInfo{NON_BOOLEAN}.append(default_prefix);
3403  {
3404  const FieldInfo * default_field_info = &def_pfx;
3405  if (default_prefix.empty()) {
3406  auto f = field_map.find(string_view{});
3407  if (f != field_map.end()) default_field_info = &(f->second);
3408  }
3409 
3410  // We always have the current prefix on the top of the stack.
3411  prefix_stack.push_back(default_field_info);
3412  }
3413 
3414  yyParser parser;
3415 
3416  unsigned newprev = ' ';
3417 main_lex_loop:
3418  enum {
3419  DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP,
3420  IN_GROUP2, EXPLICIT_SYNONYM
3421  } mode = DEFAULT;
3422  while (it != end && !state.error) {
3423  bool last_was_operator = false;
3424  bool last_was_operator_needing_term = false;
3425  if (mode == EXPLICIT_SYNONYM) mode = DEFAULT;
3426  if (false) {
3427 just_had_operator:
3428  if (it == end) break;
3429  mode = DEFAULT;
3430  last_was_operator_needing_term = false;
3431  last_was_operator = true;
3432  }
3433  if (false) {
3434 just_had_operator_needing_term:
3435  last_was_operator_needing_term = true;
3436  last_was_operator = true;
3437  }
3438  if (mode == IN_PHRASED_TERM) mode = DEFAULT;
3439  if (is_whitespace(*it)) {
3440  newprev = ' ';
3441  ++it;
3442  it = find_if(it, end, is_not_whitespace);
3443  if (it == end) break;
3444  }
3445 
3446  if (ranges &&
3447  (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2)) {
3448  // Scan forward to see if this could be the "start of range"
3449  // token. Sadly this has O(n²) tendencies, though at least
3450  // "n" is the number of words in a query which is likely to
3451  // remain fairly small. FIXME: can we tokenise more elegantly?
3452  Utf8Iterator it_initial = it;
3453  Utf8Iterator p = it;
3454  unsigned ch = 0;
3455  while (p != end) {
3456  if (ch == '.' && *p == '.') {
3457  string a;
3458  while (it != p) {
3459  Unicode::append_utf8(a, *it++);
3460  }
3461  // Trim off the trailing ".".
3462  a.resize(a.size() - 1);
3463  ++p;
3464  // Either end of the range can be empty (for an open-ended
3465  // range) but both can't be empty.
3466  if (!a.empty() || (p != end && *p > ' ' && *p != ')')) {
3467  string b;
3468  // Allow any character except whitespace and ')' in the
3469  // upper bound.
3470  while (p != end && *p > ' ' && *p != ')') {
3471  Unicode::append_utf8(b, *p++);
3472  }
3473  Term * range = state.range(a, b);
3474  if (!range) {
3475  state.error = "Unknown range operation";
3476  if (a.find(':', 1) == string::npos) {
3477  goto done;
3478  }
3479  // Might be a boolean filter with ".." in. Leave
3480  // state.error in case it isn't.
3481  it = it_initial;
3482  break;
3483  }
3484  Parse(&parser, RANGE, range, &state);
3485  }
3486  it = p;
3487  goto main_lex_loop;
3488  }
3489  ch = *p;
3490  // Allow any character except whitespace and '(' in the lower
3491  // bound.
3492  if (ch <= ' ' || ch == '(') break;
3493  ++p;
3494  }
3495  }
3496 
3497  if (!is_wordchar(*it)) {
3498  unsigned prev = newprev;
3499  Utf8Iterator p = it;
3500  unsigned ch = *it++;
3501  newprev = ch;
3502  // Drop out of IN_GROUP mode.
3503  if (mode == IN_GROUP || mode == IN_GROUP2)
3504  mode = DEFAULT;
3505  switch (ch) {
3506  case '"':
3507  case 0x201c: // Left curly double quote.
3508  case 0x201d: // Right curly double quote.
3509  // Quoted phrase.
3510  if (mode == DEFAULT) {
3511  // Skip whitespace.
3512  it = find_if(it, end, is_not_whitespace);
3513  if (it == end) {
3514  // Ignore an unmatched " at the end of the query to
3515  // avoid generating an empty pair of QUOTEs which will
3516  // cause a parse error.
3517  goto done;
3518  }
3519  if (is_double_quote(*it)) {
3520  // Ignore empty "" (but only if we're not already
3521  // IN_QUOTES as we don't merge two adjacent quoted
3522  // phrases!)
3523  newprev = *it++;
3524  break;
3525  }
3526  }
3527  if (flags & QueryParser::FLAG_PHRASE) {
3528  if (ch == '"' && it != end && *it == '"') {
3529  ++it;
3530  // Handle "" inside a quoted phrase as an escaped " for
3531  // consistency with quoted boolean terms.
3532  break;
3533  }
3534  Parse(&parser, QUOTE, NULL, &state);
3535  if (mode == DEFAULT) {
3536  mode = IN_QUOTES;
3537  } else {
3538  // Remove the prefix we pushed for this phrase.
3539  if (mode == IN_PREFIXED_QUOTES)
3540  prefix_stack.pop_back();
3541  mode = DEFAULT;
3542  }
3543  }
3544  break;
3545 
3546  case '+': case '-': // Loved or hated term/phrase/subexpression.
3547  // Ignore + or - at the end of the query string.
3548  if (it == end) goto done;
3549  if (prev > ' ' && prev != '(') {
3550  // Or if not after whitespace or an open bracket.
3551  break;
3552  }
3553  if (is_whitespace(*it) || *it == '+' || *it == '-') {
3554  // Ignore + or - followed by a space, or further + or -.
3555  // Postfix + (such as in C++ and H+) is handled as part of
3556  // the term lexing code in parse_term().
3557  newprev = *it++;
3558  break;
3559  }
3560  if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
3561  int token;
3562  if (ch == '+') {
3563  token = LOVE;
3564  } else if (last_was_operator) {
3565  token = HATE_AFTER_AND;
3566  } else {
3567  token = HATE;
3568  }
3569  Parse(&parser, token, NULL, &state);
3570  goto just_had_operator_needing_term;
3571  }
3572  // Need to prevent the term after a LOVE or HATE starting a
3573  // term group...
3574  break;
3575 
3576  case '(': // Bracketed subexpression.
3577  // Skip whitespace.
3578  it = find_if(it, end, is_not_whitespace);
3579  // Ignore ( at the end of the query string.
3580  if (it == end) goto done;
3581  if (prev > ' ' && strchr("()+-", prev) == NULL) {
3582  // Or if not after whitespace or a bracket or '+' or '-'.
3583  break;
3584  }
3585  if (*it == ')') {
3586  // Ignore empty ().
3587  newprev = *it++;
3588  break;
3589  }
3590  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3591  prefix_stack.push_back(prefix_stack.back());
3592  Parse(&parser, BRA, NULL, &state);
3593  }
3594  break;
3595 
3596  case ')': // End of bracketed subexpression.
3597  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3598  // Remove the prefix we pushed for the corresponding BRA.
3599  // If brackets are unmatched, it's a syntax error, but
3600  // that's no excuse to SEGV!
3601  if (prefix_stack.size() > 1) prefix_stack.pop_back();
3602  Parse(&parser, KET, NULL, &state);
3603  }
3604  break;
3605 
3606  case '~': // Synonym expansion.
3607  // Ignore at the end of the query string.
3608  if (it == end) goto done;
3609  if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
3610  if (prev > ' ' && strchr("+-(", prev) == NULL) {
3611  // Or if not after whitespace, +, -, or an open bracket.
3612  break;
3613  }
3614  if (!is_wordchar(*it) && !is_double_quote(*it)) {
3615  // Ignore if not followed by a word character.
3616  break;
3617  }
3618  Parse(&parser, SYNONYM, NULL, &state);
3619  mode = EXPLICIT_SYNONYM;
3620  if (!is_double_quote(*it))
3621  goto just_had_operator_needing_term;
3622 
3623  // Support ~"foo bar" syntax to explicitly expand
3624  // a multi-word synonym.
3625 
3626  // Skip whitespace.
3627  ++it;
3628  it = find_if(it, end, is_not_whitespace);
3629  if (it == end) {
3630  // Ignore an unmatched " at the end of the query to
3631  // avoid generating an empty pair of QUOTEs which will
3632  // cause a parse error.
3633  goto done;
3634  }
3635  if (is_double_quote(*it)) {
3636  // Ignore empty ~"".
3637  newprev = *it++;
3638  break;
3639  }
3640  Parse(&parser, QUOTE, NULL, &state);
3641  mode = IN_QUOTES;
3642  }
3643  break;
3644  case '*':
3645  if (flags & FLAG_WILDCARD_MULTI) {
3646  it = p;
3647  goto leading_wildcard;
3648  }
3649  break;
3650  case '?':
3651  if (flags & FLAG_WILDCARD_SINGLE) {
3652  it = p;
3653  goto leading_wildcard;
3654  }
3655  break;
3656  }
3657  // Skip any other characters.
3658  continue;
3659  }
3660 
3661  Assert(is_wordchar(*it));
3662 
3663 leading_wildcard:
3664  size_t term_start_index = it.raw() - qs.data();
3665 
3666  newprev = 'A'; // Any letter will do...
3667 
3668  // A term, a prefix, or a boolean operator.
3669  const FieldInfo * field_info = NULL;
3670  if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2 || mode == EXPLICIT_SYNONYM) &&
3671  !field_map.empty()) {
3672  // Check for a fieldname prefix (e.g. title:historical).
3673  Utf8Iterator p = find_if(it, end, is_not_wordchar);
3674  if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
3675  string field;
3676  p = it;
3677  while (*p != ':')
3678  Unicode::append_utf8(field, *p++);
3679  auto f = field_map.find(field);
3680  if (f != field_map.end()) {
3681  // Special handling for prefixed fields, depending on the
3682  // type of the prefix.
3683  unsigned ch = *++p;
3684  field_info = &(f->second);
3685 
3686  if (field_info->type != NON_BOOLEAN) {
3687  // Drop out of IN_GROUP if we're in it.
3688  if (mode == IN_GROUP || mode == IN_GROUP2)
3689  mode = DEFAULT;
3690  it = p;
3691  string name;
3692  if (it != end && is_double_quote(*it)) {
3693  // Quoted boolean term (can contain any character).
3694  bool fancy = (*it != '"');
3695  ++it;
3696  while (it != end) {
3697  if (*it == '"') {
3698  // Interpret "" as an escaped ".
3699  if (++it == end || *it != '"')
3700  break;
3701  } else if (fancy && is_double_quote(*it)) {
3702  // If the opening quote was ASCII, then the
3703  // closing one must be too - otherwise
3704  // the user can't protect non-ASCII double
3705  // quote characters by quoting or escaping.
3706  ++it;
3707  break;
3708  }
3709  Unicode::append_utf8(name, *it++);
3710  }
3711  } else {
3712  // Can't boolean filter prefix a subexpression, so
3713  // just use anything following the prefix until the
3714  // next space or ')' as part of the boolean filter
3715  // term.
3716  while (it != end && *it > ' ' && *it != ')')
3717  Unicode::append_utf8(name, *it++);
3718  }
3719  // Build the unstemmed form in field.
3720  field += ':';
3721  field += name;
3722  // Clear any pending range error.
3723  state.error = NULL;
3724  Term * token = new Term(&state, name, field_info, field);
3725  Parse(&parser, BOOLEAN_FILTER, token, &state);
3726  continue;
3727  }
3728 
3729  if ((flags & FLAG_PHRASE) && is_double_quote(ch)) {
3730  // Prefixed phrase, e.g.: subject:"space flight"
3731  mode = IN_PREFIXED_QUOTES;
3732  Parse(&parser, QUOTE, NULL, &state);
3733  it = p;
3734  newprev = ch;
3735  ++it;
3736  prefix_stack.push_back(field_info);
3737  continue;
3738  }
3739 
3740  if (ch == '(' && (flags & FLAG_BOOLEAN)) {
3741  // Prefixed subexpression, e.g.: title:(fast NEAR food)
3742  mode = DEFAULT;
3743  Parse(&parser, BRA, NULL, &state);
3744  it = p;
3745  newprev = ch;
3746  ++it;
3747  prefix_stack.push_back(field_info);
3748  continue;
3749  }
3750 
3751  if (ch != ':') {
3752  // Allow 'path:/usr/local' but not 'foo::bar::baz'.
3753  while (is_phrase_generator(ch)) {
3754  if (++p == end)
3755  goto not_prefix;
3756  ch = *p;
3757  }
3758  }
3759 
3760  if (is_wordchar(ch) || is_extended_wildcard(ch, flags)) {
3761  // Prefixed term.
3762  it = p;
3763  } else {
3764 not_prefix:
3765  // It looks like a prefix but isn't, so parse it as
3766  // text instead.
3767  field_info = NULL;
3768  }
3769  }
3770  }
3771  }
3772 
3773 phrased_term:
3774  bool was_acronym;
3775  bool needs_word_break = false;
3776  size_t first_wildcard = string::npos;
3777  size_t term_char_count;
3778  unsigned edit_distance = NO_EDIT_DISTANCE;
3779  string term = parse_term(it, end, try_word_break, flags,
3780  needs_word_break, was_acronym, first_wildcard,
3781  term_char_count, edit_distance);
3782 
3783  if (first_wildcard == string::npos &&
3784  edit_distance == NO_EDIT_DISTANCE &&
3785  (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) &&
3786  (flags & FLAG_BOOLEAN) &&
3787  // Don't want to interpret A.N.D. as an AND operator.
3788  !was_acronym &&
3789  !field_info &&
3790  term.size() >= 2 && term.size() <= 4 && U_isalpha(term[0])) {
3791  // Boolean operators.
3792  string op = term;
3793  if (flags & FLAG_BOOLEAN_ANY_CASE) {
3794  for (string::iterator i = op.begin(); i != op.end(); ++i) {
3795  *i = C_toupper(*i);
3796  }
3797  }
3798  if (op.size() == 3) {
3799  if (op == "AND") {
3800  Parse(&parser, AND, NULL, &state);
3801  goto just_had_operator;
3802  }
3803  if (op == "NOT") {
3804  Parse(&parser, NOT, NULL, &state);
3805  goto just_had_operator;
3806  }
3807  if (op == "XOR") {
3808  Parse(&parser, XOR, NULL, &state);
3809  goto just_had_operator;
3810  }
3811  if (op == "ADJ") {
3812  if (it != end && *it == '/') {
3813  size_t width = 0;
3814  Utf8Iterator p = it;
3815  while (++p != end && U_isdigit(*p)) {
3816  width = (width * 10) + (*p - '0');
3817  }
3818  if (width && (p == end || is_whitespace(*p))) {
3819  it = p;
3820  Parse(&parser, ADJ, new Term(width), &state);
3821  goto just_had_operator;
3822  }
3823  } else {
3824  Parse(&parser, ADJ, NULL, &state);
3825  goto just_had_operator;
3826  }
3827  }
3828  if (op == "SYN") {
3829  Parse(&parser, SYN, NULL, &state);
3830  goto just_had_operator;
3831  }
3832  } else if (op.size() == 2) {
3833  if (op == "OR") {
3834  Parse(&parser, OR, NULL, &state);
3835  goto just_had_operator;
3836  }
3837  } else if (op.size() == 4) {
3838  if (op == "NEAR") {
3839  if (it != end && *it == '/') {
3840  size_t width = 0;
3841  Utf8Iterator p = it;
3842  while (++p != end && U_isdigit(*p)) {
3843  width = (width * 10) + (*p - '0');
3844  }
3845  if (width && (p == end || is_whitespace(*p))) {
3846  it = p;
3847  Parse(&parser, NEAR, new Term(width), &state);
3848  goto just_had_operator;
3849  }
3850  } else {
3851  Parse(&parser, NEAR, NULL, &state);
3852  goto just_had_operator;
3853  }
3854  }
3855  }
3856  }
3857 
3858  // If no prefix is set, use the default one.
3859  if (!field_info) field_info = prefix_stack.back();
3860 
3861  Assert(field_info->type == NON_BOOLEAN);
3862 
3863  {
3864  string unstemmed_term(term);
3866 
3867  // Reuse stem_strategy - STEM_SOME here means "stem terms except
3868  // when used with positional operators".
3869  stem_strategy stem_term = stem_action;
3870  if (stem_term != STEM_NONE) {
3871  if (stemmer.is_none()) {
3872  stem_term = STEM_NONE;
3873  } else if (first_wildcard != string::npos ||
3874  edit_distance != NO_EDIT_DISTANCE) {
3875  stem_term = STEM_NONE;
3876  } else if (stem_term == STEM_SOME ||
3877  stem_term == STEM_SOME_FULL_POS) {
3878  if (!should_stem(unstemmed_term, state)) {
3879  // E.g. don't stem `Tony` or `Keating`.
3880  stem_term = STEM_NONE;
3881  } else if (it != end && is_stem_preventer(*it)) {
3882  // E.g. don't stem `tony` in `tony@example.org`.
3883  stem_term = STEM_NONE;
3884  }
3885  }
3886  }
3887 
3888  if (first_wildcard != string::npos) {
3889  if (first_wildcard < state.get_min_wildcard_prefix_len()) {
3890  errmsg = "Too few characters before wildcard";
3891  return state.query;
3892  }
3893  }
3894 
3895  Term * term_obj = new Term(&state, term, field_info,
3896  unstemmed_term, stem_term, term_pos++,
3897  edit_distance);
3898 
3899  if (first_wildcard != string::npos ||
3900  edit_distance != NO_EDIT_DISTANCE) {
3901  if (mode == IN_GROUP || mode == IN_GROUP2) {
3902  // Drop out of IN_GROUP and flag that the group
3903  // can be empty if all members are stopwords.
3904  if (mode == IN_GROUP2)
3905  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3906  mode = DEFAULT;
3907  }
3908  Parse(&parser,
3909  first_wildcard != string::npos ? WILD_TERM : EDIT_TERM,
3910  term_obj,
3911  &state);
3912  continue;
3913  }
3914 
3915  if (needs_word_break) {
3916  Parse(&parser, UNBROKEN_WORDS, term_obj, &state);
3917  // Drop out of IN_GROUP mode.
3918  if (mode == IN_GROUP || mode == IN_GROUP2)
3919  mode = DEFAULT;
3920  if (it == end) break;
3921  continue;
3922  }
3923 
3924  if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3925  if (it == end && (flags & FLAG_PARTIAL)) {
3926  auto min_len = state.get_min_partial_prefix_len();
3927  if (term_char_count >= min_len) {
3928  if (mode == IN_GROUP || mode == IN_GROUP2) {
3929  // Drop out of IN_GROUP and flag that the group
3930  // can be empty if all members are stopwords.
3931  if (mode == IN_GROUP2)
3932  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3933  mode = DEFAULT;
3934  }
3935  // Final term of a partial match query, with no
3936  // following characters - treat as a wildcard.
3937  Parse(&parser, PARTIAL_TERM, term_obj, &state);
3938  continue;
3939  }
3940  }
3941  }
3942 
3943  // Check spelling, if we're a normal term, and any of the prefixes
3944  // are empty.
3945  if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
3946  const auto& prefixes = field_info->prefixes;
3947  for (const string& prefix : prefixes) {
3948  if (!prefix.empty())
3949  continue;
3950  const string & suggest = db.get_spelling_suggestion(term);
3951  if (!suggest.empty()) {
3952  if (corrected_query.empty()) corrected_query = qs;
3953  size_t term_end_index = it.raw() - qs.data();
3954  size_t n = term_end_index - term_start_index;
3955  size_t pos = UNSIGNED_OVERFLOW_OK(term_start_index + correction_offset);
3956  corrected_query.replace(pos, n, suggest);
3957  UNSIGNED_OVERFLOW_OK(correction_offset += suggest.size());
3958  UNSIGNED_OVERFLOW_OK(correction_offset -= n);
3959  }
3960  break;
3961  }
3962  }
3963 
3964  if (mode == IN_PHRASED_TERM) {
3965  Parse(&parser, PHR_TERM, term_obj, &state);
3966  } else {
3967  // See if the next token will be PHR_TERM - if so, this one
3968  // needs to be TERM not GROUP_TERM.
3969  if ((mode == IN_GROUP || mode == IN_GROUP2) &&
3970  is_phrase_generator(*it)) {
3971  // FIXME: can we clean this up?
3972  Utf8Iterator p = it;
3973  do {
3974  ++p;
3975  } while (p != end && is_phrase_generator(*p));
3976  // Don't generate a phrase unless the phrase generators are
3977  // immediately followed by another term.
3978  if (p != end && is_wordchar(*p)) {
3979  mode = DEFAULT;
3980  }
3981  }
3982 
3983  int token = TERM;
3984  if (mode == IN_GROUP || mode == IN_GROUP2) {
3985  mode = IN_GROUP2;
3986  token = GROUP_TERM;
3987  }
3988  Parse(&parser, token, term_obj, &state);
3989  if (token == TERM && mode != DEFAULT)
3990  continue;
3991  }
3992  }
3993 
3994  if (it == end) break;
3995 
3996  if (is_phrase_generator(*it)) {
3997  // Skip multiple phrase generators.
3998  do {
3999  ++it;
4000  } while (it != end && is_phrase_generator(*it));
4001  // Don't generate a phrase unless the phrase generators are
4002  // immediately followed by another term.
4003  if (it != end && is_wordchar(*it)) {
4004  mode = IN_PHRASED_TERM;
4005  term_start_index = it.raw() - qs.data();
4006  goto phrased_term;
4007  }
4008  } else if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
4009  int old_mode = mode;
4010  mode = DEFAULT;
4011  if (!last_was_operator_needing_term && is_whitespace(*it)) {
4012  newprev = ' ';
4013  // Skip multiple whitespace.
4014  do {
4015  ++it;
4016  } while (it != end && is_whitespace(*it));
4017  // Don't generate a group unless the terms are only separated
4018  // by whitespace.
4019  if (it != end && is_wordchar(*it)) {
4020  if (old_mode == IN_GROUP || old_mode == IN_GROUP2) {
4021  mode = IN_GROUP2;
4022  } else {
4023  mode = IN_GROUP;
4024  }
4025  }
4026  }
4027  }
4028  }
4029 done:
4030  if (!state.error) {
4031  // Implicitly close any unclosed quotes.
4032  if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
4033  Parse(&parser, QUOTE, NULL, &state);
4034 
4035  // Implicitly close all unclosed brackets.
4036  while (prefix_stack.size() > 1) {
4037  Parse(&parser, KET, NULL, &state);
4038  prefix_stack.pop_back();
4039  }
4040  Parse(&parser, 0, NULL, &state);
4041  }
4042 
4043  errmsg = state.error;
4044  return state.query;
4045 }
4046 
4047 #line 4048 "queryparser/queryparser_internal.cc"
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
char name[9]
Definition: dbcheck.cc:57
Iterator returning unigrams and bigrams.
Definition: word-breaker.h:71
Parser State shared between the lexer and the parser.
unsigned get_min_partial_prefix_len() const
unsigned flags
State(QueryParser::Internal *qpi_, unsigned flags_)
Query::op default_op() const
Xapian::termcount get_max_partial_expansion() const
Term * range(const string &a, const string &b)
Database get_database() const
Xapian::termcount get_max_fuzzy_expansion() const
int get_max_partial_type() const
void add_to_stoplist(const Term *term)
void stoplist_resize(size_t s)
int get_max_wildcard_type() const
const Stopper * get_stopper() const
unsigned get_min_wildcard_prefix_len() const
int get_max_fuzzy_type() const
Query::op effective_default_op
QueryParser::Internal * qpi
Xapian::termcount get_max_wildcard_expansion() const
QueryParser::stop_strategy get_stopper_strategy() const
void add_to_unstem(const string &term, const string &unstemmed)
const char * error
size_t stoplist_size() const
string stem_term(const string &term)
bool is_stopword(const Term *term) const
unsigned int should_stem_mask
Xapian::Query value_type
bool operator!=(const SynonymIterator &o) const
bool operator==(const SynonymIterator &o) const
const Xapian::Query operator*() const
SynonymIterator(const Xapian::TermIterator &i_, Xapian::termpos pos_=0, const Xapian::Query *first_=NULL)
SynonymIterator & operator++()
const Xapian::Query * first
Xapian::TermIterator i
std::input_iterator_tag iterator_category
Xapian::termcount_diff difference_type
Xapian::Query * pointer
Xapian::Query & reference
A group of terms separated only by whitespace.
Query * as_group(State *state) const
Convert to a Xapian::Query * using default_op.
TermGroup(Term *t1, Term *t2)
static TermGroup * create(Term *t1, Term *t2)
Factory function - ensures heap allocation.
void add_term(Term *term)
Add a Term object to this TermGroup object.
bool empty_ok
Controls how to handle a group where all terms are stopwords.
vector< Term * > terms
void set_empty_ok()
Set the empty_ok flag.
Class used to pass information about a token from lexer to parser.
string get_grouping() const
Term(const string &name_, termpos pos_)
QueryParser::stem_strategy stem
const FieldInfo * field_info
Term(const string &name_)
string make_term(const string &prefix) const
void need_positions()
Query get_query_with_synonyms() const
void as_positional_unbroken(Terms *terms) const
Handle text without explicit word breaks in a positional context.
Query get_query() const
Query * as_fuzzy_query(State *state) const
Term(const string &name_, const FieldInfo *field_info_)
Term(const Xapian::Query &q, const string &grouping)
Query * as_partial_query(State *state_) const
Build a query for a term at the very end of the query string when FLAG_PARTIAL is in use.
Query * as_wildcarded_query(State *state) const
Term(termpos pos_)
Query get_query_with_auto_synonyms() const
Query as_range_query() const
Range query.
string unstemmed
Query * as_unbroken_query() const
Build a query for a string of words without explicit word breaks.
Term(State *state_, const string &name_, const FieldInfo *field_info_, const string &unstemmed_, QueryParser::stem_strategy stem_=QueryParser::STEM_NONE, termpos pos_=0, unsigned edit_distance_=NO_EDIT_DISTANCE)
unsigned edit_distance
termpos get_termpos() const
Some terms which form a positional sub-query.
Query * as_adj_query() const
Convert to a Xapian::Query * using OP_PHRASE to implement ADJ.
Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const
Convert to a query using the given operator and window size.
size_t window
Window size.
static Terms * create(State *state)
Factory function - ensures heap allocation.
bool uniform_prefixes
Keep track of whether the terms added all have the same list of prefixes.
const vector< string > * prefixes
The list of prefixes of the terms added.
Query * as_near_query() const
Convert to a Xapian::Query * using OP_NEAR.
Terms(bool no_pos)
void adjust_window(size_t alternative_window)
Query * as_phrase_query() const
Convert to a Xapian::Query * using adjacent OP_PHRASE.
Query opwindow_subq(Query::op op, const vector< Query > &v, Xapian::termcount w) const
vector< Term * > terms
void add_positional_term(Term *term)
Add an unstemmed Term object to this Terms object.
Query * as_synonym_phrase_query(State *state) const
Convert to a Xapian::Query * using adjacent OP_PHRASE.
An indexed database of documents.
Definition: database.h:75
Xapian::TermIterator synonym_keys_begin(std::string_view prefix={}) const
An iterator which returns all terms which have synonyms.
Definition: database.cc:484
Xapian::TermIterator synonyms_end(std::string_view) const noexcept
End iterator corresponding to synonyms_begin(term).
Definition: database.h:514
Xapian::TermIterator synonym_keys_end(std::string_view={}) const noexcept
End iterator corresponding to synonym_keys_begin(prefix).
Definition: database.h:526
Xapian::TermIterator synonyms_begin(std::string_view term) const
An iterator which returns all the synonyms for a given term.
Definition: database.cc:478
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:707
Base class for field processors.
Definition: queryparser.h:468
Xapian::valueno get_slot() const
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:271
Xapian::Internal::opt_intrusive_ptr< const Stopper > stopper
std::list< std::string > stoplist
std::multimap< std::string, std::string, std::less<> > unstem
std::list< RangeProc > rangeprocs
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:516
stop_strategy
Stopper strategies, for use with set_stopper_strategy().
Definition: queryparser.h:796
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Definition: queryparser.h:788
Class representing a query.
Definition: query.h:45
op get_type() const noexcept
Get the type of the top level of the query.
Definition: query.cc:275
op
Query operators.
Definition: query.h:78
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
Definition: query.h:158
@ LEAF_MATCH_ALL
Value returned by get_type() for MatchAll or equivalent.
Definition: query.h:290
@ OP_NEAR
Match only documents where all subqueries match near each other.
Definition: query.h:140
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Definition: query.h:152
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
Definition: query.h:231
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
@ LEAF_TERM
Value returned by get_type() for a term.
Definition: query.h:280
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
Definition: query.h:223
@ OP_INVALID
Construct an invalid query.
Definition: query.h:277
bool empty() const noexcept
Check if this query is Xapian::Query::MatchNothing.
Definition: query.h:661
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:48
Xapian::Internal::intrusive_ptr< StemImplementation > internal
Definition: stem.h:77
bool is_none() const
Return true if this is a no-op stemmer.
Definition: stem.h:193
Abstract base class for stop-word decision functor.
Definition: queryparser.h:50
Class for iterating over a list of terms.
Definition: termiterator.h:41
void skip_to(std::string_view term)
Advance the iterator to term term.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
An iterator which returns Unicode character values from a UTF-8 encoded string.
Definition: unicode.h:39
const char * raw() const
Return the raw const char* pointer for the current position.
Definition: unicode.h:55
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
string term
PositionList * p
Xapian::termpos pos
Debug logging macros.
#define LOGLINE(a, b)
Definition: debuglog.h:485
Hierarchy of classes which Xapian can throw as exceptions.
#define true
Definition: header.h:8
#define false
Definition: header.h:9
string str(int value)
Convert int to std::string.
Definition: str.cc:91
category get_category(int info)
Definition: unicode.h:283
void append_utf8(std::string &s, unsigned ch)
Append the UTF-8 representation of a single Unicode character to a std::string.
Definition: unicode.h:344
unsigned tolower(unsigned ch)
Convert a Unicode character to lowercase.
Definition: unicode.h:388
@ LOWERCASE_LETTER
Letter, lowercase (Ll)
Definition: unicode.h:231
@ MODIFIER_LETTER
Letter, modifier (Lm)
Definition: unicode.h:233
@ OTHER_LETTER
Letter, other (Lo)
Definition: unicode.h:234
@ DECIMAL_DIGIT_NUMBER
Number, decimal digit (Nd)
Definition: unicode.h:238
@ TITLECASE_LETTER
Letter, titlecase (Lt)
Definition: unicode.h:232
@ UPPERCASE_LETTER
Letter, uppercase (Lu)
Definition: unicode.h:230
bool is_wordchar(unsigned ch)
Test if a given Unicode character is "word character".
Definition: unicode.h:355
bool is_currency(unsigned ch)
Test if a given Unicode character is a currency symbol.
Definition: unicode.h:383
bool is_whitespace(unsigned ch)
Test if a given Unicode character is a whitespace character.
Definition: unicode.h:373
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
XAPIAN_TERMCOUNT_BASE_TYPE termcount_diff
A signed difference between two counts of terms.
Definition: types.h:71
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
Various assertion macros.
#define AssertEq(A, B)
Definition: omassert.h:124
#define Assert(COND)
Definition: omassert.h:122
Xapian::Query internals.
static void yy_pop_parser_stack(yyParser *pParser)
bool is_not_whitespace(unsigned ch)
const unsigned UNICODE_IGNORE
Value representing "ignore this" when returned by check_infix() or check_infix_digit().
YYCODETYPE lhs
#define YYCODETYPE
#define VET_BOOL_ARGS(A, B, OP_TXT)
static const YYCODETYPE yy_lookahead[]
static unsigned int yy_find_shift_action(yyParser *pParser, YYCODETYPE iLookAhead)
#define YY_SHIFT_MIN
bool is_extended_wildcard(unsigned ch, unsigned flags)
#define ParseARG_FETCH
bool should_stem(const string &term, const State &state)
static const YYACTIONTYPE yy_action[]
#define ParseARG_PDECL
static void yy_syntax_error(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor)
static const YYACTIONTYPE yy_default[]
static void add_to_query(Query *&q, Query::op op, Query *term)
bool U_isupper(unsigned ch)
struct yyParser yyParser
#define YYACTIONTYPE
#define YYNOCODE
#define YY_MIN_SHIFTREDUCE
#define YY_ERROR_ACTION
static const unsigned short int yy_shift_ofst[]
static constexpr unsigned NO_EDIT_DISTANCE
#define YYNSTATE
#define YY_ACTTAB_COUNT
#define YY_MIN_REDUCE
bool is_not_wordchar(unsigned ch)
bool is_digit(unsigned ch)
static void yy_shift(yyParser *yypParser, int yyNewState, int yyMajor, ParseTOKENTYPE yyMinor)
unsigned check_infix_digit(unsigned ch)
bool is_suffix(unsigned ch)
#define YY_MAX_SHIFT
static const short yy_reduce_ofst[]
static const struct @17 yyRuleInfo[]
static void yy_accept(yyParser *)
static int yy_find_reduce_action(int stateno, YYCODETYPE iLookAhead)
#define ParseARG_SDECL
#define YY_SHIFT_COUNT
bool U_isdigit(unsigned ch)
#define YY_REDUCE_COUNT
#define YY_ACCEPT_ACTION
bool is_stem_preventer(unsigned ch)
#define yytestcase(X)
bool is_positional(Xapian::Query::op op)
#define YYNTOKEN
static constexpr unsigned DEFAULT_EDIT_DISTANCE
static void ParseFinalize(yyParser *pParser)
bool is_phrase_generator(unsigned ch)
unsigned check_infix(unsigned ch)
#define ParseARG_STORE
#define YY_MAX_SHIFTREDUCE
#define yyTraceShift(X, Y, Z)
#define ParseTOKENTYPE
bool U_isalpha(unsigned ch)
#define YYSTACKDEPTH
bool prefix_needs_colon(const string &prefix, unsigned ch)
signed char nrhs
#define YY_SHIFT_MAX
static void yy_parse_failed(yyParser *)
bool is_double_quote(unsigned ch)
static void ParseInit(yyParser *pParser)
static void Parse(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor ParseARG_PDECL)
static void yy_reduce(yyParser *yypParser, unsigned int yyruleno, int yyLookahead, ParseTOKENTYPE yyLookaheadToken)
static void yy_destructor(yyParser *yypParser, YYCODETYPE yymajor, YYMINORTYPE *yypminor)
The non-lemon-generated parts of the QueryParser class.
@ NON_BOOLEAN
@ BOOLEAN_EXCLUSIVE
#define HATE_AFTER_AND
#define OR
#define SYNONYM
#define XOR
#define BRA
#define EDIT_TERM
#define EMPTY_GROUP_OK
#define HATE
#define QUOTE
#define NEAR
#define LOVE
#define TERM
#define UNBROKEN_WORDS
#define PARTIAL_TERM
#define RANGE
#define SYN
#define KET
#define AND
#define NOT
#define ADJ
#define WILD_TERM
#define GROUP_TERM
#define BOOLEAN_FILTER
#define PHR_TERM
static Xapian::Stem stemmer
Definition: stemtest.cc:42
Convert types to std::string.
Various handy string-related helpers.
char C_toupper(char ch)
Definition: stringutils.h:231
bool startswith(std::string_view s, char pfx)
Definition: stringutils.h:56
Information about how to handle a field prefix in the query string.
std::string grouping
std::vector< std::string > prefixes
Field prefix strings.
filter_type type
The type of this field.
void append_filter(const string &grouping, const Query &qnew)
Query merge_filters() const
void add_filter_range(const string &grouping, const Query &range)
void append_filter_range(const string &grouping, const Query &range)
ProbQuery(Query *query_)
map< string, Query > filter
void add_filter(const string &grouping, const Query &q)
Definition: header.h:215
ParseARG_SDECL vector< yyStackEntry > yystack
yyStackEntry(YYACTIONTYPE stateno_, YYCODETYPE major_, ParseTOKENTYPE minor_)
Unicode and UTF-8 related classes and functions.
ParseTOKENTYPE yy0
bool is_unbroken_script(unsigned p)
Definition: word-breaker.cc:51
size_t get_unbroken(Xapian::Utf8Iterator &it)
bool is_ngram_enabled()
Should we use the n-gram code?
Definition: word-breaker.cc:43
Handle text without explicit word breaks.