xapian-core  1.4.25
queryparser_internal.cc
Go to the documentation of this file.
1 /*
2 ** 2000-05-29
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** Driver template for the LEMON parser generator.
13 **
14 ** Synced with upstream:
15 ** https://www.sqlite.org/src/artifact/468a155e8729cfbccfe1d85bf60d064f1dab76167a51149ec5c7928a2de63953
16 **
17 ** The "lemon" program processes an LALR(1) input grammar file, then uses
18 ** this template to construct a parser. The "lemon" program inserts text
19 ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
20 ** interstitial "-" characters) contained in this template is changed into
21 ** the value of the %name directive from the grammar. Otherwise, the content
22 ** of this template is copied straight through into the generated parser
23 ** source file.
24 **
25 ** The following is the concatenation of all %include directives from the
26 ** input grammar file:
27 */
28 /************ Begin %include sections from the grammar ************************/
29 #line 1 "queryparser/queryparser.lemony"
30 
34 /* Copyright (C) 2004-2023 Olly Betts
35  * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
36  * Copyright (C) 2010 Adam Sjøgren
37  *
38  * This program is free software; you can redistribute it and/or
39  * modify it under the terms of the GNU General Public License as
40  * published by the Free Software Foundation; either version 2 of the
41  * License, or (at your option) any later version.
42  *
43  * This program is distributed in the hope that it will be useful,
44  * but WITHOUT ANY WARRANTY; without even the implied warranty of
45  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46  * GNU General Public License for more details.
47  *
48  * You should have received a copy of the GNU General Public License
49  * along with this program; if not, write to the Free Software
50  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
51  * USA
52  */
53 
54 #include <config.h>
55 
56 #include "queryparser_internal.h"
57 
58 #include "api/queryinternal.h"
59 #include "omassert.h"
60 #include "str.h"
61 #include "stringutils.h"
62 #include "xapian/error.h"
63 #include "xapian/unicode.h"
64 
65 // Include the list of token values lemon generates.
66 #include "queryparser_token.h"
67 
68 #include "word-breaker.h"
69 
70 #include <algorithm>
71 #include <cstring>
72 #include <limits>
73 #include <list>
74 #include <string>
75 #include <vector>
76 
77 // We create the yyParser on the stack.
78 #define Parse_ENGINEALWAYSONSTACK
79 
80 using namespace std;
81 
82 using namespace Xapian;
83 
84 inline bool
85 U_isupper(unsigned ch) {
86  return ch < 128 && C_isupper(static_cast<unsigned char>(ch));
87 }
88 
89 inline bool
90 U_isdigit(unsigned ch) {
91  return ch < 128 && C_isdigit(static_cast<unsigned char>(ch));
92 }
93 
94 inline bool
95 U_isalpha(unsigned ch) {
96  return ch < 128 && C_isalpha(static_cast<unsigned char>(ch));
97 }
98 
100 
101 inline bool
102 is_not_whitespace(unsigned ch) {
103  return !is_whitespace(ch);
104 }
105 
107 
108 inline bool
109 is_not_wordchar(unsigned ch) {
110  return !is_wordchar(ch);
111 }
112 
// Predicate on a Unicode code point; parse_term() uses it to decide whether
// an infix character sits between two digits.
// NOTE(review): the function body is absent from this listing (the line
// between the signature and the closing brace is missing) - restore it from
// the upstream source before compiling.
113 inline bool
114 is_digit(unsigned ch) {
116 }
117 
118 // FIXME: we used to keep trailing "-" (e.g. Cl-) but it's of dubious utility
119 // and there's the risk of hyphens getting stuck onto the end of terms...
/// True for the characters which may be kept as a term suffix ('+' or '#').
inline bool
is_suffix(unsigned ch) {
    switch (ch) {
      case '+':
      case '#':
        return true;
      default:
        return false;
    }
}
124 
/// True if ch is an ASCII or Unicode double quote.
inline bool
is_double_quote(unsigned ch) {
    // All double quotes are treated as interchangeable.  That's a bit crude,
    // but it isn't clear that it would actually be better to require opening
    // and closing quotes to match up exactly.
    switch (ch) {
      case '"':
      case 0x201c: // Unicode opening double quote.
      case 0x201d: // Unicode closing double quote.
        return true;
    }
    return false;
}
135 
136 inline bool
137 prefix_needs_colon(const string & prefix, unsigned ch)
138 {
139  if (!U_isupper(ch) && ch != ':') return false;
140  string::size_type len = prefix.length();
141  return (len > 1 && prefix[len - 1] != ':');
142 }
143 
145 
// Report whether a query operator is OP_PHRASE or OP_NEAR.
// NOTE(review): the declarator line (function name and its `op` parameter)
// is missing from this listing; other code in this file calls it as
// is_positional(<Query::op>).
146 inline bool
148 {
149  return (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR);
150 }
151 
152 class Terms;
153 
// A term (or RANGE token) produced while parsing, together with the data
// needed to convert it to a Xapian::Query.
// NOTE(review): several member declarations are missing from this listing -
// the constructors below initialise `state`, `field_info`, `stem`, `pos` and
// `query`, none of which is declared in the visible text.
160 class Term {
162 
163  public:
// Text of the term.  For RANGE tokens this holds the grouping instead (see
// the Query/grouping constructor below).
164  string name;
// If non-empty, make_term() passes this to State::add_to_unstem() along with
// the generated term.
166  string unstemmed;
170 
// The constructors below differ in which members they set; stemming defaults
// to STEM_NONE and the position to 0 where not supplied.
171  Term(const string &name_, termpos pos_)
172  : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { }
173  explicit Term(const string &name_)
174  : name(name_), stem(QueryParser::STEM_NONE), pos(0) { }
175  Term(const string &name_, const FieldInfo * field_info_)
176  : name(name_), field_info(field_info_),
177  stem(QueryParser::STEM_NONE), pos(0) { }
178  explicit Term(termpos pos_) : stem(QueryParser::STEM_NONE), pos(pos_) { }
179  Term(State * state_, const string &name_, const FieldInfo * field_info_,
180  const string &unstemmed_,
181  QueryParser::stem_strategy stem_ = QueryParser::STEM_NONE,
182  termpos pos_ = 0)
183  : state(state_), name(name_), field_info(field_info_),
184  unstemmed(unstemmed_), stem(stem_), pos(pos_) { }
185  // For RANGE tokens.
186  Term(const Xapian::Query & q, const string & grouping)
187  : name(grouping), query(q) { }
188 
// Build the term string for the given prefix (see the definition below).
189  string make_term(const string & prefix) const;
190 
// Switch STEM_SOME to STEM_NONE; any other stemming strategy is left alone.
191  void need_positions() {
192  if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
193  }
194 
195  termpos get_termpos() const { return pos; }
196 
// Dereferences field_info, so only valid for terms constructed with one.
197  string get_grouping() const {
198  return field_info->grouping;
199  }
200 
// The as_*() conversions below all `delete this` before returning, so the
// Term must have been allocated with new and must not be used afterwards.
201  Query * as_wildcarded_query(State * state) const;
202 
211  Query * as_partial_query(State * state_) const;
212 
214  Query* as_unbroken_query() const;
215 
217  void as_positional_unbroken(Terms* terms) const;
218 
220  Query as_range_query() const;
221 
// The get_query*() methods do not delete the Term.
222  Query get_query() const;
223 
224  Query get_query_with_synonyms() const;
225 
226  Query get_query_with_auto_synonyms() const;
227 };
228 
// State for one parse: wraps the QueryParser::Internal object and the flags
// for this parse, and forwards requests from Term/TermGroup/Terms to it.
// NOTE(review): some lines are missing from this listing - the `qpi` and
// `effective_default_op` members, part of the switch in range(), and the
// declarator lines of several accessors below.
230 class State {
232 
233  public:
// Error message for this parse; NULL until something sets it (e.g. the
// VET_BOOL_ARGS macro).
235  const char* error = NULL;
// Flags supplied to the constructor.
236  unsigned flags;
238 
// If FLAG_NO_POSITIONS is set and the configured default operator is
// positional, OP_AND is used as the effective default operator instead.
239  State(QueryParser::Internal * qpi_, unsigned flags_)
240  : qpi(qpi_), flags(flags_), effective_default_op(qpi_->default_op)
241  {
242  if ((flags & QueryParser::FLAG_NO_POSITIONS)) {
243  if (is_positional(effective_default_op)) {
244  effective_default_op = Query::OP_AND;
245  }
246  }
247  }
248 
// Apply the parser's stemmer to a term.
249  string stem_term(const string &term) {
250  return qpi->stemmer(term);
251  }
252 
// Record the term's name in the parser's stoplist.
253  void add_to_stoplist(const Term * term) {
254  qpi->stoplist.push_back(term->name);
255  }
256 
// Record a (generated term, unstemmed form) pair in the parser's unstem map.
257  void add_to_unstem(const string & term, const string & unstemmed) {
258  qpi->unstem.insert(make_pair(term, unstemmed));
259  }
260 
// Offer the range a..b to each registered range processor in turn and wrap
// the resulting query in a new Term (caller owns it); returns NULL if the
// loop finishes without returning.
// NOTE(review): several `case` labels and the declaration of `base` are
// missing from this listing, so which query types take which branch cannot
// be confirmed from the visible text.
261  Term * range(const string &a, const string &b) {
262  for (auto i : qpi->rangeprocs) {
263  Xapian::Query range_query = (i.proc)->check_range(a, b);
264  Xapian::Query::op op = range_query.get_type();
265  switch (op) {
267  break;
271  if (i.default_grouping) {
273  static_cast<Xapian::Internal::QueryValueBase*>(
274  range_query.internal.get());
275  Xapian::valueno slot = base->get_slot();
276  return new Term(range_query, str(slot));
277  }
278  // FALLTHRU
280  return new Term(range_query, i.grouping);
281  default:
282  return new Term(range_query, string());
283  }
284  }
285  return NULL;
286  }
287 
// NOTE(review): declarator missing; callers in this file use
// state->default_op().
289  return effective_default_op;
290  }
291 
// True if a stopper is set and it accepts the term's name as a stopword.
292  bool is_stopword(const Term *term) const {
293  return qpi->stopper.get() && (*qpi->stopper)(term->name);
294  }
295 
// NOTE(review): declarator missing; callers in this file use
// state->get_database().
297  return qpi->db;
298  }
299 
// May return NULL when no stopper is set (callers test the result).
300  const Stopper * get_stopper() const {
301  return qpi->stopper.get();
302  }
303 
304  size_t stoplist_size() const {
305  return qpi->stoplist.size();
306  }
307 
// Used together with stoplist_size() to roll the stoplist back to an earlier
// length (see TermGroup::as_group()).
308  void stoplist_resize(size_t s) {
309  qpi->stoplist.resize(s);
310  }
311 
// NOTE(review): declarator missing - presumably the accessor for the wildcard
// expansion limit; confirm against upstream.
313  return qpi->max_wildcard_expansion;
314  }
315 
316  int get_max_wildcard_type() const {
317  return qpi->max_wildcard_type;
318  }
319 
// NOTE(review): declarator missing - presumably the accessor for the partial
// expansion limit; confirm against upstream.
321  return qpi->max_partial_expansion;
322  }
323 
324  int get_max_partial_type() const {
325  return qpi->max_partial_type;
326  }
327 };
328 
329 string
330 Term::make_term(const string & prefix) const
331 {
332  string term;
333  if (stem != QueryParser::STEM_NONE && stem != QueryParser::STEM_ALL)
334  term += 'Z';
335  if (!prefix.empty()) {
336  term += prefix;
337  if (prefix_needs_colon(prefix, name[0])) term += ':';
338  }
339  if (stem != QueryParser::STEM_NONE) {
340  term += state->stem_term(name);
341  } else {
342  term += name;
343  }
344 
345  if (!unstemmed.empty())
346  state->add_to_unstem(term, unstemmed);
347  return term;
348 }
349 
350 // Iterator shim to allow building a synonym query from a TermIterator pair.
// NOTE(review): the class header, the member declarations (`i`, `pos`,
// `first` are initialised below), the start of the constructor, the
// declarator of the pre-increment operator and some iterator typedefs are
// missing from this listing.
353 
355 
357 
358  public:
// When `first_` is non-NULL, the query it points to is yielded before any
// term from the wrapped iterator.
360  Xapian::termpos pos_ = 0,
361  const Xapian::Query * first_ = NULL)
362  : i(i_), pos(pos_), first(first_) { }
363 
// Advancing first consumes the pending `first` query (if any) without moving
// the wrapped iterator; after that it steps the wrapped iterator.
365  if (first)
366  first = NULL;
367  else
368  ++i;
369  return *this;
370  }
371 
// Yields *first while it is pending, otherwise a term query (wqf 1) at
// position `pos` for the current term.
372  const Xapian::Query operator*() const {
373  if (first) return *first;
374  return Xapian::Query(*i, 1, pos);
375  }
376 
// Equality requires both the wrapped iterator and the pending `first`
// pointer to match, so an iterator with `first` pending never equals an end
// iterator constructed without one.
377  bool operator==(const SynonymIterator & o) const {
378  return i == o.i && first == o.first;
379  }
380 
381  bool operator!=(const SynonymIterator & o) const {
382  return !(*this == o);
383  }
384 
385  typedef std::input_iterator_tag iterator_category;
390 };
391 
// Build the query for this term and, for each prefix, wrap it in an
// OP_SYNONYM together with any single-word synonyms found in the database.
// For a field with no prefixes the field processor is invoked instead.
// NOTE(review): the declarator line is missing from this listing; class Term
// declares `Query get_query_with_synonyms() const;`.
392 Query
394 {
395  // Handle single-word synonyms with each prefix.
396  const auto& prefixes = field_info->prefixes;
397  if (prefixes.empty()) {
398  Assert(field_info->proc.get());
399  return (*field_info->proc)(name);
400  }
401 
402  Query q = get_query();
403 
404  for (auto&& prefix : prefixes) {
405  // First try the unstemmed term:
406  string term;
407  if (!prefix.empty()) {
408  term += prefix;
409  if (prefix_needs_colon(prefix, name[0])) term += ':';
410  }
411  term += name;
412 
413  Xapian::Database db = state->get_database();
414  Xapian::TermIterator syn = db.synonyms_begin(term);
415  Xapian::TermIterator end = db.synonyms_end(term);
416  if (syn == end && stem != QueryParser::STEM_NONE) {
417  // If that has no synonyms, try the stemmed form:
418  term = 'Z';
419  if (!prefix.empty()) {
420  term += prefix;
421  if (prefix_needs_colon(prefix, name[0])) term += ':';
422  }
423  term += state->stem_term(name);
424  syn = db.synonyms_begin(term);
425  end = db.synonyms_end(term);
426  }
// The query built so far is passed as the iterator's `first` element, so it
// becomes the first subquery of the new OP_SYNONYM, followed by the synonyms.
427  q = Query(q.OP_SYNONYM,
428  SynonymIterator(syn, pos, &q),
429  SynonymIterator(end));
430  }
431  return q;
432 }
433 
// Return get_query_with_synonyms() if either FLAG_AUTO_SYNONYMS or
// FLAG_AUTO_MULTIWORD_SYNONYMS is set in the parse flags, otherwise
// get_query().
// NOTE(review): the declarator line is missing from this listing; class Term
// declares `Query get_query_with_auto_synonyms() const;`.
434 Query
436 {
437  const unsigned MASK_ENABLE_AUTO_SYNONYMS =
438  QueryParser::FLAG_AUTO_SYNONYMS |
439  QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS;
440  if (state->flags & MASK_ENABLE_AUTO_SYNONYMS)
441  return get_query_with_synonyms();
442 
443  return get_query();
444 }
445 
// Combine `term` into the accumulated query `q` using operator `op`, taking
// ownership of `term`: if `q` is NULL it simply adopts `term`, otherwise
// `term` is merged into `*q` and then deleted.
// NOTE(review): the declarator line is missing from this listing; from the
// body, the parameters are `q` (modified in place), `op` and a heap-allocated
// `term` pointer - confirm the exact signature against upstream.
446 static void
448 {
449  Assert(term);
450  if (q) {
451  if (op == Query::OP_OR) {
452  *q |= *term;
453  } else if (op == Query::OP_AND) {
454  *q &= *term;
455  } else {
456  *q = Query(op, *q, *term);
457  }
458  delete term;
459  } else {
460  q = term;
461  }
462 }
463 
464 static void
465 add_to_query(Query *& q, Query::op op, const Query & term)
466 {
467  if (q) {
468  if (op == Query::OP_OR) {
469  *q |= term;
470  } else if (op == Query::OP_AND) {
471  *q &= term;
472  } else {
473  *q = Query(op, *q, term);
474  }
475  } else {
476  q = new Query(term);
477  }
478 }
479 
// Build the query for this term: the OR of make_term(prefix) over all the
// field's prefixes (each with wqf 1 at position `pos`), or the result of the
// field processor when the field has no prefixes.
// NOTE(review): the declarator line is missing from this listing; class Term
// declares `Query get_query() const;`.
480 Query
482 {
483  const auto& prefixes = field_info->prefixes;
484  if (prefixes.empty()) {
485  Assert(field_info->proc.get());
486  return (*field_info->proc)(name);
487  }
488  auto piter = prefixes.begin();
489  Query q(make_term(*piter), 1, pos);
490  while (++piter != prefixes.end()) {
491  q |= Query(make_term(*piter), 1, pos);
492  }
493  return q;
494 }
495 
// Convert this term to a wildcard query: one OP_WILDCARD subquery per prefix
// (prefix + name as the pattern), wrapped in OP_SYNONYM.  Returns a newly
// allocated Query and deletes this Term.
// NOTE(review): the declarator line and the declaration of `max` are missing
// from this listing.
496 Query *
498 {
499  const auto& prefixes = field_info->prefixes;
501  int max_type = state_->get_max_wildcard_type();
502  vector<Query> subqs;
503  subqs.reserve(prefixes.size());
504  for (string root : prefixes) {
505  root += name;
506  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
507  subqs.push_back(Query(Query::OP_WILDCARD, root, max, max_type,
508  Query::OP_OR));
509  }
510  Query * q = new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
// The Term is consumed by this conversion.
511  delete this;
512  return q;
513 }
514 
// Convert this term to a "partial" query: the OR of (a synonym of wildcard
// expansions of prefix + name) and (a synonym of the term as make_term()
// would normally produce it), over all prefixes.  Returns a newly allocated
// Query and deletes this Term.
// NOTE(review): the declarator line and the declaration of `max` are missing
// from this listing.
515 Query *
517 {
519  int max_type = state_->get_max_partial_type();
520  vector<Query> subqs_partial; // A synonym of all the partial terms.
521  vector<Query> subqs_full; // A synonym of all the full terms.
522 
523  for (const string& prefix : field_info->prefixes) {
524  string root = prefix;
525  root += name;
526  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
527  subqs_partial.push_back(Query(Query::OP_WILDCARD, root, max, max_type,
528  Query::OP_OR));
529  // Add the term, as it would normally be handled, as an alternative.
530  subqs_full.push_back(Query(make_term(prefix), 1, pos));
531  }
532  Query * q = new Query(Query::OP_OR,
533  Query(Query::OP_SYNONYM,
534  subqs_partial.begin(), subqs_partial.end()),
535  Query(Query::OP_SYNONYM,
536  subqs_full.begin(), subqs_full.end()));
// The Term is consumed by this conversion.
537  delete this;
538  return q;
539 }
540 
// Convert a run of unbroken text to a query: for each prefix, AND together
// the terms yielded by NgramIterator over `name`; then OR the per-prefix
// queries.  Returns a newly allocated Query and deletes this Term.
// NOTE(review): the declarator line is missing from this listing; class Term
// declares `Query* as_unbroken_query() const;`.
541 Query *
543 {
544  vector<Query> prefix_subqs;
545  vector<Query> ngram_subqs;
546  const auto& prefixes = field_info->prefixes;
547  for (const string& prefix : prefixes) {
548  for (NgramIterator tk(name); tk != NgramIterator(); ++tk) {
549  ngram_subqs.push_back(Query(prefix + *tk, 1, pos));
550  }
551  prefix_subqs.push_back(Query(Query::OP_AND,
552  ngram_subqs.begin(), ngram_subqs.end()));
// Reuse the vector for the next prefix.
553  ngram_subqs.clear();
554  }
555  Query * q = new Query(Query::OP_OR,
556  prefix_subqs.begin(), prefix_subqs.end());
// The Term is consumed by this conversion.
557  delete this;
558  return q;
559 }
560 
// Return the query stored in this (RANGE) Term and delete the Term.  The
// query is copied out first because `delete this` destroys the member.
// NOTE(review): the declarator line is missing from this listing; class Term
// declares `Query as_range_query() const;`.
561 Query
563 {
564  Query q = query;
565  delete this;
566  return q;
567 }
568 
// True if `ch` is one of the ASCII characters . - / : \ @ (NUL and code
// points >= 128 never match).
// NOTE(review): the declarator line is missing from this listing; the body
// takes a single character code `ch`.
569 inline bool
571 {
572  // These characters generate a phrase search.
573  // Ordered mostly by frequency of calls to this function done when
574  // running the testcases in api_queryparser.cc.
575  return (ch && ch < 128 && strchr(".-/:\\@", ch) != NULL);
576 }
577 
/// True if ch is one of the ASCII characters ( / \ @ < > = * [ { "
/// (NUL and code points of 128 and above never match).
inline bool
is_stem_preventer(unsigned ch)
{
    // NUL must be rejected explicitly: strchr() would match the terminator.
    if (ch == 0 || ch >= 128) return false;
    const char* const preventers = "(/\\@<>=*[{\"";
    return std::strchr(preventers, static_cast<int>(ch)) != NULL;
}
583 
// Decide whether a term should be stemmed by testing the Unicode category of
// its first character against a bit mask of categories.
// NOTE(review): the first part of the mask expression is missing from this
// listing; only MODIFIER_LETTER and OTHER_LETTER are visible.
584 inline bool
585 should_stem(const string & term)
586 {
587  const unsigned int SHOULD_STEM_MASK =
590  (1 << Unicode::MODIFIER_LETTER) |
591  (1 << Unicode::OTHER_LETTER);
592  Utf8Iterator u(term);
// Shift the bit for this character's category down to bit 0 and test it.
593  return ((SHOULD_STEM_MASK >> Unicode::get_category(*u)) & 1);
594 }
595 
/// Value returned by check_infix() and check_infix_digit() for a character
/// which should be skipped rather than added to the term.
const unsigned UNICODE_IGNORE = std::numeric_limits<unsigned>::max();

/** Classify a non-word character found between two word characters.
 *
 *  @return The character to add to the term (possibly normalised),
 *          UNICODE_IGNORE if the character should be dropped, or 0 if it is
 *          not an infix character.
 */
inline unsigned check_infix(unsigned ch) {
    switch (ch) {
      // Unicode includes all these except '&' in its word boundary rules,
      // as well as 0x2019 (handled below) and ':' (for Swedish apparently,
      // but we ignore this for now as it's problematic in real world cases).
      case '\'':
      case '&':
      case 0xb7:
      case 0x5f4:
      case 0x2027:
        return ch;

      // 0x2019 is Unicode apostrophe and single closing quote.
      // 0x201b is Unicode single opening quote with the tail rising.
      // Both are normalised to an ASCII apostrophe.
      case 0x2019:
      case 0x201b:
        return '\'';

      case 0x200b:
      case 0x200c:
      case 0x200d:
      case 0x2060:
      case 0xfeff:
        return UNICODE_IGNORE;

      default:
        return 0;
    }
}
619 
620 inline unsigned check_infix_digit(unsigned ch) {
621  // This list of characters comes from Unicode's word identifying algorithm.
622  switch (ch) {
623  case ',':
624  case '.':
625  case ';':
626  case 0x037e: // GREEK QUESTION MARK
627  case 0x0589: // ARMENIAN FULL STOP
628  case 0x060D: // ARABIC DATE SEPARATOR
629  case 0x07F8: // NKO COMMA
630  case 0x2044: // FRACTION SLASH
631  case 0xFE10: // PRESENTATION FORM FOR VERTICAL COMMA
632  case 0xFE13: // PRESENTATION FORM FOR VERTICAL COLON
633  case 0xFE14: // PRESENTATION FORM FOR VERTICAL SEMICOLON
634  return ch;
635  }
636  if (ch >= 0x200b && (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff))
637  return UNICODE_IGNORE;
638  return 0;
639 }
640 
641 // Prototype a function lemon generates, but which we want to call before that
642 // in the generated source code file.
643 struct yyParser;
644 static void yy_parse_failed(yyParser *);
645 
646 void
647 QueryParser::Internal::add_prefix(const string &field, const string &prefix)
648 {
649  map<string, FieldInfo>::iterator p = field_map.find(field);
650  if (p == field_map.end()) {
651  field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, prefix)));
652  } else {
653  // Check that this is the same type of filter as the existing one(s).
654  if (p->second.type != NON_BOOLEAN) {
655  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter");
656  }
657  if (p->second.proc.get())
658  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
659  p->second.prefixes.push_back(prefix);
660  }
661 }
662 
663 void
664 QueryParser::Internal::add_prefix(const string &field, FieldProcessor *proc)
665 {
666  map<string, FieldInfo>::iterator p = field_map.find(field);
667  if (p == field_map.end()) {
668  field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, proc)));
669  } else {
670  // Check that this is the same type of filter as the existing one(s).
671  if (p->second.type != NON_BOOLEAN) {
672  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter");
673  }
674  if (!p->second.prefixes.empty())
675  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
676  throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects for the same prefix currently not supported");
677  }
678 }
679 
680 void
681 QueryParser::Internal::add_boolean_prefix(const string &field,
682  const string &prefix,
683  const string* grouping)
684 {
685  // Don't allow the empty prefix to be set as boolean as it doesn't
686  // really make sense.
687  if (field.empty())
688  throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
689  if (!grouping) grouping = &field;
690  filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE;
691  map<string, FieldInfo>::iterator p = field_map.find(field);
692  if (p == field_map.end()) {
693  field_map.insert(make_pair(field, FieldInfo(type, prefix, *grouping)));
694  } else {
695  // Check that this is the same type of filter as the existing one(s).
696  if (p->second.type != type) {
697  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME
698  }
699  if (p->second.proc.get())
700  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
701  p->second.prefixes.push_back(prefix); // FIXME grouping
702  }
703 }
704 
705 void
706 QueryParser::Internal::add_boolean_prefix(const string &field,
707  FieldProcessor *proc,
708  const string* grouping)
709 {
710  // Don't allow the empty prefix to be set as boolean as it doesn't
711  // really make sense.
712  if (field.empty())
713  throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
714  if (!grouping) grouping = &field;
715  filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE;
716  map<string, FieldInfo>::iterator p = field_map.find(field);
717  if (p == field_map.end()) {
718  field_map.insert(make_pair(field, FieldInfo(type, proc, *grouping)));
719  } else {
720  // Check that this is the same type of filter as the existing one(s).
721  if (p->second.type != type) {
722  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME
723  }
724  if (!p->second.prefixes.empty())
725  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
726  throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects for the same prefix currently not supported");
727  }
728 }
729 
// Extract one term from the input starting at `it`.
//
// Three forms are tried in order:
//  1. an acronym - upper-case ASCII letters separated by '.', returned with
//     the dots removed;
//  2. a run of unbroken-script text (only if try_word_break is set);
//  3. a general word, which may contain single infix characters accepted by
//     check_infix()/check_infix_digit() and may keep a short '+'/'#' suffix.
//
// Parameters:
//  it               Current position; advanced past whatever was consumed.
//  end              End of the input.
//  try_word_break   Whether unbroken-script text is handled specially.
//  needs_word_break Set to true when form 2 was used (never cleared here).
//  was_acronym      Set to true if form 1 was used, false otherwise.
//
// Returns the term text.
730 string
731 QueryParser::Internal::parse_term(Utf8Iterator &it, const Utf8Iterator &end,
732  bool try_word_break, bool& needs_word_break,
733  bool &was_acronym)
734 {
735  string term;
736  // Look for initials separated by '.' (e.g. P.T.O., U.N.C.L.E).
737  // Don't worry if there's a trailing '.' or not.
738  if (U_isupper(*it)) {
739  string t;
// Scan with a copy of the iterator so `it` is only moved if an acronym is
// accepted.
740  Utf8Iterator p = it;
741  do {
742  Unicode::append_utf8(t, *p++);
743  } while (p != end && *p == '.' && ++p != end && U_isupper(*p));
744  // One letter does not make an acronym! If we handled a single
745  // uppercase letter here, we wouldn't catch M&S below.
746  if (t.length() > 1) {
747  // Check there's not a (lower case) letter or digit
748  // immediately after it.
749  // FIXME: should I.B.M..P.T.O be a range search?
750  if (p == end || !is_wordchar(*p)) {
751  it = p;
752  swap(term, t);
753  }
754  }
755  }
756  was_acronym = !term.empty();
757 
758  if (try_word_break && term.empty() && is_unbroken_script(*it)) {
// Take the raw bytes spanned by get_unbroken() as the term.
759  const char* start = it.raw();
760  get_unbroken(it);
761  term.assign(start, it.raw() - start);
762  needs_word_break = true;
763  }
764 
765  if (term.empty()) {
// prevch tracks the last character appended, so that an infix between two
// digits can be checked against the digit-specific list.
766  unsigned prevch = *it;
767  Unicode::append_utf8(term, prevch);
768  while (++it != end) {
769  if (try_word_break && is_unbroken_script(*it)) break;
770  unsigned ch = *it;
771  if (!is_wordchar(ch)) {
772  // Treat a single embedded '&' or "'" or similar as a word
773  // character (e.g. AT&T, Fred's). Also, normalise
774  // apostrophes to ASCII apostrophe.
775  Utf8Iterator p = it;
776  ++p;
// An infix only counts if a word character follows it.
777  if (p == end || !is_wordchar(*p)) break;
778  unsigned nextch = *p;
779  if (is_digit(prevch) && is_digit(nextch)) {
780  ch = check_infix_digit(ch);
781  } else {
782  ch = check_infix(ch);
783  }
784  if (!ch) break;
// Ignorable characters are skipped without being added to the term.
785  if (ch == UNICODE_IGNORE)
786  continue;
787  }
788  Unicode::append_utf8(term, ch);
789  prevch = ch;
790  }
791  if (it != end && is_suffix(*it)) {
792  string suff_term = term;
793  Utf8Iterator p = it;
794  // Keep trailing + (e.g. C++, Na+) or # (e.g. C#).
795  do {
// At most 3 suffix characters are kept; a 4th abandons the suffixed form.
796  if (suff_term.size() - term.size() == 3) {
797  suff_term.resize(0);
798  break;
799  }
800  suff_term += *p;
801  } while (is_suffix(*++p));
802  if (!suff_term.empty() && (p == end || !is_wordchar(*p))) {
803  // If the suffixed term doesn't exist, check that the
804  // non-suffixed term does. This also takes care of
805  // the case when QueryParser::set_database() hasn't
806  // been called.
807  bool use_suff_term = false;
808  string lc = Unicode::tolower(suff_term);
809  if (db.term_exists(lc)) {
810  use_suff_term = true;
811  } else {
812  lc = Unicode::tolower(term);
// Neither form exists, so keep the suffix.
813  if (!db.term_exists(lc)) use_suff_term = true;
814  }
815  if (use_suff_term) {
816  term = suff_term;
817  it = p;
818  }
819  }
820  }
821  }
822  return term;
823 }
824 
825 #line 1416 "queryparser/queryparser.lemony"
826 
827 
// Accumulates the parts of a query during parsing: three owned Query
// pointers (deleted together in the cleanup code below) and a map of filter
// queries keyed by grouping.
// NOTE(review): the declarator lines of the default constructor, the
// destructor and the filter-merging method are missing from this listing.
828 struct ProbQuery {
829  Query* query = NULL;
830  Query* love = NULL;
831  Query* hate = NULL;
832  // filter is a map from prefix to a query for that prefix. Queries with
833  // the same prefix are combined with OR, and the results of this are
834  // combined with AND to get the full filter.
835  map<string, Query> filter;
836 
838 
// Takes ownership of query_ (it is deleted along with love and hate below).
839  explicit
840  ProbQuery(Query* query_) : query(query_) {}
841 
// NOTE(review): presumably the destructor body - declarator missing.
843  delete query;
844  delete love;
845  delete hate;
846  }
847 
// Set (replacing any existing entry) the filter for a grouping.
848  void add_filter(const string& grouping, const Query & q) {
849  filter[grouping] = q;
850  }
851 
// Add a filter for a grouping, combining with any existing entry.
852  void append_filter(const string& grouping, const Query & qnew) {
853  auto it = filter.find(grouping);
854  if (it == filter.end()) {
855  filter.insert(make_pair(grouping, qnew));
856  } else {
857  Query & q = it->second;
858  // We OR multiple filters with the same prefix if they're
859  // exclusive, otherwise we AND them.
// A non-empty grouping is what marks the filter as exclusive here.
860  bool exclusive = !grouping.empty();
861  if (exclusive) {
862  q |= qnew;
863  } else {
864  q &= qnew;
865  }
866  }
867  }
868 
// Set (replacing any existing entry) the range filter for a grouping.
869  void add_filter_range(const string& grouping, const Query & range) {
870  filter[grouping] = range;
871  }
872 
// OR a range filter into the entry for a grouping; operator[] creates a
// default-constructed entry first if there isn't one.
873  void append_filter_range(const string& grouping, const Query & range) {
874  Query & q = filter[grouping];
875  q |= range;
876  }
877 
// AND together all the per-grouping filters; asserts that at least one
// exists.
// NOTE(review): declarator missing from this listing.
879  auto i = filter.begin();
880  Assert(i != filter.end());
881  Query q = i->second;
882  while (++i != filter.end()) {
883  q &= i->second;
884  }
885  return q;
886  }
887 };
888 
// A group of two or more Term objects which as_group() combines with the
// default operator.  The group owns its terms (they are deleted in the
// cleanup loop below).
// NOTE(review): the destructor's declarator line is missing from this
// listing.
890 class TermGroup {
891  vector<Term *> terms;
892 
// If false, as_group() reprocesses the group with the stopper disabled when
// every term turns out to be a stopword.
898  bool empty_ok;
899 
// Private: instances are created via create().
900  TermGroup(Term* t1, Term* t2) : empty_ok(false) {
901  add_term(t1);
902  add_term(t2);
903  }
904 
905  public:
// Factory - allocates with new; as_group() later does `delete this`.
907  static TermGroup* create(Term* t1, Term* t2) {
908  return new TermGroup(t1, t2);
909  }
910 
// NOTE(review): presumably the destructor body - declarator missing.
912  for (auto&& t : terms) {
913  delete t;
914  }
915  }
916 
// Append a term; the group takes ownership of it.
918  void add_term(Term * term) {
919  terms.push_back(term);
920  }
921 
923  void set_empty_ok() { empty_ok = true; }
924 
// Convert to a query and delete this group (defined below).
926  Query * as_group(State *state) const;
927 };
928 
929 Query *
931 {
932  const Xapian::Stopper * stopper = state->get_stopper();
933  size_t stoplist_size = state->stoplist_size();
934  bool default_op_is_positional = is_positional(state->default_op());
935 reprocess:
936  Query::op default_op = state->default_op();
937  vector<Query> subqs;
938  subqs.reserve(terms.size());
939  if (state->flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
940  // Check for multi-word synonyms.
941  Database db = state->get_database();
942 
943  string key;
944  vector<Term*>::size_type begin = 0;
945  vector<Term*>::size_type i = begin;
946  while (terms.size() - i > 0) {
947  size_t longest_match = 0;
948  // This value is never used, but GCC 4.8 warns with
949  // -Wmaybe-uninitialized (GCC 5.4 doesn't).
950  vector<Term*>::size_type longest_match_end = 0;
951  if (terms.size() - i >= 2) {
952  // Greedily try to match as many consecutive words as possible.
953  key = terms[i]->name;
954  key += ' ';
955  key += terms[i + 1]->name;
956  TermIterator synkey(db.synonym_keys_begin(key));
957  TermIterator synend(db.synonym_keys_end(key));
958  if (synkey != synend) {
959  longest_match = key.size();
960  longest_match_end = i + 2;
961  for (auto j = i + 2; j < terms.size(); ++j) {
962  key += ' ';
963  key += terms[j]->name;
964  synkey.skip_to(key);
965  if (synkey == synend)
966  break;
967  const string& found = *synkey;
968  if (!startswith(found, key))
969  break;
970  if (found.size() == key.size()) {
971  longest_match = key.size();
972  longest_match_end = j + 1;
973  }
974  }
975  }
976  }
977  if (longest_match == 0) {
978  // No multi-synonym matches at position i.
979  if (stopper && (*stopper)(terms[i]->name)) {
980  state->add_to_stoplist(terms[i]);
981  } else {
982  if (default_op_is_positional)
983  terms[i]->need_positions();
984  subqs.push_back(terms[i]->get_query_with_auto_synonyms());
985  }
986  begin = ++i;
987  continue;
988  }
989  i = longest_match_end;
990  key.resize(longest_match);
991 
992  vector<Query> subqs2;
993  for (auto j = begin; j != i; ++j) {
994  if (stopper && (*stopper)(terms[j]->name)) {
995  state->add_to_stoplist(terms[j]);
996  } else {
997  if (default_op_is_positional)
998  terms[i]->need_positions();
999  subqs2.push_back(terms[j]->get_query());
1000  }
1001  }
1002  Query q_original_terms;
1003  if (default_op_is_positional) {
1004  q_original_terms = Query(default_op,
1005  subqs2.begin(), subqs2.end(),
1006  subqs2.size() + 9);
1007  } else {
1008  q_original_terms = Query(default_op,
1009  subqs2.begin(), subqs2.end());
1010  }
1011  subqs2.clear();
1012 
1013  // Use the position of the first term for the synonyms.
1014  TermIterator syn = db.synonyms_begin(key);
1015  Query q(Query::OP_SYNONYM,
1016  SynonymIterator(syn, terms[begin]->pos, &q_original_terms),
1017  SynonymIterator(db.synonyms_end(key)));
1018  subqs.push_back(q);
1019 
1020  begin = i;
1021  }
1022  } else {
1023  vector<Term*>::const_iterator i;
1024  for (i = terms.begin(); i != terms.end(); ++i) {
1025  if (stopper && (*stopper)((*i)->name)) {
1026  state->add_to_stoplist(*i);
1027  } else {
1028  if (default_op_is_positional)
1029  (*i)->need_positions();
1030  subqs.push_back((*i)->get_query_with_auto_synonyms());
1031  }
1032  }
1033  }
1034 
1035  if (!empty_ok && stopper && subqs.empty() &&
1036  stoplist_size < state->stoplist_size()) {
1037  // This group is all stopwords, so roll-back, disable stopper
1038  // temporarily, and reprocess this group.
1039  state->stoplist_resize(stoplist_size);
1040  stopper = NULL;
1041  goto reprocess;
1042  }
1043 
1044  Query * q = NULL;
1045  if (!subqs.empty()) {
1046  if (default_op_is_positional) {
1047  q = new Query(default_op, subqs.begin(), subqs.end(),
1048  subqs.size() + 9);
1049  } else {
1050  q = new Query(default_op, subqs.begin(), subqs.end());
1051  }
1052  }
1053  delete this;
1054  return q;
1055 }
1056 
// A list of Term objects to be combined positionally (phrase, NEAR or ADJ).
// The list owns its terms (they are deleted in the cleanup loop below).
// NOTE(review): some lines are missing from this listing - the
// `uniform_prefixes` member, the declarators of opwindow_subq(),
// as_opwindow_query(), the destructor and as_phrase_query().
1058 class Terms {
1059  vector<Term *> terms;
1060 
// Window size; size_t(-1) means positional information is disabled (see the
// constructor and as_opwindow_query()).
1066  size_t window;
1067 
1078 
// Prefix list shared by all the terms while uniform_prefixes is true; reset
// to NULL once terms with differing prefix lists have been added.
1082  const vector<string>* prefixes;
1083 
// Build Query(op, v, w), except that the window argument is dropped for
// OP_AND.
// NOTE(review): first line of the declarator missing - callers below use
// opwindow_subq(op, subqs, w).
1085  const vector<Query>& v,
1086  Xapian::termcount w) const {
1087  if (op == Query::OP_AND) {
1088  return Query(op, v.begin(), v.end());
1089  }
1090  return Query(op, v.begin(), v.end(), w);
1091  }
1092 
// Build the windowed query for all terms (window = w_delta + number of
// terms), return it newly allocated, and delete this object.  May return
// NULL (e.g. uniform prefixes with an empty prefix list).
// NOTE(review): declarator missing - callers below use
// as_opwindow_query(op, w_delta).
// With positions disabled, fall back to OP_AND.
1095  if (window == size_t(-1)) op = Query::OP_AND;
1096  Query * q = NULL;
1097  size_t n_terms = terms.size();
1098  Xapian::termcount w = w_delta + terms.size();
1099  if (uniform_prefixes) {
1100  if (prefixes) {
// One windowed subquery per prefix, ORed together.
1101  for (auto&& prefix : *prefixes) {
1102  vector<Query> subqs;
1103  subqs.reserve(n_terms);
1104  for (Term* t : terms) {
1105  subqs.push_back(Query(t->make_term(prefix), 1, t->pos));
1106  }
1107  add_to_query(q, Query::OP_OR, opwindow_subq(op, subqs, w));
1108  }
1109  }
1110  } else {
// Prefix lists differ, so let each term build its own query.
1111  vector<Query> subqs;
1112  subqs.reserve(n_terms);
1113  for (Term* t : terms) {
1114  subqs.push_back(t->get_query());
1115  }
1116  q = new Query(opwindow_subq(op, subqs, w));
1117  }
1118 
1119  delete this;
1120  return q;
1121  }
1122 
// Private: instances are created via create().
1123  explicit Terms(bool no_pos)
1124  : window(no_pos ? size_t(-1) : 0),
1125  uniform_prefixes(true),
1126  prefixes(NULL) { }
1127 
1128  public:
// Factory - allocates with new; positions are disabled if FLAG_NO_POSITIONS
// is set in the parse flags.
1130  static Terms* create(State* state) {
1131  return new Terms(state->flags & QueryParser::FLAG_NO_POSITIONS);
1132  }
1133 
// NOTE(review): presumably the destructor body - declarator missing.
1135  for (auto&& t : terms) {
1136  delete t;
1137  }
1138  }
1139 
// Append a term (taking ownership), tracking whether all terms added so far
// share the same prefix list, and calling need_positions() on it.
1141  void add_positional_term(Term * term) {
1142  const auto& term_prefixes = term->field_info->prefixes;
1143  if (terms.empty()) {
1144  prefixes = &term_prefixes;
1145  } else if (uniform_prefixes && prefixes != &term_prefixes) {
// Different objects may still hold equal prefix lists.
1146  if (*prefixes != term_prefixes) {
1147  prefixes = NULL;
1148  uniform_prefixes = false;
1149  }
1150  }
1151  term->need_positions();
1152  terms.push_back(term);
1153  }
1154 
// Raise the window to alternative_window if that is larger.
1155  void adjust_window(size_t alternative_window) {
1156  if (alternative_window > window) window = alternative_window;
1157  }
1158 
// NOTE(review): declarator missing - class Term's callers suggest this is
// the phrase conversion; confirm against upstream.
1161  return as_opwindow_query(Query::OP_PHRASE, 0);
1162  }
1163 
1165  Query * as_near_query() const {
1166  // The common meaning of 'a NEAR b' is "a within 10 terms of b", which
1167  // means a window size of 11. For more than 2 terms, we just add one
1168  // to the window size for each extra term.
1169  size_t w = window;
1170  if (w == 0) w = 10;
1171  return as_opwindow_query(Query::OP_NEAR, w - 1);
1172  }
1173 
1175  Query * as_adj_query() const {
1176  // The common meaning of 'a ADJ b' is "a at most 10 terms before b",
1177  // which means a window size of 11. For more than 2 terms, we just add
1178  // one to the window size for each extra term.
1179  size_t w = window;
1180  if (w == 0) w = 10;
1181  return as_opwindow_query(Query::OP_PHRASE, w - 1);
1182  }
1183 };
1184 
// NOTE(review): line 1186 of the original numbering (the function name and
// parameter list between `void` and `{`) is missing from this listing.  The
// body reads `name`, `state`, `field_info`, `unstemmed`, `stem` and `pos`,
// writes to `terms`, and ends with `delete this`, so it looks like a Term
// member function that receives a Terms* - confirm against the real source.
//
// Walks `name` one Unicode character at a time, wraps each character in a new
// Term and adds it to `terms` via add_positional_term(), then deletes this
// object.
1185 void
1187 {
1188  // Add each individual character to the phrase.
1189  string t;
1190  for (Utf8Iterator it(name); it != Utf8Iterator(); ++it) {
1191  Unicode::append_utf8(t, *it);
1192  Term * c = new Term(state, t, field_info, unstemmed, stem, pos);
1193  terms->add_positional_term(c);
// Empty the buffer so the next iteration holds a single character again.
1194  t.resize(0);
1195  }
1196 
1197  // FIXME: we want to add the n-grams as filters too for efficiency.
1198 
1199  delete this;
1200 }
1201 
1202 // Helper macro to check for missing arguments to a boolean operator.
// If either A or B tests false it stores a "Syntax: ..." message containing
// OP_TXT in state->error, calls yy_parse_failed(yypParser) and executes a bare
// `return;`.  It therefore needs `state` and `yypParser` in scope and can only
// be used inside a function returning void.  OP_TXT must be a string literal
// because it is joined to the message by literal concatenation.
1203 #define VET_BOOL_ARGS(A, B, OP_TXT) \
1204  do {\
1205  if (!A || !B) {\
1206  state->error = "Syntax: <expression> " OP_TXT " <expression>";\
1207  yy_parse_failed(yypParser);\
1208  return;\
1209  }\
1210  } while (0)
1211 
1212 #line 1213 "queryparser/queryparser_internal.cc"
1213 /**************** End of %include directives **********************************/
1214 /* These constants specify the various numeric values for terminal symbols
1215 ** in a format understandable to "makeheaders". This section is blank unless
1216 ** "lemon" is run with the "-m" command-line option.
1217 ***************** Begin makeheaders token definitions *************************/
1218 /**************** End makeheaders token definitions ***************************/
1219 
1220 /* The next section is a series of control #defines that configure
1221 ** various aspects of the generated parser.
1222 ** YYCODETYPE is the data type used to store the integer codes
1223 ** that represent terminal and non-terminal symbols.
1224 ** "unsigned char" is used if there are fewer than
1225 ** 256 symbols. Larger types otherwise.
1226 ** YYNOCODE is a number of type YYCODETYPE that is not used for
1227 ** any terminal or nonterminal symbol.
1228 ** YYFALLBACK If defined, this indicates that one or more tokens
1229 ** (also known as: "terminal symbols") have fall-back
1230 ** values which should be used if the original symbol
1231 ** would not parse. This permits keywords to sometimes
1232 ** be used as identifiers, for example.
1233 ** YYACTIONTYPE is the data type used for "action codes" - numbers
1234 ** that indicate what to do in response to the next
1235 ** token.
1236 ** ParseTOKENTYPE is the data type used for minor type for terminal
1237 ** symbols. Background: A "minor type" is a semantic
1238 ** value associated with a terminal or non-terminal
1239 ** symbol. For example, for an "ID" terminal symbol,
1240 ** the minor type might be the name of the identifier.
1241 ** Each non-terminal can have a different minor type.
1242 ** Terminal symbols all have the same minor type, though.
1243 ** This macro defines the minor type for terminal
1244 ** symbols.
1245 ** YYMINORTYPE is the data type used for all minor types.
1246 ** This is typically a union of many types, one of
1247 ** which is ParseTOKENTYPE. The entry in the union
1248 ** for terminal symbols is called "yy0".
1249 ** YYSTACKDEPTH is the maximum depth of the parser's stack. If
1250 ** zero the stack is dynamically sized using realloc()
1251 ** ParseARG_SDECL A static variable declaration for the %extra_argument
1252 ** ParseARG_PDECL A parameter declaration for the %extra_argument
1253 ** ParseARG_STORE Code to store %extra_argument into yypParser
1254 ** ParseARG_FETCH Code to extract %extra_argument from yypParser
1255 ** YYERRORSYMBOL is the code number of the error symbol. If not
1256 ** defined, then do no error processing.
1257 ** YYNSTATE the combined number of states.
1258 ** YYNRULE the number of rules in the grammar
1259 ** YYNTOKEN Number of terminal symbols
1260 ** YY_MAX_SHIFT Maximum value for shift actions
1261 ** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1262 ** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1263 ** YY_ERROR_ACTION The yy_action[] code for syntax error
1264 ** YY_ACCEPT_ACTION The yy_action[] code for accept
1265 ** YY_NO_ACTION The yy_action[] code for no-op
1266 ** YY_MIN_REDUCE Minimum value for reduce actions
1267 ** YY_MAX_REDUCE Maximum value for reduce actions
1268 */
1269 #ifndef INTERFACE
1270 # define INTERFACE 1
1271 #endif
1272 /************* Begin control #defines *****************************************/
1273 #define YYCODETYPE unsigned char
1274 #define YYNOCODE 40
/* Every action code defined below (the largest is YY_MAX_REDUCE, 191) fits
** in an unsigned char. */
1275 #define YYACTIONTYPE unsigned char
1276 #define ParseTOKENTYPE Term *
/* NOTE(review): lines 1279-1283 of the original numbering are missing from
** this listing, so the union is incomplete as shown.  yy_destructor() reads
** members yy0, yy39, yy40, yy32 and yy14, which are presumably declared on
** the missing lines - confirm against the real source. */
1277 typedef union {
1278  int yyinit;
1284  int yy46;
1285 } YYMINORTYPE;
1286 #ifndef YYSTACKDEPTH
1287 #define YYSTACKDEPTH 100
1288 #endif
/* The %extra_argument is a State pointer kept in the parser's `state`
** member: FETCH copies it into a local, STORE writes it back. */
1289 #define ParseARG_SDECL State * state;
1290 #define ParseARG_PDECL ,State * state
1291 #define ParseARG_FETCH State * state = yypParser->state
1292 #define ParseARG_STORE yypParser->state = state
1293 #define YYNSTATE 35
1294 #define YYNRULE 56
1295 #define YYNTOKEN 24
1296 #define YY_MAX_SHIFT 34
1297 #define YY_MIN_SHIFTREDUCE 77
1298 #define YY_MAX_SHIFTREDUCE 132
1299 #define YY_ERROR_ACTION 133
1300 #define YY_ACCEPT_ACTION 134
1301 #define YY_NO_ACTION 135
1302 #define YY_MIN_REDUCE 136
1303 #define YY_MAX_REDUCE 191
1304 /************* End control #defines *******************************************/
1305 
1306 /* Define the yytestcase() macro to be a no-op if it is not already defined
1307 ** otherwise.
1308 **
1309 ** Applications can choose to define yytestcase() in the %include section
1310 ** to a macro that can assist in verifying code coverage. For production
1311 ** code the yytestcase() macro should be turned off. But it is useful
1312 ** for testing.
1313 */
/* Fallback definition: expands to nothing, so the argument X is never
** evaluated. */
1314 #ifndef yytestcase
1315 # define yytestcase(X)
1316 #endif
1317 
1318 
1319 /* Next are the tables used to determine what action to take based on the
1320 ** current state and lookahead token. These tables are used to implement
1321 ** functions that take a state number and lookahead value and return an
1322 ** action integer.
1323 **
1324 ** Suppose the action integer is N. Then the action is determined as
1325 ** follows
1326 **
1327 ** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead
1328 ** token onto the stack and goto state N.
1329 **
1330 ** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
1331 ** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE.
1332 **
1333 ** N == YY_ERROR_ACTION A syntax error has occurred.
1334 **
1335 ** N == YY_ACCEPT_ACTION The parser accepts its input.
1336 **
1337 ** N == YY_NO_ACTION No such action. Denotes unused
1338 ** slots in the yy_action[] table.
1339 **
1340 ** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE
1341 ** and YY_MAX_REDUCE
1342 **
1343 ** The action table is constructed as a single large table named yy_action[].
1344 ** Given state S and lookahead X, the action is computed as either:
1345 **
1346 ** (A) N = yy_action[ yy_shift_ofst[S] + X ]
1347 ** (B) N = yy_default[S]
1348 **
1349 ** The (A) formula is preferred. The B formula is used instead if
1350 ** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
1351 **
1352 ** The formulas above are for computing the action when the lookahead is
1353 ** a terminal symbol. If the lookahead is a non-terminal (as occurs after
1354 ** a reduce action) then the yy_reduce_ofst[] array is used in place of
1355 ** the yy_shift_ofst[] array.
1356 **
1357 ** The following are the tables generated in this section:
1358 **
1359 ** yy_action[] A single table containing all actions.
1360 ** yy_lookahead[] A table containing the lookahead for each entry in
1361 ** yy_action. Used to detect hash collisions.
1362 ** yy_shift_ofst[] For each state, the offset into yy_action for
1363 ** shifting terminals.
1364 ** yy_reduce_ofst[] For each state, the offset into yy_action for
1365 ** shifting non-terminals after a reduce.
1366 ** yy_default[] Default action for each state.
1367 **
1368 *********** Begin parsing tables **********************************************/
/* Combined action table holding YY_ACTTAB_COUNT entries.  It is indexed with
** a per-state offset (from yy_shift_ofst[] or yy_reduce_ofst[]) plus the
** lookahead symbol code.  Entries equal to YY_NO_ACTION (135) are unused
** slots. */
1369 #define YY_ACTTAB_COUNT (326)
1370 static const YYACTIONTYPE yy_action[] = {
1371  /* 0 */ 134, 34, 34, 20, 8, 34, 18, 13, 16, 27,
1372  /* 10 */ 31, 23, 30, 28, 3, 21, 112, 10, 9, 2,
1373  /* 20 */ 25, 15, 111, 114, 104, 105, 97, 87, 14, 4,
1374  /* 30 */ 137, 113, 126, 115, 12, 11, 1, 7, 10, 9,
1375  /* 40 */ 124, 25, 15, 98, 88, 104, 105, 97, 87, 14,
1376  /* 50 */ 4, 29, 113, 138, 138, 138, 8, 138, 18, 13,
1377  /* 60 */ 16, 119, 31, 23, 30, 28, 141, 141, 141, 8,
1378  /* 70 */ 141, 18, 13, 16, 125, 31, 23, 30, 28, 140,
1379  /* 80 */ 140, 140, 8, 140, 18, 13, 16, 123, 31, 23,
1380  /* 90 */ 30, 28, 26, 26, 20, 8, 26, 18, 13, 16,
1381  /* 100 */ 136, 31, 23, 30, 28, 24, 24, 24, 8, 24,
1382  /* 110 */ 18, 13, 16, 135, 31, 23, 30, 28, 22, 22,
1383  /* 120 */ 22, 8, 22, 18, 13, 16, 135, 31, 23, 30,
1384  /* 130 */ 28, 139, 139, 139, 8, 139, 18, 13, 16, 121,
1385  /* 140 */ 31, 23, 30, 28, 10, 9, 135, 25, 15, 122,
1386  /* 150 */ 135, 104, 105, 97, 87, 14, 4, 135, 113, 135,
1387  /* 160 */ 189, 189, 135, 25, 19, 135, 135, 104, 105, 189,
1388  /* 170 */ 189, 14, 4, 162, 113, 162, 162, 162, 162, 33,
1389  /* 180 */ 32, 33, 32, 116, 135, 135, 120, 118, 120, 118,
1390  /* 190 */ 106, 25, 17, 117, 162, 104, 105, 95, 135, 14,
1391  /* 200 */ 4, 135, 113, 25, 17, 135, 135, 104, 105, 99,
1392  /* 210 */ 135, 14, 4, 135, 113, 25, 17, 135, 135, 104,
1393  /* 220 */ 105, 96, 135, 14, 4, 135, 113, 25, 17, 135,
1394  /* 230 */ 135, 104, 105, 100, 135, 14, 4, 135, 113, 25,
1395  /* 240 */ 19, 135, 135, 104, 105, 135, 135, 14, 4, 135,
1396  /* 250 */ 113, 135, 149, 149, 135, 31, 23, 30, 28, 152,
1397  /* 260 */ 135, 135, 152, 135, 31, 23, 30, 28, 135, 150,
1398  /* 270 */ 135, 135, 150, 135, 31, 23, 30, 28, 153, 135,
1399  /* 280 */ 135, 153, 135, 31, 23, 30, 28, 151, 135, 135,
1400  /* 290 */ 151, 135, 31, 23, 30, 28, 135, 148, 148, 135,
1401  /* 300 */ 31, 23, 30, 28, 191, 135, 191, 191, 191, 191,
1402  /* 310 */ 6, 5, 1, 7, 5, 1, 7, 135, 135, 135,
1403  /* 320 */ 135, 135, 135, 135, 135, 191,
1404 };
/* Validation table parallel to yy_action[]: slot i is only a real entry for
** symbol X when yy_lookahead[i]==X, otherwise the state's default action is
** used.  The table continues past YY_ACTTAB_COUNT; yy_find_shift_action()
** asserts that offset+YYNTOKEN stays within it. */
1405 static const YYCODETYPE yy_lookahead[] = {
1406  /* 0 */ 25, 26, 27, 28, 29, 30, 31, 32, 33, 7,
1407  /* 10 */ 35, 36, 37, 38, 5, 34, 12, 8, 9, 10,
1408  /* 20 */ 11, 12, 21, 12, 15, 16, 17, 18, 19, 20,
1409  /* 30 */ 0, 22, 12, 22, 8, 9, 4, 5, 8, 9,
1410  /* 40 */ 12, 11, 12, 17, 18, 15, 16, 17, 18, 19,
1411  /* 50 */ 20, 6, 22, 26, 27, 28, 29, 30, 31, 32,
1412  /* 60 */ 33, 14, 35, 36, 37, 38, 26, 27, 28, 29,
1413  /* 70 */ 30, 31, 32, 33, 12, 35, 36, 37, 38, 26,
1414  /* 80 */ 27, 28, 29, 30, 31, 32, 33, 12, 35, 36,
1415  /* 90 */ 37, 38, 26, 27, 28, 29, 30, 31, 32, 33,
1416  /* 100 */ 0, 35, 36, 37, 38, 26, 27, 28, 29, 30,
1417  /* 110 */ 31, 32, 33, 39, 35, 36, 37, 38, 26, 27,
1418  /* 120 */ 28, 29, 30, 31, 32, 33, 39, 35, 36, 37,
1419  /* 130 */ 38, 26, 27, 28, 29, 30, 31, 32, 33, 13,
1420  /* 140 */ 35, 36, 37, 38, 8, 9, 39, 11, 12, 23,
1421  /* 150 */ 39, 15, 16, 17, 18, 19, 20, 39, 22, 39,
1422  /* 160 */ 8, 9, 39, 11, 12, 39, 39, 15, 16, 17,
1423  /* 170 */ 18, 19, 20, 0, 22, 2, 3, 4, 5, 6,
1424  /* 180 */ 7, 6, 7, 12, 39, 39, 13, 14, 13, 14,
1425  /* 190 */ 19, 11, 12, 22, 21, 15, 16, 17, 39, 19,
1426  /* 200 */ 20, 39, 22, 11, 12, 39, 39, 15, 16, 17,
1427  /* 210 */ 39, 19, 20, 39, 22, 11, 12, 39, 39, 15,
1428  /* 220 */ 16, 17, 39, 19, 20, 39, 22, 11, 12, 39,
1429  /* 230 */ 39, 15, 16, 17, 39, 19, 20, 39, 22, 11,
1430  /* 240 */ 12, 39, 39, 15, 16, 39, 39, 19, 20, 39,
1431  /* 250 */ 22, 39, 32, 33, 39, 35, 36, 37, 38, 30,
1432  /* 260 */ 39, 39, 33, 39, 35, 36, 37, 38, 39, 30,
1433  /* 270 */ 39, 39, 33, 39, 35, 36, 37, 38, 30, 39,
1434  /* 280 */ 39, 33, 39, 35, 36, 37, 38, 30, 39, 39,
1435  /* 290 */ 33, 39, 35, 36, 37, 38, 39, 32, 33, 39,
1436  /* 300 */ 35, 36, 37, 38, 0, 39, 2, 3, 4, 5,
1437  /* 310 */ 2, 3, 4, 5, 3, 4, 5, 39, 39, 39,
1438  /* 320 */ 39, 39, 39, 39, 39, 21, 39, 39, 39, 39,
1439  /* 330 */ 39, 39, 39, 39, 39, 39, 39, 39, 39,
1440 };
/* Per-state base offset into yy_action[]/yy_lookahead[] for terminal
** lookaheads; one entry for each state 0..YY_SHIFT_COUNT.  YY_SHIFT_MIN and
** YY_SHIFT_MAX give the smallest and largest offsets in the table. */
1441 #define YY_SHIFT_COUNT (34)
1442 #define YY_SHIFT_MIN (0)
1443 #define YY_SHIFT_MAX (311)
1444 static const unsigned short int yy_shift_ofst[] = {
1445  /* 0 */ 30, 9, 136, 136, 136, 136, 136, 136, 152, 180,
1446  /* 10 */ 192, 204, 216, 228, 11, 173, 304, 175, 26, 175,
1447  /* 20 */ 308, 171, 311, 126, 32, 4, 1, 20, 2, 28,
1448  /* 30 */ 45, 47, 62, 75, 100,
1449 };
/* Per-state base offset into yy_action[] for non-terminal lookaheads (used
** after a reduce); one entry for each state 0..YY_REDUCE_COUNT.  Offsets can
** be negative (YY_REDUCE_MIN is -25), hence the signed element type. */
1450 #define YY_REDUCE_COUNT (14)
1451 #define YY_REDUCE_MIN (-25)
1452 #define YY_REDUCE_MAX (265)
1453 static const short yy_reduce_ofst[] = {
1454  /* 0 */ -25, 27, 40, 53, 66, 79, 92, 105, 220, 229,
1455  /* 10 */ 239, 248, 257, 265, -19,
1456 };
/* Default action for each of the YYNSTATE states, used when the lookahead
** has no matching slot in yy_action[].  Each entry here is either
** YY_ERROR_ACTION (133) or a reduce code in YY_MIN_REDUCE..YY_MAX_REDUCE. */
1457 static const YYACTIONTYPE yy_default[] = {
1458  /* 0 */ 144, 144, 144, 144, 144, 144, 144, 144, 145, 133,
1459  /* 10 */ 133, 133, 133, 160, 133, 161, 190, 162, 133, 161,
1460  /* 20 */ 133, 133, 142, 167, 143, 133, 187, 133, 169, 133,
1461  /* 30 */ 168, 166, 133, 133, 187,
1462 };
1463 /********** End of lemon-generated parsing tables *****************************/
1464 
1465 /* The next table maps tokens (terminal symbols) into fallback tokens.
1466 ** If a construct like the following:
1467 **
1468 ** %fallback ID X Y Z.
1469 **
1470 ** appears in the grammar, then ID becomes a fallback token for X, Y,
1471 ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser
1472 ** but it does not parse, the type of the token is changed to ID and
1473 ** the parse is retried before an error is thrown.
1474 **
1475 ** This feature can be used, for example, to cause some keywords in a language
1476 ** to revert to identifiers if the keyword does not apply in the context where
1477 ** it appears.
1478 */
/* Only compiled when YYFALLBACK is defined.  The table has no entries here. */
1479 #ifdef YYFALLBACK
1480 static const YYCODETYPE yyFallback[] = {
1481 };
1482 #endif /* YYFALLBACK */
1483 
1484 /* The following structure represents a single element of the
1485 ** parser's stack. Information stored includes:
1486 **
1487 ** + The state number for the parser at this level of the stack.
1488 **
1489 ** + The value of the token stored at this level of the stack.
1490 ** (In other words, the "major" token.)
1491 **
1492 ** + The semantic value stored at this level of the stack. This is
1493 ** the information used by the action routines in the grammar.
1494 ** It is sometimes called the "minor" token.
1495 **
1496 ** After the "shift" half of a SHIFTREDUCE action, the stateno field
1497 ** actually contains the reduce action for the second half of the
1498 ** SHIFTREDUCE.
1499 */
/* NOTE(review): lines 1500-1501 and 1505 of the original numbering are
** missing from this listing; they presumably hold the struct header and the
** signatures of the two constructors whose bodies follow - confirm against
** the real source. */
/* First body: zeroes stateno and major; minor is not assigned in the lines
** shown. */
1502  stateno = 0;
1503  major = 0;
1504  }
/* Second body: copies the given state and major code, and stores the given
** minor value in the yy0 member of the union. */
1506  stateno = stateno_;
1507  major = major_;
1508  minor.yy0 = minor_;
1509  }
1510  YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */
1511  YYCODETYPE major; /* The major token value. This is the code
1512  ** number for the token at this stack level */
1513  YYMINORTYPE minor; /* The user-supplied minor token value. This
1514  ** is the value of the token */
1515 };
1516 
/* Declared ahead of struct yyParser because its member bodies below call
** them. */
1517 static void ParseInit(yyParser *pParser);
1518 static void ParseFinalize(yyParser *pParser);
1519 
1520 /* The state of the parser is completely contained in an instance of
1521 ** the following structure */
1522 struct yyParser {
1523 #ifdef YYTRACKMAXSTACKDEPTH
1524  int yyhwm; /* High-water mark of the stack */
1525 #endif
1526 #ifndef YYNOERRORRECOVERY
1527  int yyerrcnt; /* Shifts left before out of the error */
1528 #endif
1529  ParseARG_SDECL /* A place to hold %extra_argument */
1530  vector<yyStackEntry> yystack; /* The parser's stack */
/* NOTE(review): lines 1531 and 1534 of the original numbering are missing
** from this listing; they presumably open the constructor and destructor
** whose bodies call ParseInit(this) and ParseFinalize(this) - confirm. */
1532  ParseInit(this);
1533  }
1535  ParseFinalize(this);
1536  }
1537 };
1538 typedef struct yyParser yyParser;
1539 
1540 #include "omassert.h"
1541 #include "debuglog.h"
1542 
1543 #if defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG)
1544 /* For tracing shifts, the names of all terminals and nonterminals
1545 ** are required. The following table supplies these names */
/* Indexed by symbol code.  Codes 0..23 are the terminals (YYNTOKEN is 24);
** the entries from 24 upward name the remaining grammar symbols. */
1546 static const char *const yyTokenName[] = {
1547  /* 0 */ "$",
1548  /* 1 */ "ERROR",
1549  /* 2 */ "OR",
1550  /* 3 */ "XOR",
1551  /* 4 */ "AND",
1552  /* 5 */ "NOT",
1553  /* 6 */ "NEAR",
1554  /* 7 */ "ADJ",
1555  /* 8 */ "LOVE",
1556  /* 9 */ "HATE",
1557  /* 10 */ "HATE_AFTER_AND",
1558  /* 11 */ "SYNONYM",
1559  /* 12 */ "TERM",
1560  /* 13 */ "GROUP_TERM",
1561  /* 14 */ "PHR_TERM",
1562  /* 15 */ "WILD_TERM",
1563  /* 16 */ "PARTIAL_TERM",
1564  /* 17 */ "BOOLEAN_FILTER",
1565  /* 18 */ "RANGE",
1566  /* 19 */ "QUOTE",
1567  /* 20 */ "BRA",
1568  /* 21 */ "KET",
1569  /* 22 */ "UNBROKEN_WORDS",
1570  /* 23 */ "EMPTY_GROUP_OK",
1571  /* 24 */ "error",
1572  /* 25 */ "query",
1573  /* 26 */ "expr",
1574  /* 27 */ "prob_expr",
1575  /* 28 */ "bool_arg",
1576  /* 29 */ "prob",
1577  /* 30 */ "term",
1578  /* 31 */ "stop_prob",
1579  /* 32 */ "stop_term",
1580  /* 33 */ "compound_term",
1581  /* 34 */ "phrase",
1582  /* 35 */ "phrased_term",
1583  /* 36 */ "group",
1584  /* 37 */ "near_expr",
1585  /* 38 */ "adj_expr",
1586 };
1587 
1588 /* For tracing reduce actions, the names of all rules are required.
1589 */
/* Indexed by rule number, 0..YYNRULE-1 (56 rules).  Only used for the
** textual output produced by ParseRuleName(). */
1590 static const char *const yyRuleName[] = {
1591  /* 0 */ "query ::= expr",
1592  /* 1 */ "query ::=",
1593  /* 2 */ "expr ::= bool_arg AND bool_arg",
1594  /* 3 */ "expr ::= bool_arg NOT bool_arg",
1595  /* 4 */ "expr ::= bool_arg AND NOT bool_arg",
1596  /* 5 */ "expr ::= bool_arg AND HATE_AFTER_AND bool_arg",
1597  /* 6 */ "expr ::= bool_arg OR bool_arg",
1598  /* 7 */ "expr ::= bool_arg XOR bool_arg",
1599  /* 8 */ "bool_arg ::=",
1600  /* 9 */ "prob_expr ::= prob",
1601  /* 10 */ "prob ::= RANGE",
1602  /* 11 */ "prob ::= stop_prob RANGE",
1603  /* 12 */ "prob ::= stop_term stop_term",
1604  /* 13 */ "prob ::= prob stop_term",
1605  /* 14 */ "prob ::= LOVE term",
1606  /* 15 */ "prob ::= stop_prob LOVE term",
1607  /* 16 */ "prob ::= HATE term",
1608  /* 17 */ "prob ::= stop_prob HATE term",
1609  /* 18 */ "prob ::= HATE BOOLEAN_FILTER",
1610  /* 19 */ "prob ::= stop_prob HATE BOOLEAN_FILTER",
1611  /* 20 */ "prob ::= BOOLEAN_FILTER",
1612  /* 21 */ "prob ::= stop_prob BOOLEAN_FILTER",
1613  /* 22 */ "prob ::= LOVE BOOLEAN_FILTER",
1614  /* 23 */ "prob ::= stop_prob LOVE BOOLEAN_FILTER",
1615  /* 24 */ "stop_prob ::= stop_term",
1616  /* 25 */ "stop_term ::= TERM",
1617  /* 26 */ "term ::= TERM",
1618  /* 27 */ "compound_term ::= WILD_TERM",
1619  /* 28 */ "compound_term ::= PARTIAL_TERM",
1620  /* 29 */ "compound_term ::= QUOTE phrase QUOTE",
1621  /* 30 */ "compound_term ::= phrased_term",
1622  /* 31 */ "compound_term ::= group",
1623  /* 32 */ "compound_term ::= near_expr",
1624  /* 33 */ "compound_term ::= adj_expr",
1625  /* 34 */ "compound_term ::= BRA expr KET",
1626  /* 35 */ "compound_term ::= SYNONYM TERM",
1627  /* 36 */ "compound_term ::= UNBROKEN_WORDS",
1628  /* 37 */ "phrase ::= TERM",
1629  /* 38 */ "phrase ::= UNBROKEN_WORDS",
1630  /* 39 */ "phrase ::= phrase TERM",
1631  /* 40 */ "phrase ::= phrase UNBROKEN_WORDS",
1632  /* 41 */ "phrased_term ::= TERM PHR_TERM",
1633  /* 42 */ "phrased_term ::= phrased_term PHR_TERM",
1634  /* 43 */ "group ::= TERM GROUP_TERM",
1635  /* 44 */ "group ::= group GROUP_TERM",
1636  /* 45 */ "group ::= group EMPTY_GROUP_OK",
1637  /* 46 */ "near_expr ::= TERM NEAR TERM",
1638  /* 47 */ "near_expr ::= near_expr NEAR TERM",
1639  /* 48 */ "adj_expr ::= TERM ADJ TERM",
1640  /* 49 */ "adj_expr ::= adj_expr ADJ TERM",
1641  /* 50 */ "expr ::= prob_expr",
1642  /* 51 */ "bool_arg ::= expr",
1643  /* 52 */ "prob_expr ::= term",
1644  /* 53 */ "stop_prob ::= prob",
1645  /* 54 */ "stop_term ::= compound_term",
1646  /* 55 */ "term ::= compound_term",
1647 };
1648 
1649 /*
1650 ** This function returns the symbolic name associated with a token
1651 ** value.
1652 */
1653 static const char *ParseTokenName(int tokenType){
/* Range check against the table length so a negative or too-large code
** yields "Unknown" instead of indexing outside yyTokenName[]. */
1654  if( tokenType>=0 && tokenType<(int)(sizeof(yyTokenName)/sizeof(yyTokenName[0])) ){
1655  return yyTokenName[tokenType];
1656  }
1657  return "Unknown";
1658 }
1659 
1660 /*
1661 ** This function returns the symbolic name associated with a rule
1662 ** value.
1663 */
1664 static const char *ParseRuleName(int ruleNum){
/* Range check against the table length so a negative or too-large rule
** number yields "Unknown" instead of indexing outside yyRuleName[]. */
1665  if( ruleNum>=0 && ruleNum<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
1666  return yyRuleName[ruleNum];
1667  }
1668  return "Unknown";
1669 }
1670 #endif /* defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG) */
1671 
1672 /* Datatype of the argument to the memory allocator passed as the
1673 ** second argument to ParseAlloc() below. This can be changed by
1674 ** putting an appropriate #define in the %include section of the input
1675 ** grammar.
1676 */
/* NOTE(review): ParseAlloc() as defined in this file takes no arguments, so
** this macro looks unused by the code shown here - confirm. */
1677 #ifndef YYMALLOCARGTYPE
1678 # define YYMALLOCARGTYPE size_t
1679 #endif
1680 
1681 /* Initialize a new parser that has already been allocated.
1682 */
/* Resets the bookkeeping fields that are compiled in (yyhwm to 0, yyerrcnt
** to -1) and pushes one default-constructed yyStackEntry, so the stack holds
** exactly one bottom entry afterwards.  The `#if 0` regions are never
** compiled; they contain the array-based stack setup, which refers to
** members (yytos, yystksz, yystk0, yystackEnd) that struct yyParser does not
** declare here. */
1683 static
1684 void ParseInit(yyParser *pParser){
1685 #ifdef YYTRACKMAXSTACKDEPTH
1686  pParser->yyhwm = 0;
1687 #endif
1688 #if 0
1689 #if YYSTACKDEPTH<=0
1690  pParser->yytos = NULL;
1691  pParser->yystack = NULL;
1692  pParser->yystksz = 0;
1693  if( yyGrowStack(pParser) ){
1694  pParser->yystack = &pParser->yystk0;
1695  pParser->yystksz = 1;
1696  }
1697 #endif
1698 #endif
1699 #ifndef YYNOERRORRECOVERY
1700  pParser->yyerrcnt = -1;
1701 #endif
1702 #if 0
1703  pParser->yytos = pParser->yystack;
1704  pParser->yystack[0].stateno = 0;
1705  pParser->yystack[0].major = 0;
1706 #if YYSTACKDEPTH>0
1707  pParser->yystackEnd = &pParser->yystack[YYSTACKDEPTH-1];
1708 #endif
1709 #else
1710  pParser->yystack.push_back(yyStackEntry());
1711 #endif
1712 }
1713 
1714 #ifndef Parse_ENGINEALWAYSONSTACK
1715 /*
1716 ** This function allocates a new parser.
1717 **
1718 ** Inputs:
1719 ** None.
1720 **
1721 ** Outputs:
1722 ** A pointer to a parser. This pointer is used in subsequent calls
1723 ** to Parse and ParseFree.
1724 */
/* Allocates the parser with `new`; the matching release is ParseFree(),
** which uses `delete`. */
1725 static yyParser *ParseAlloc(void){
1726  return new yyParser;
1727 }
1728 #endif /* Parse_ENGINEALWAYSONSTACK */
1729 
1730 
1731 /* The following function deletes the "minor type" or semantic value
1732 ** associated with a symbol. The symbol can be either a terminal
1733 ** or nonterminal. "yymajor" is the symbol code, and "yypminor" is
1734 ** a pointer to the value to be deleted. The code used to do the
1735 ** deletions is derived from the %destructor and/or %token_destructor
1736 ** directives of the input grammar.
1737 */
/* Summary of the switch below: terminals (codes 1-23) delete the yy0 member
** of the union; the non-terminal groups delete yy39, yy40, yy32 or yy14
** according to their case labels; every other code does nothing. */
1738 static void yy_destructor(
1739  yyParser *yypParser, /* The parser */
1740  YYCODETYPE yymajor, /* Type code for object to destroy */
1741  YYMINORTYPE *yypminor /* The object to be destroyed */
1742 ){
/* NOTE(review): line 1743 of the original numbering is missing from this
** listing.  ParseARG_STORE at the end of the function needs a local `state`,
** so the missing line is presumably ParseARG_FETCH - confirm. */
1744  switch( yymajor ){
1745  /* Here is inserted the actions which take place when a
1746  ** terminal or non-terminal is destroyed. This can happen
1747  ** when the symbol is popped from the stack during a
1748  ** reduce or during error processing or when a parser is
1749  ** being destroyed before it is finished parsing.
1750  **
1751  ** Note: during a reduce, the only symbols destroyed are those
1752  ** which appear on the RHS of the rule, but which are *not* used
1753  ** inside the C code.
1754  */
1755 /********* Begin destructor definitions ***************************************/
1756  /* TERMINAL Destructor */
1757  case 1: /* ERROR */
1758  case 2: /* OR */
1759  case 3: /* XOR */
1760  case 4: /* AND */
1761  case 5: /* NOT */
1762  case 6: /* NEAR */
1763  case 7: /* ADJ */
1764  case 8: /* LOVE */
1765  case 9: /* HATE */
1766  case 10: /* HATE_AFTER_AND */
1767  case 11: /* SYNONYM */
1768  case 12: /* TERM */
1769  case 13: /* GROUP_TERM */
1770  case 14: /* PHR_TERM */
1771  case 15: /* WILD_TERM */
1772  case 16: /* PARTIAL_TERM */
1773  case 17: /* BOOLEAN_FILTER */
1774  case 18: /* RANGE */
1775  case 19: /* QUOTE */
1776  case 20: /* BRA */
1777  case 21: /* KET */
1778  case 22: /* UNBROKEN_WORDS */
1779  case 23: /* EMPTY_GROUP_OK */
1780 {
1781 #line 1805 "queryparser/queryparser.lemony"
1782  delete (yypminor->yy0);
1783 #line 1784 "queryparser/queryparser_internal.cc"
1784 }
1785  break;
1786  case 26: /* expr */
1787  case 27: /* prob_expr */
1788  case 28: /* bool_arg */
1789  case 30: /* term */
1790  case 32: /* stop_term */
1791  case 33: /* compound_term */
1792 {
1793 #line 1880 "queryparser/queryparser.lemony"
1794  delete (yypminor->yy39);
1795 #line 1796 "queryparser/queryparser_internal.cc"
1796 }
1797  break;
1798  case 29: /* prob */
1799  case 31: /* stop_prob */
1800 {
1801 #line 1989 "queryparser/queryparser.lemony"
1802  delete (yypminor->yy40);
1803 #line 1804 "queryparser/queryparser_internal.cc"
1804 }
1805  break;
1806  case 34: /* phrase */
1807  case 35: /* phrased_term */
1808  case 37: /* near_expr */
1809  case 38: /* adj_expr */
1810 {
1811 #line 2180 "queryparser/queryparser.lemony"
1812  delete (yypminor->yy32);
1813 #line 1814 "queryparser/queryparser_internal.cc"
1814 }
1815  break;
1816  case 36: /* group */
1817 {
1818 #line 2221 "queryparser/queryparser.lemony"
1819  delete (yypminor->yy14);
1820 #line 1821 "queryparser/queryparser_internal.cc"
1821 }
1822  break;
1823 /********* End destructor definitions *****************************************/
1824  default: break; /* If no destructor action specified: do nothing */
1825  }
1826  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
1827 }
1828 
1829 /*
1830 ** Pop the parser's stack once.
1831 **
1832 ** If there is a destructor routine associated with the token which
1833 ** is popped from the stack, then call it.
1834 */
1835 static void yy_pop_parser_stack(yyParser *pParser){
/* More than one entry is required: the bottom entry pushed by ParseInit()
** is never popped through this function. */
1836  Assert( pParser->yystack.size() > 1 );
1837  yyStackEntry *yytos = &pParser->yystack.back();
1838 
1839  LOGLINE(QUERYPARSER, "Popping " << ParseTokenName(yytos->major));
/* Run the symbol's destructor first; yytos points into the vector and is
** no longer usable once pop_back() has removed the entry. */
1840  yy_destructor(pParser, yytos->major, &yytos->minor);
1841  pParser->yystack.pop_back();
1842 }
1843 
1844 /*
1845 ** Clear all secondary memory allocations from the parser
1846 */
/* Pops (and thereby destroys) stack entries until only the bottom entry
** remains. */
1847 static
1848 void ParseFinalize(yyParser *pParser){
1849  while( pParser->yystack.size() > 1 ) yy_pop_parser_stack(pParser);
1850 }
1851 
1852 #ifndef Parse_ENGINEALWAYSONSTACK
1853 /*
1854 ** Deallocate and destroy a parser. Destructors are called for
1855 ** all stack elements before shutting the parser down.
1856 **
1857 ** If the YYPARSEFREENEVERNULL macro exists (for example because it
1858 ** is defined in a %include section of the input grammar) then it is
1859 ** assumed that the input pointer is never NULL.
1860 */
/* Releases a parser obtained from ParseAlloc() using `delete`.  Deleting a
** null pointer is a no-op in C++, so a NULL argument is harmless. */
1861 static
1862 void ParseFree(
1863  yyParser *pParser /* The parser to be deleted */
1864 ){
1865  delete pParser;
1866 }
1867 #endif /* Parse_ENGINEALWAYSONSTACK */
1868 
1869 /*
1870 ** Return the peak depth of the stack for a parser.
1871 */
1872 #ifdef YYTRACKMAXSTACKDEPTH
/* Returns the yyhwm member; only compiled when YYTRACKMAXSTACKDEPTH is
** defined, which is also the only configuration in which yyhwm exists. */
1873 int ParseStackPeak(yyParser *pParser){
1874  return pParser->yyhwm;
1875 }
1876 #endif
1877 
1878 /* This array of booleans keeps track of the parser statement
1879 ** coverage. The element yycoverage[X][Y] is set when the parser
1880 ** is in state X and has a lookahead token Y. In a well-tested
1881 ** system, every element of this matrix should end up being set.
1882 */
1883 #if defined(YYCOVERAGE)
/* One byte per (state, terminal) pair; static storage, so all entries start
** at zero.  yy_find_shift_action() sets an entry to 1 when it is visited. */
1884 static unsigned char yycoverage[YYNSTATE][YYNTOKEN];
1885 #endif
1886 
1887 /*
1888 ** Write into out a description of every state/lookahead combination that
1889 **
1890 ** (1) has not been used by the parser, and
1891 ** (2) is not a syntax error.
1892 **
1893 ** Return the number of missed state/lookahead combinations.
1894 */
1895 #if defined(YYCOVERAGE)
/* `out` may be NULL, in which case nothing is printed and only the count of
** missed combinations is returned. */
1896 int ParseCoverage(FILE *out){
1897  int stateno, iLookAhead, i;
1898  int nMissed = 0;
1899  for(stateno=0; stateno<YYNSTATE; stateno++){
1900  i = yy_shift_ofst[stateno];
1901  for(iLookAhead=0; iLookAhead<YYNTOKEN; iLookAhead++){
/* Skip pairs with no dedicated table slot: those fall back to the state's
** default action and are not counted. */
1902  if( yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
1903  if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
1904  if( out ){
1905  fprintf(out,"State %d lookahead %s %s\n", stateno,
1906  yyTokenName[iLookAhead],
1907  yycoverage[stateno][iLookAhead] ? "ok" : "missed");
1908  }
1909  }
1910  }
1911  return nMissed;
1912 }
1913 #endif
1914 
1915 /*
1916 ** Find the appropriate action for a parser given the terminal
1917 ** look-ahead token iLookAhead.
1918 */
/* Lookup order: the state's slot in yy_action[] if yy_lookahead[] confirms
** it belongs to iLookAhead; otherwise a fallback token (YYFALLBACK builds),
** then the wildcard slot (YYWILDCARD builds), then yy_default[] for the
** state.  The state is read from the top stack entry. */
1919 static unsigned int yy_find_shift_action(
1920  yyParser *pParser, /* The parser */
1921  YYCODETYPE iLookAhead /* The look-ahead token */
1922 ){
1923  int i;
1924  int stateno = pParser->yystack.back().stateno;
1925 
/* A value above YY_MAX_SHIFT is not a state: the top entry holds a pending
** reduce action (stored by yy_shift), which is returned unchanged. */
1926  if( stateno>YY_MAX_SHIFT ) return stateno;
1927  Assert( stateno <= YY_SHIFT_COUNT );
1928 #if defined(YYCOVERAGE)
1929  yycoverage[stateno][iLookAhead] = 1;
1930 #endif
1931  do{
1932  i = yy_shift_ofst[stateno];
1933  Assert( i>=0 );
1934  Assert( i+YYNTOKEN<=(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) );
1935  Assert( iLookAhead!=YYNOCODE );
1936  Assert( iLookAhead < YYNTOKEN );
1937  i += iLookAhead;
1938  if( yy_lookahead[i]!=iLookAhead ){
1939 #ifdef YYFALLBACK
1940  YYCODETYPE iFallback; /* Fallback token */
1941  if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0])
1942  && (iFallback = yyFallback[iLookAhead])!=0 ){
1943  LOGLINE(QUERYPARSER,
1944  "FALLBACK " << ParseTokenName(iLookAhead) << " => " <<
1945  ParseTokenName(iFallback));
1946  Assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
1947  iLookAhead = iFallback;
1948  continue;
1949  }
1950 #endif
1951 #ifdef YYWILDCARD
1952  {
/* i - iLookAhead recovers the state's base offset; the wildcard slot is
** then checked, with bounds tests compiled in only when they can fail. */
1953  int j = i - iLookAhead + YYWILDCARD;
1954  if(
1955 #if YY_SHIFT_MIN+YYWILDCARD<0
1956  j>=0 &&
1957 #endif
1958 #if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT
1959  j<YY_ACTTAB_COUNT &&
1960 #endif
1961  yy_lookahead[j]==YYWILDCARD && iLookAhead>0
1962  ){
1963  LOGLINE(QUERYPARSER,
1964  "WILDCARD " << ParseTokenName(iLookAhead) << " => " <<
1965  ParseTokenName(YYWILDCARD));
1966  return yy_action[j];
1967  }
1968  }
1969 #endif /* YYWILDCARD */
1970  return yy_default[stateno];
1971  }else{
1972  return yy_action[i];
1973  }
1974  }while(1);
1975 }
1976 
1977 /*
1978 ** Find the appropriate action for a parser given the non-terminal
1979 ** look-ahead token iLookAhead.
1980 */
/* NOTE(review): line 1981 of the original numbering, which should hold the
** return type and function name, is missing from this listing - confirm
** against the real source.
**
** With YYERRORSYMBOL defined, out-of-range states or mismatched table slots
** fall back to yy_default[stateno]; without it the same conditions are
** treated as internal errors and only checked with Assert. */
1982  int stateno, /* Current state number */
1983  YYCODETYPE iLookAhead /* The look-ahead token */
1984 ){
1985  int i;
1986 #ifdef YYERRORSYMBOL
1987  if( stateno>YY_REDUCE_COUNT ){
1988  return yy_default[stateno];
1989  }
1990 #else
1991  Assert( stateno<=YY_REDUCE_COUNT );
1992 #endif
1993  i = yy_reduce_ofst[stateno];
1994  Assert( iLookAhead!=YYNOCODE );
1995  i += iLookAhead;
1996 #ifdef YYERRORSYMBOL
1997  if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
1998  return yy_default[stateno];
1999  }
2000 #else
2001  Assert( i>=0 && i<YY_ACTTAB_COUNT );
2002  Assert( yy_lookahead[i]==iLookAhead );
2003 #endif
2004  return yy_action[i];
2005 }
2006 
2007 /*
2008 ** The following routine is called if the stack overflows.
2009 ** In Xapian this can never happen as we use std::vector to provide a stack
2010 ** of indefinite size.
2011 */
2012 #if 0
/* This function sits inside `#if 0` and is never compiled.  It refers to
** names (yyidx, yytos, yyTraceFILE, yyTracePrompt) that the parser structure
** in this file does not declare.
** NOTE(review): line 2014 of the original numbering is missing from this
** listing; it is presumably ParseARG_FETCH - confirm. */
2013 static void yyStackOverflow(yyParser *yypParser){
2015  yypParser->yyidx--;
2016 #ifndef NDEBUG
2017  if( yyTraceFILE ){
2018  fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt);
2019  }
2020 #endif
2021  while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
2022  /* Here code is inserted which will execute if the parser
2023  ** stack ever overflows */
2024 /******** Begin %stack_overflow code ******************************************/
2025 /******** End %stack_overflow code ********************************************/
2026  ParseARG_STORE; /* Suppress warning about unused %extra_argument var */
2027 }
2028 #endif
2029 
2030 /*
2031 ** Print tracing information for a SHIFT action
2032 */
/* Debug-log builds get a real function; otherwise yyTraceShift() is a macro
** that expands to nothing. */
2033 #ifdef XAPIAN_DEBUG_LOG
2034 static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){
/* The symbol name is taken from the entry currently on top of the stack.
** Values below YYNSTATE are real states; anything else is logged as a
** pending reduce, numbered relative to YY_MIN_REDUCE. */
2035  if( yyNewState<YYNSTATE ){
2036  LOGLINE(QUERYPARSER, zTag << " '" <<
2037  yyTokenName[yypParser->yystack.back().major] <<
2038  "', go to state " << yyNewState);
2039  }else{
2040  LOGLINE(QUERYPARSER, zTag << " '" <<
2041  yyTokenName[yypParser->yystack.back().major] <<
2042  "', pending reduce " << yyNewState - YY_MIN_REDUCE);
2043  }
2044 }
2045 #else
2046 # define yyTraceShift(X,Y,Z)
2047 #endif
2048 
2049 /*
2050 ** Perform a shift action.
2051 */
2052 static void yy_shift(
2053  yyParser *yypParser, /* The parser to be shifted */
2054  int yyNewState, /* The new state to shift in */
2055  int yyMajor, /* The major token to shift in */
2056  ParseTOKENTYPE yyMinor /* The minor token to shift in */
2057 ){
2058  if( yyNewState > YY_MAX_SHIFT ){
2059  yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
2060  }
2061  yypParser->yystack.push_back(yyStackEntry(yyNewState, yyMajor, yyMinor));
2062 #ifdef YYTRACKMAXSTACKDEPTH
2063  if( (int)(yypParser->yystack.size()>yypParser->yyhwm ){
2064  yypParser->yyhwm++;
2065  Assert( yypParser->yyhwm == (int)(yypParser->yystack.size() );
2066  }
2067 #endif
2068  yyTraceShift(yypParser, yyNewState, "Shift");
2069 }
2070 
2071 /* The following table contains information about every rule that
2072 ** is used during the reduce.
**
** Entries are indexed by rule number (the parenthesised number in each
** trailing comment).  yy_reduce() reads lhs as the symbol to look up the
** follow-on action for, and adds nrhs - which is stored already negated -
** to its stack pointer in order to step back over the entries belonging to
** the rule's right-hand side.
2073 */
2074 static const struct {
2075  YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */
2076  signed char nrhs; /* Negative of the number of RHS symbols in the rule */
2077 } yyRuleInfo[] = {
2078  { 25, -1 }, /* (0) query ::= expr */
2079  { 25, 0 }, /* (1) query ::= */
2080  { 26, -3 }, /* (2) expr ::= bool_arg AND bool_arg */
2081  { 26, -3 }, /* (3) expr ::= bool_arg NOT bool_arg */
2082  { 26, -4 }, /* (4) expr ::= bool_arg AND NOT bool_arg */
2083  { 26, -4 }, /* (5) expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
2084  { 26, -3 }, /* (6) expr ::= bool_arg OR bool_arg */
2085  { 26, -3 }, /* (7) expr ::= bool_arg XOR bool_arg */
2086  { 28, 0 }, /* (8) bool_arg ::= */
2087  { 27, -1 }, /* (9) prob_expr ::= prob */
2088  { 29, -1 }, /* (10) prob ::= RANGE */
2089  { 29, -2 }, /* (11) prob ::= stop_prob RANGE */
2090  { 29, -2 }, /* (12) prob ::= stop_term stop_term */
2091  { 29, -2 }, /* (13) prob ::= prob stop_term */
2092  { 29, -2 }, /* (14) prob ::= LOVE term */
2093  { 29, -3 }, /* (15) prob ::= stop_prob LOVE term */
2094  { 29, -2 }, /* (16) prob ::= HATE term */
2095  { 29, -3 }, /* (17) prob ::= stop_prob HATE term */
2096  { 29, -2 }, /* (18) prob ::= HATE BOOLEAN_FILTER */
2097  { 29, -3 }, /* (19) prob ::= stop_prob HATE BOOLEAN_FILTER */
2098  { 29, -1 }, /* (20) prob ::= BOOLEAN_FILTER */
2099  { 29, -2 }, /* (21) prob ::= stop_prob BOOLEAN_FILTER */
2100  { 29, -2 }, /* (22) prob ::= LOVE BOOLEAN_FILTER */
2101  { 29, -3 }, /* (23) prob ::= stop_prob LOVE BOOLEAN_FILTER */
2102  { 31, -1 }, /* (24) stop_prob ::= stop_term */
2103  { 32, -1 }, /* (25) stop_term ::= TERM */
2104  { 30, -1 }, /* (26) term ::= TERM */
2105  { 33, -1 }, /* (27) compound_term ::= WILD_TERM */
2106  { 33, -1 }, /* (28) compound_term ::= PARTIAL_TERM */
2107  { 33, -3 }, /* (29) compound_term ::= QUOTE phrase QUOTE */
2108  { 33, -1 }, /* (30) compound_term ::= phrased_term */
2109  { 33, -1 }, /* (31) compound_term ::= group */
2110  { 33, -1 }, /* (32) compound_term ::= near_expr */
2111  { 33, -1 }, /* (33) compound_term ::= adj_expr */
2112  { 33, -3 }, /* (34) compound_term ::= BRA expr KET */
2113  { 33, -2 }, /* (35) compound_term ::= SYNONYM TERM */
2114  { 33, -1 }, /* (36) compound_term ::= UNBROKEN_WORDS */
2115  { 34, -1 }, /* (37) phrase ::= TERM */
2116  { 34, -1 }, /* (38) phrase ::= UNBROKEN_WORDS */
2117  { 34, -2 }, /* (39) phrase ::= phrase TERM */
2118  { 34, -2 }, /* (40) phrase ::= phrase UNBROKEN_WORDS */
2119  { 35, -2 }, /* (41) phrased_term ::= TERM PHR_TERM */
2120  { 35, -2 }, /* (42) phrased_term ::= phrased_term PHR_TERM */
2121  { 36, -2 }, /* (43) group ::= TERM GROUP_TERM */
2122  { 36, -2 }, /* (44) group ::= group GROUP_TERM */
2123  { 36, -2 }, /* (45) group ::= group EMPTY_GROUP_OK */
2124  { 37, -3 }, /* (46) near_expr ::= TERM NEAR TERM */
2125  { 37, -3 }, /* (47) near_expr ::= near_expr NEAR TERM */
2126  { 38, -3 }, /* (48) adj_expr ::= TERM ADJ TERM */
2127  { 38, -3 }, /* (49) adj_expr ::= adj_expr ADJ TERM */
2128  { 26, -1 }, /* (50) expr ::= prob_expr */
2129  { 28, -1 }, /* (51) bool_arg ::= expr */
2130  { 27, -1 }, /* (52) prob_expr ::= term */
2131  { 31, -1 }, /* (53) stop_prob ::= prob */
2132  { 32, -1 }, /* (54) stop_term ::= compound_term */
2133  { 30, -1 }, /* (55) term ::= compound_term */
2134 };
2135 
2136 static void yy_accept(yyParser*); /* Forward Declaration */
2137 
2138 /*
2139 ** Perform a reduce action and the shift that must immediately
2140 ** follow the reduce.
2141 **
2142 ** The yyLookahead and yyLookaheadToken parameters provide reduce actions
2143 ** access to the lookahead token (if any). The yyLookahead will be YYNOCODE
2144 ** if the lookahead token has already been consumed. As this procedure is
2145 ** only called from one place, optimizing compilers will in-line it, which
2146 ** means that the extra parameters have no performance impact.
**
** Outline: make room for the LHS if the rule has an empty right-hand side,
** run the rule's action from the switch below, then use yyRuleInfo to find
** the LHS symbol and RHS length, look up the follow-on action with
** yy_find_reduce_action(), drop the RHS entries from the stack and store
** the new state and LHS symbol in the entry that remains.
**
** NOTE(review): the actions use `state`, which is not declared in the lines
** of this routine as listed here; presumably it is supplied by the
** %extra_argument fetch macro - confirm.
2147 */
2148 static void yy_reduce(
2149  yyParser *yypParser, /* The parser */
2150  unsigned int yyruleno, /* Number of the rule by which to reduce */
2151  int yyLookahead, /* Lookahead token, or YYNOCODE if none */
2152  ParseTOKENTYPE yyLookaheadToken /* Value of the lookahead token */
2153 ){
2154  int yygoto; /* The next state */
2155  int yyact; /* The next action */
2156  yyStackEntry *yymsp; /* The top of the parser's stack */
2157  int yysize; /* Amount to pop the stack */
2159  (void)yyLookahead;
2160  (void)yyLookaheadToken;
2161  yymsp = &yypParser->yystack.back();
2162  Assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
2163 #ifdef XAPIAN_DEBUG_LOG
2164  {
2165  yysize = yyRuleInfo[yyruleno].nrhs;
2166  if( yysize ){
2167  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2168  ParseRuleName(yyruleno) << "], go to state " <<
2169  yymsp[yysize].stateno);
2170  } else {
2171  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2172  ParseRuleName(yyruleno) << "].");
2173  }
2174  }
2175 #endif /* XAPIAN_DEBUG_LOG */
2176  /* yygotominor = yyzerominor; */
2177 
2178  /* Check that the stack is large enough to grow by a single entry
2179  ** if the RHS of the rule is empty. This ensures that there is room
2180  ** enough on the stack to push the LHS value without invalidating
2181  ** pointers into the stack. */
2182  if( yyRuleInfo[yyruleno].nrhs==0 ){
2183 #if 1
 /* Growing the stack may move its storage, so yymsp is derived again
 ** afterwards.  It is left one entry below the new top, which the
 ** empty-RHS actions address as yymsp[1]. */
2184  yypParser->yystack.resize(yypParser->yystack.size() + 1);
2185  yymsp = &(yypParser->yystack.back()) - 1;
2186 #else
2187 #ifdef YYTRACKMAXSTACKDEPTH
2188  if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
2189  yypParser->yyhwm++;
2190  Assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
2191  }
2192 #endif
2193 #if YYSTACKDEPTH>0
2194  if( yypParser->yytos>=yypParser->yystackEnd ){
2195  yyStackOverflow(yypParser);
2196  return;
2197  }
2198 #else
2199  if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){
2200  if( yyGrowStack(yypParser) ){
2201  yyStackOverflow(yypParser);
2202  return;
2203  }
2204  yymsp = yypParser->yytos;
2205  }
2206 #endif
2207 #endif
2208  }
2209 
2210  switch( yyruleno ){
2211  /* Beginning here are the reduction cases. A typical example
2212  ** follows:
2213  ** case 0:
2214  ** #line <lineno> <grammarfile>
2215  ** { ... } // User supplied code
2216  ** #line <lineno> <thisfile>
2217  ** break;
2218  */
2219 /********** Begin reduce actions **********************************************/
2220  YYMINORTYPE yylhsminor;
2221  case 0: /* query ::= expr */
2222 #line 1862 "queryparser/queryparser.lemony"
2223 {
2224  // Save the parsed query in the State structure so we can return it.
2225  if (yymsp[0].minor.yy39) {
2226  state->query = *yymsp[0].minor.yy39;
2227  delete yymsp[0].minor.yy39;
2228  } else {
2229  state->query = Query();
2230  }
2231 }
2232 #line 2233 "queryparser/queryparser_internal.cc"
2233  break;
2234  case 1: /* query ::= */
2235 #line 1872 "queryparser/queryparser.lemony"
2236 {
2237  // Handle a query string with no terms in.
2238  state->query = Query();
2239 }
2240 #line 2241 "queryparser/queryparser_internal.cc"
2241  break;
2242  case 2: /* expr ::= bool_arg AND bool_arg */
2243 #line 1884 "queryparser/queryparser.lemony"
2244 {
2245  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "AND");
2246  *yymsp[-2].minor.yy39 &= *yymsp[0].minor.yy39;
2247  delete yymsp[0].minor.yy39;
2248 }
2249 #line 2250 "queryparser/queryparser_internal.cc"
2250  yy_destructor(yypParser,4,&yymsp[-1].minor);
2251  break;
2252  case 3: /* expr ::= bool_arg NOT bool_arg */
2253 #line 1890 "queryparser/queryparser.lemony"
2254 {
2255  // 'NOT foo' -> '<alldocuments> NOT foo'
2256  if (!yymsp[-2].minor.yy39 && (state->flags & QueryParser::FLAG_PURE_NOT)) {
2257  yymsp[-2].minor.yy39 = new Query("", 1, 0);
2258  }
2259  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "NOT");
2260  *yymsp[-2].minor.yy39 &= ~*yymsp[0].minor.yy39;
2261  delete yymsp[0].minor.yy39;
2262 }
2263 #line 2264 "queryparser/queryparser_internal.cc"
2264  yy_destructor(yypParser,5,&yymsp[-1].minor);
2265  break;
2266  case 4: /* expr ::= bool_arg AND NOT bool_arg */
2267 #line 1900 "queryparser/queryparser.lemony"
2268 {
2269  VET_BOOL_ARGS(yymsp[-3].minor.yy39, yymsp[0].minor.yy39, "AND NOT");
2270  *yymsp[-3].minor.yy39 &= ~*yymsp[0].minor.yy39;
2271  delete yymsp[0].minor.yy39;
2272 }
2273 #line 2274 "queryparser/queryparser_internal.cc"
2274  yy_destructor(yypParser,4,&yymsp[-2].minor);
2275  yy_destructor(yypParser,5,&yymsp[-1].minor);
2276  break;
2277  case 5: /* expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
2278 #line 1906 "queryparser/queryparser.lemony"
2279 {
2280  VET_BOOL_ARGS(yymsp[-3].minor.yy39, yymsp[0].minor.yy39, "AND");
2281  *yymsp[-3].minor.yy39 &= ~*yymsp[0].minor.yy39;
2282  delete yymsp[0].minor.yy39;
2283 }
2284 #line 2285 "queryparser/queryparser_internal.cc"
2285  yy_destructor(yypParser,4,&yymsp[-2].minor);
2286  yy_destructor(yypParser,10,&yymsp[-1].minor);
2287  break;
2288  case 6: /* expr ::= bool_arg OR bool_arg */
2289 #line 1912 "queryparser/queryparser.lemony"
2290 {
2291  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "OR");
2292  *yymsp[-2].minor.yy39 |= *yymsp[0].minor.yy39;
2293  delete yymsp[0].minor.yy39;
2294 }
2295 #line 2296 "queryparser/queryparser_internal.cc"
2296  yy_destructor(yypParser,2,&yymsp[-1].minor);
2297  break;
2298  case 7: /* expr ::= bool_arg XOR bool_arg */
2299 #line 1918 "queryparser/queryparser.lemony"
2300 {
2301  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "XOR");
2302  *yymsp[-2].minor.yy39 ^= *yymsp[0].minor.yy39;
2303  delete yymsp[0].minor.yy39;
2304 }
2305 #line 2306 "queryparser/queryparser_internal.cc"
2306  yy_destructor(yypParser,3,&yymsp[-1].minor);
2307  break;
2308  case 8: /* bool_arg ::= */
2309 #line 1931 "queryparser/queryparser.lemony"
2310 {
2311  // Set the argument to NULL, which enables the bool_arg-using rules in
2312  // expr above to report uses of AND, OR, etc which don't have two
2313  // arguments.
2314  yymsp[1].minor.yy39 = NULL;
2315 }
2316 #line 2317 "queryparser/queryparser_internal.cc"
2317  break;
2318  case 9: /* prob_expr ::= prob */
2319 #line 1943 "queryparser/queryparser.lemony"
2320 {
2321  yylhsminor.yy39 = yymsp[0].minor.yy40->query;
2322  yymsp[0].minor.yy40->query = NULL;
2323  // Handle any "+ terms".
2324  if (yymsp[0].minor.yy40->love) {
2325  if (yymsp[0].minor.yy40->love->empty()) {
2326  // +<nothing>.
2327  delete yylhsminor.yy39;
2328  yylhsminor.yy39 = yymsp[0].minor.yy40->love;
2329  } else if (yylhsminor.yy39) {
2330  swap(yylhsminor.yy39, yymsp[0].minor.yy40->love);
2331  add_to_query(yylhsminor.yy39, Query::OP_AND_MAYBE, yymsp[0].minor.yy40->love);
2332  } else {
2333  yylhsminor.yy39 = yymsp[0].minor.yy40->love;
2334  }
2335  yymsp[0].minor.yy40->love = NULL;
2336  }
2337  // Handle any boolean filters.
2338  if (!yymsp[0].minor.yy40->filter.empty()) {
2339  if (yylhsminor.yy39) {
2340  add_to_query(yylhsminor.yy39, Query::OP_FILTER, yymsp[0].minor.yy40->merge_filters());
2341  } else {
2342  // Make the query a boolean one.
2343  yylhsminor.yy39 = new Query(Query::OP_SCALE_WEIGHT, yymsp[0].minor.yy40->merge_filters(), 0.0);
2344  }
2345  }
2346  // Handle any "- terms".
2347  if (yymsp[0].minor.yy40->hate && !yymsp[0].minor.yy40->hate->empty()) {
2348  if (!yylhsminor.yy39) {
2349  // Can't just hate!
 /* yy_parse_failed() unwinds the parser stack, so return at once
 ** rather than running the goto bookkeeping that follows the switch. */
2350  yy_parse_failed(yypParser);
2351  return;
2352  }
2353  *yylhsminor.yy39 = Query(Query::OP_AND_NOT, *yylhsminor.yy39, *yymsp[0].minor.yy40->hate);
2354  }
2355  delete yymsp[0].minor.yy40;
2356 }
2357 #line 2358 "queryparser/queryparser_internal.cc"
2358  yymsp[0].minor.yy39 = yylhsminor.yy39;
2359  break;
2360  case 10: /* prob ::= RANGE */
2361 #line 1991 "queryparser/queryparser.lemony"
2362 {
2363  string grouping = yymsp[0].minor.yy0->name;
2364  const Query & range = yymsp[0].minor.yy0->as_range_query();
2365  yymsp[0].minor.yy40 = new ProbQuery; /*P-overwrites-R*/
2366  yymsp[0].minor.yy40->add_filter_range(grouping, range);
2367 }
2368 #line 2369 "queryparser/queryparser_internal.cc"
2369  break;
2370  case 11: /* prob ::= stop_prob RANGE */
2371 #line 1998 "queryparser/queryparser.lemony"
2372 {
2373  string grouping = yymsp[0].minor.yy0->name;
2374  const Query & range = yymsp[0].minor.yy0->as_range_query();
2375  yymsp[-1].minor.yy40->append_filter_range(grouping, range);
2376 }
2377 #line 2378 "queryparser/queryparser_internal.cc"
2378  break;
2379  case 12: /* prob ::= stop_term stop_term */
2380 #line 2004 "queryparser/queryparser.lemony"
2381 {
2382  yymsp[-1].minor.yy40 = new ProbQuery(yymsp[-1].minor.yy39); /*P-overwrites-T*/
2383  if (yymsp[0].minor.yy39) {
2384  Query::op op = state->default_op();
2385  if (yymsp[-1].minor.yy40->query && is_positional(op)) {
2386  // If default_op is OP_NEAR or OP_PHRASE, set the window size to
2387  // 11 for the first pair of terms and it will automatically grow
2388  // by one for each subsequent term.
2389  Query * subqs[2] = { yymsp[-1].minor.yy40->query, yymsp[0].minor.yy39 };
2390  *(yymsp[-1].minor.yy40->query) = Query(op, subqs, subqs + 2, 11);
2391  delete yymsp[0].minor.yy39;
2392  } else {
2393  add_to_query(yymsp[-1].minor.yy40->query, op, yymsp[0].minor.yy39);
2394  }
2395  }
2396 }
2397 #line 2398 "queryparser/queryparser_internal.cc"
2398  break;
2399  case 13: /* prob ::= prob stop_term */
2400 #line 2021 "queryparser/queryparser.lemony"
2401 {
2402  // If yymsp[0].minor.yy39 is a stopword, there's nothing to do here.
2403  if (yymsp[0].minor.yy39) add_to_query(yymsp[-1].minor.yy40->query, state->default_op(), yymsp[0].minor.yy39);
2404 }
2405 #line 2406 "queryparser/queryparser_internal.cc"
2406  break;
2407  case 14: /* prob ::= LOVE term */
2408 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2409 #line 2026 "queryparser/queryparser.lemony"
2410 {
2411  yymsp[-1].minor.yy40 = new ProbQuery;
2412  if (state->default_op() == Query::OP_AND) {
2413  yymsp[-1].minor.yy40->query = yymsp[0].minor.yy39;
2414  } else {
2415  yymsp[-1].minor.yy40->love = yymsp[0].minor.yy39;
2416  }
2417 }
2418 #line 2419 "queryparser/queryparser_internal.cc"
2419 }
2420  break;
2421  case 15: /* prob ::= stop_prob LOVE term */
2422 #line 2035 "queryparser/queryparser.lemony"
2423 {
2424  if (state->default_op() == Query::OP_AND) {
2425  /* The default op is AND, so we just put loved terms into the query
2426  * (in this case the only effect of love is to ignore the stopword
2427  * list). */
2428  add_to_query(yymsp[-2].minor.yy40->query, Query::OP_AND, yymsp[0].minor.yy39);
2429  } else {
2430  add_to_query(yymsp[-2].minor.yy40->love, Query::OP_AND, yymsp[0].minor.yy39);
2431  }
2432 }
2433 #line 2434 "queryparser/queryparser_internal.cc"
2434  yy_destructor(yypParser,8,&yymsp[-1].minor);
2435  break;
2436  case 16: /* prob ::= HATE term */
2437 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2438 #line 2046 "queryparser/queryparser.lemony"
2439 {
2440  yymsp[-1].minor.yy40 = new ProbQuery;
2441  yymsp[-1].minor.yy40->hate = yymsp[0].minor.yy39;
2442 }
2443 #line 2444 "queryparser/queryparser_internal.cc"
2444 }
2445  break;
2446  case 17: /* prob ::= stop_prob HATE term */
2447 #line 2051 "queryparser/queryparser.lemony"
2448 {
2449  add_to_query(yymsp[-2].minor.yy40->hate, Query::OP_OR, yymsp[0].minor.yy39);
2450 }
2451 #line 2452 "queryparser/queryparser_internal.cc"
2452  yy_destructor(yypParser,9,&yymsp[-1].minor);
2453  break;
2454  case 18: /* prob ::= HATE BOOLEAN_FILTER */
2455 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2456 #line 2055 "queryparser/queryparser.lemony"
2457 {
2458  yymsp[-1].minor.yy40 = new ProbQuery;
2459  yymsp[-1].minor.yy40->hate = new Query(yymsp[0].minor.yy0->get_query());
2460  delete yymsp[0].minor.yy0;
2461 }
2462 #line 2463 "queryparser/queryparser_internal.cc"
2463 }
2464  break;
2465  case 19: /* prob ::= stop_prob HATE BOOLEAN_FILTER */
2466 #line 2061 "queryparser/queryparser.lemony"
2467 {
2468  add_to_query(yymsp[-2].minor.yy40->hate, Query::OP_OR, yymsp[0].minor.yy0->get_query());
2469  delete yymsp[0].minor.yy0;
2470 }
2471 #line 2472 "queryparser/queryparser_internal.cc"
2472  yy_destructor(yypParser,9,&yymsp[-1].minor);
2473  break;
2474  case 20: /* prob ::= BOOLEAN_FILTER */
2475 #line 2066 "queryparser/queryparser.lemony"
2476 {
2477  yylhsminor.yy40 = new ProbQuery;
2478  yylhsminor.yy40->add_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2479  delete yymsp[0].minor.yy0;
2480 }
2481 #line 2482 "queryparser/queryparser_internal.cc"
2482  yymsp[0].minor.yy40 = yylhsminor.yy40;
2483  break;
2484  case 21: /* prob ::= stop_prob BOOLEAN_FILTER */
2485 #line 2072 "queryparser/queryparser.lemony"
2486 {
2487  yymsp[-1].minor.yy40->append_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2488  delete yymsp[0].minor.yy0;
2489 }
2490 #line 2491 "queryparser/queryparser_internal.cc"
2491  break;
2492  case 22: /* prob ::= LOVE BOOLEAN_FILTER */
2493 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2494 #line 2077 "queryparser/queryparser.lemony"
2495 {
 /* NOTE(review): the plain BOOLEAN_FILTER rules (20 and 21) go through
 ** add_filter()/append_filter(), whereas this rule and rule 23 write to
 ** the filter map directly; confirm the two paths are meant to behave
 ** identically. */
2496  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2497  yymsp[-1].minor.yy40 = new ProbQuery;
2498  yymsp[-1].minor.yy40->filter[yymsp[0].minor.yy0->get_grouping()] = yymsp[0].minor.yy0->get_query();
2499  delete yymsp[0].minor.yy0;
2500 }
2501 #line 2502 "queryparser/queryparser_internal.cc"
2502 }
2503  break;
2504  case 23: /* prob ::= stop_prob LOVE BOOLEAN_FILTER */
2505 #line 2084 "queryparser/queryparser.lemony"
2506 {
2507  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2508  // We OR filters with the same prefix...
2509  Query & q = yymsp[-2].minor.yy40->filter[yymsp[0].minor.yy0->get_grouping()];
2510  q |= yymsp[0].minor.yy0->get_query();
2511  delete yymsp[0].minor.yy0;
2512 }
2513 #line 2514 "queryparser/queryparser_internal.cc"
2514  yy_destructor(yypParser,8,&yymsp[-1].minor);
2515  break;
2516  case 24: /* stop_prob ::= stop_term */
2517 #line 2099 "queryparser/queryparser.lemony"
2518 {
2519  yymsp[0].minor.yy40 = new ProbQuery(yymsp[0].minor.yy39); /*P-overwrites-T*/
2520 }
2521 #line 2522 "queryparser/queryparser_internal.cc"
2522  break;
2523  case 25: /* stop_term ::= TERM */
2524 #line 2112 "queryparser/queryparser.lemony"
2525 {
2526  if (state->is_stopword(yymsp[0].minor.yy0)) {
2527  yylhsminor.yy39 = NULL;
2528  state->add_to_stoplist(yymsp[0].minor.yy0);
2529  } else {
2530  yylhsminor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2531  }
2532  delete yymsp[0].minor.yy0;
2533 }
2534 #line 2535 "queryparser/queryparser_internal.cc"
2535  yymsp[0].minor.yy39 = yylhsminor.yy39;
2536  break;
2537  case 26: /* term ::= TERM */
2538 #line 2129 "queryparser/queryparser.lemony"
2539 {
2540  yylhsminor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2541  delete yymsp[0].minor.yy0;
2542 }
2543 #line 2544 "queryparser/queryparser_internal.cc"
2544  yymsp[0].minor.yy39 = yylhsminor.yy39;
2545  break;
2546  case 27: /* compound_term ::= WILD_TERM */
2547 #line 2144 "queryparser/queryparser.lemony"
2548 { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_wildcarded_query(state); /*T-overwrites-U*/ }
2549 #line 2550 "queryparser/queryparser_internal.cc"
2550  break;
2551  case 28: /* compound_term ::= PARTIAL_TERM */
2552 #line 2147 "queryparser/queryparser.lemony"
2553 { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_partial_query(state); /*T-overwrites-U*/ }
2554 #line 2555 "queryparser/queryparser_internal.cc"
2555  break;
2556  case 29: /* compound_term ::= QUOTE phrase QUOTE */
2557 { yy_destructor(yypParser,19,&yymsp[-2].minor);
2558 #line 2150 "queryparser/queryparser.lemony"
2559 { yymsp[-2].minor.yy39 = yymsp[-1].minor.yy32->as_phrase_query(); }
2560 #line 2561 "queryparser/queryparser_internal.cc"
2561  yy_destructor(yypParser,19,&yymsp[0].minor);
2562 }
2563  break;
2564  case 30: /* compound_term ::= phrased_term */
2565 #line 2153 "queryparser/queryparser.lemony"
2566 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_phrase_query(); /*T-overwrites-P*/ }
2567 #line 2568 "queryparser/queryparser_internal.cc"
2568  break;
2569  case 31: /* compound_term ::= group */
2570 #line 2156 "queryparser/queryparser.lemony"
2571 { yymsp[0].minor.yy39 = yymsp[0].minor.yy14->as_group(state); /*T-overwrites-P*/ }
2572 #line 2573 "queryparser/queryparser_internal.cc"
2573  break;
2574  case 32: /* compound_term ::= near_expr */
2575 #line 2159 "queryparser/queryparser.lemony"
2576 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_near_query(); /*T-overwrites-P*/ }
2577 #line 2578 "queryparser/queryparser_internal.cc"
2578  break;
2579  case 33: /* compound_term ::= adj_expr */
2580 #line 2162 "queryparser/queryparser.lemony"
2581 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_adj_query(); /*T-overwrites-P*/ }
2582 #line 2583 "queryparser/queryparser_internal.cc"
2583  break;
2584  case 34: /* compound_term ::= BRA expr KET */
2585 { yy_destructor(yypParser,20,&yymsp[-2].minor);
2586 #line 2165 "queryparser/queryparser.lemony"
2587 { yymsp[-2].minor.yy39 = yymsp[-1].minor.yy39; }
2588 #line 2589 "queryparser/queryparser_internal.cc"
2589  yy_destructor(yypParser,21,&yymsp[0].minor);
2590 }
2591  break;
2592  case 35: /* compound_term ::= SYNONYM TERM */
2593 { yy_destructor(yypParser,11,&yymsp[-1].minor);
2594 #line 2167 "queryparser/queryparser.lemony"
2595 {
2596  yymsp[-1].minor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_synonyms());
2597  delete yymsp[0].minor.yy0;
2598 }
2599 #line 2600 "queryparser/queryparser_internal.cc"
2600 }
2601  break;
2602  case 36: /* compound_term ::= UNBROKEN_WORDS */
2603 #line 2172 "queryparser/queryparser.lemony"
2604 {
2605  { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_unbroken_query(); /*T-overwrites-U*/ }
2606 }
2607 #line 2608 "queryparser/queryparser_internal.cc"
2608  break;
2609  case 37: /* phrase ::= TERM */
2610 #line 2182 "queryparser/queryparser.lemony"
2611 {
2612  yylhsminor.yy32 = Terms::create(state);
2613  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2614 }
2615 #line 2616 "queryparser/queryparser_internal.cc"
2616  yymsp[0].minor.yy32 = yylhsminor.yy32;
2617  break;
2618  case 38: /* phrase ::= UNBROKEN_WORDS */
2619 #line 2187 "queryparser/queryparser.lemony"
2620 {
2621  yylhsminor.yy32 = Terms::create(state);
2622  yymsp[0].minor.yy0->as_positional_unbroken(yylhsminor.yy32);
2623 }
2624 #line 2625 "queryparser/queryparser_internal.cc"
2625  yymsp[0].minor.yy32 = yylhsminor.yy32;
2626  break;
2627  case 39: /* phrase ::= phrase TERM */
2628  case 42: /* phrased_term ::= phrased_term PHR_TERM */ yytestcase(yyruleno==42);
2629 #line 2192 "queryparser/queryparser.lemony"
2630 {
2631  yymsp[-1].minor.yy32->add_positional_term(yymsp[0].minor.yy0);
2632 }
2633 #line 2634 "queryparser/queryparser_internal.cc"
2634  break;
2635  case 40: /* phrase ::= phrase UNBROKEN_WORDS */
2636 #line 2196 "queryparser/queryparser.lemony"
2637 {
2638  yymsp[0].minor.yy0->as_positional_unbroken(yymsp[-1].minor.yy32);
2639 }
2640 #line 2641 "queryparser/queryparser_internal.cc"
2641  break;
2642  case 41: /* phrased_term ::= TERM PHR_TERM */
2643 #line 2207 "queryparser/queryparser.lemony"
2644 {
2645  yylhsminor.yy32 = Terms::create(state);
2646  yylhsminor.yy32->add_positional_term(yymsp[-1].minor.yy0);
2647  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2648 }
2649 #line 2650 "queryparser/queryparser_internal.cc"
2650  yymsp[-1].minor.yy32 = yylhsminor.yy32;
2651  break;
2652  case 43: /* group ::= TERM GROUP_TERM */
2653 #line 2223 "queryparser/queryparser.lemony"
2654 {
2655  yymsp[-1].minor.yy14 = TermGroup::create(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); /*P-overwrites-T*/
2656 }
2657 #line 2658 "queryparser/queryparser_internal.cc"
2658  break;
2659  case 44: /* group ::= group GROUP_TERM */
2660 #line 2227 "queryparser/queryparser.lemony"
2661 {
2662  yymsp[-1].minor.yy14->add_term(yymsp[0].minor.yy0);
2663 }
2664 #line 2665 "queryparser/queryparser_internal.cc"
2665  break;
2666  case 45: /* group ::= group EMPTY_GROUP_OK */
2667 #line 2231 "queryparser/queryparser.lemony"
2668 {
2669  yymsp[-1].minor.yy14->set_empty_ok();
2670 }
2671 #line 2672 "queryparser/queryparser_internal.cc"
2672  yy_destructor(yypParser,23,&yymsp[0].minor);
2673  break;
2674  case 46: /* near_expr ::= TERM NEAR TERM */
2675  case 48: /* adj_expr ::= TERM ADJ TERM */ yytestcase(yyruleno==48);
2676 #line 2241 "queryparser/queryparser.lemony"
2677 {
2678  yylhsminor.yy32 = Terms::create(state);
2679  yylhsminor.yy32->add_positional_term(yymsp[-2].minor.yy0);
2680  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2681  if (yymsp[-1].minor.yy0) {
2682  yylhsminor.yy32->adjust_window(yymsp[-1].minor.yy0->get_termpos());
2683  delete yymsp[-1].minor.yy0;
2684  }
2685 }
2686 #line 2687 "queryparser/queryparser_internal.cc"
2687  yymsp[-2].minor.yy32 = yylhsminor.yy32;
2688  break;
2689  case 47: /* near_expr ::= near_expr NEAR TERM */
2690  case 49: /* adj_expr ::= adj_expr ADJ TERM */ yytestcase(yyruleno==49);
2691 #line 2251 "queryparser/queryparser.lemony"
2692 {
2693  yymsp[-2].minor.yy32->add_positional_term(yymsp[0].minor.yy0);
2694  if (yymsp[-1].minor.yy0) {
2695  yymsp[-2].minor.yy32->adjust_window(yymsp[-1].minor.yy0->get_termpos());
2696  delete yymsp[-1].minor.yy0;
2697  }
2698 }
2699 #line 2700 "queryparser/queryparser_internal.cc"
2700  break;
2701  default:
2702  /* (50) expr ::= prob_expr (OPTIMIZED OUT) */ Assert(yyruleno!=50);
2703  /* (51) bool_arg ::= expr */ yytestcase(yyruleno==51);
2704  /* (52) prob_expr ::= term (OPTIMIZED OUT) */ Assert(yyruleno!=52);
2705  /* (53) stop_prob ::= prob */ yytestcase(yyruleno==53);
2706  /* (54) stop_term ::= compound_term */ yytestcase(yyruleno==54);
2707  /* (55) term ::= compound_term */ yytestcase(yyruleno==55);
2708  break;
2709 /********** End reduce actions ************************************************/
2710  }
2711  Assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
2712  yygoto = yyRuleInfo[yyruleno].lhs;
2713  yysize = yyRuleInfo[yyruleno].nrhs;
 /* yymsp[yysize] is the entry just below the rule's RHS; its state selects
 ** the action to take for the LHS symbol. */
2714  yyact = yy_find_reduce_action(yymsp[yysize].stateno,static_cast<YYCODETYPE>(yygoto));
2715 
2716  /* There are no SHIFTREDUCE actions on nonterminals because the table
2717  ** generator has simplified them to pure REDUCE actions. */
2718  Assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );
2719 
2720  /* It is not possible for a REDUCE to be followed by an error */
2721  Assert( yyact!=YY_ERROR_ACTION );
2722 
 /* yysize is zero or negative (see yyRuleInfo), so this moves yymsp to the
 ** entry that will hold the LHS, and the resize below shrinks the stack so
 ** that entry becomes the top.  The empty-RHS case already grew the stack
 ** by one before the switch. */
2723  yymsp += yysize+1;
2724  if (yysize) {
2725  yypParser->yystack.resize(yypParser->yystack.size() + yysize+1);
2726  }
2727  yymsp->stateno = static_cast<YYACTIONTYPE>(yyact);
2728  yymsp->major = static_cast<YYCODETYPE>(yygoto);
2729  yyTraceShift(yypParser, yyact, "... then shift");
2730 }
2731 
2732 /*
2733 ** The following code executes when the parse fails
**
** It logs the failure, pops parser stack entries until only one remains,
** and then runs the grammar's %parse_failure code, which records the
** default message "parse error" unless an error has already been set.
**
** NOTE(review): `state` is not declared in the lines listed here;
** presumably the %extra_argument fetch macro provides it - confirm.
** NOTE(review): this definition is guarded by YYNOERRORRECOVERY, yet
** yy_reduce() and yy_syntax_error() call it unconditionally; confirm that
** macro is never defined for this grammar.
2734 */
2735 #ifndef YYNOERRORRECOVERY
2736 static void yy_parse_failed(
2737  yyParser *yypParser /* The parser */
2738 ){
2740  LOGLINE(QUERYPARSER, "Fail!");
2741  while( yypParser->yystack.size() > 1 ) yy_pop_parser_stack(yypParser);
2742  /* Here code is inserted which will be executed whenever the
2743  ** parser fails */
2744 /************ Begin %parse_failure code ***************************************/
2745 #line 1809 "queryparser/queryparser.lemony"
2746 
2747  // If we've not already set an error message, set a default one.
2748  if (!state->error) state->error = "parse error";
2749 #line 2750 "queryparser/queryparser_internal.cc"
2750 /************ End %parse_failure code *****************************************/
2751  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2752 }
2753 #endif /* YYNOERRORRECOVERY */
2754 
2755 /*
2756 ** The following code executes when a syntax error first occurs.
**
** The grammar's %syntax_error code simply hands over to yy_parse_failed();
** neither yymajor nor yyminor is used by it, hence the (void) casts.
2757 */
2758 static void yy_syntax_error(
2759  yyParser *yypParser, /* The parser */
2760  int yymajor, /* The major type of the error token */
2761  ParseTOKENTYPE yyminor /* The minor type of the error token */
2762 ){
2764  (void)yymajor;
2765  (void)yyminor;
 /* TOKEN lets %syntax_error code refer to the offending token's value. */
2766 #define TOKEN yyminor
2767 /************ Begin %syntax_error code ****************************************/
2768 #line 1814 "queryparser/queryparser.lemony"
2769 
2770  yy_parse_failed(yypParser);
2771 #line 2772 "queryparser/queryparser_internal.cc"
2772 /************ End %syntax_error code ******************************************/
2773  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2774 }
2775 
2776 /*
2777 ** The following is executed when the parser accepts
**
** It logs the acceptance, resets the error counter to -1 when error
** recovery is compiled in, and checks that exactly one entry is left on
** the parser stack.  This grammar supplies no %parse_accept code.
2778 */
2779 static void yy_accept(
2780  yyParser *yypParser /* The parser */
2781 ){
2783  LOGLINE(QUERYPARSER, "Accept!");
2784 #ifndef YYNOERRORRECOVERY
2785  yypParser->yyerrcnt = -1;
2786 #endif
2787  AssertEq( yypParser->yystack.size(), 1 );
2788  /* Here code is inserted which will be executed whenever the
2789  ** parser accepts */
2790 /*********** Begin %parse_accept code *****************************************/
2791 /*********** End %parse_accept code *******************************************/
2792  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2793 }
2794 
2795 /* The main parser program.
2796 ** The first argument is a pointer to a structure obtained from
2797 ** "ParseAlloc" which describes the current state of the parser.
2798 ** The second argument is the major token number. The third is
2799 ** the minor token. The fourth optional argument is whatever the
2800 ** user wants (and specified in the grammar) and is available for
2801 ** use by the action routines.
2802 **
2803 ** Inputs:
2804 ** <ul>
2805 ** <li> A pointer to the parser (an opaque structure.)
2806 ** <li> The major token number.
2808 ** <li> An optional argument of a grammar-specified type.
2808 ** <li> An option argument of a grammar-specified type.
2809 ** </ul>
2810 **
2811 ** Outputs:
2812 ** None.
2813 */
2814 static
2815 void Parse(
2816  yyParser *yypParser, /* The parser */
2817  int yymajor, /* The major token code number */
2818  ParseTOKENTYPE yyminor /* The value for the token */
2819  ParseARG_PDECL /* Optional %extra_argument parameter */
2820 ){
2821  YYMINORTYPE yyminorunion;
2822  unsigned int yyact; /* The parser action. */
2823 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
2824  int yyendofinput; /* True if we are at the end of input */
2825 #endif
2826 #ifdef YYERRORSYMBOL
2827  int yyerrorhit = 0; /* True if yymajor has invoked an error */
2828 #endif
2829 
2830 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
2831  yyendofinput = (yymajor==0);
2832 #endif
2834 
2835 #ifdef XAPIAN_DEBUG_LOG
2836  {
2837  int stateno = yypParser->yystack.back().stateno;
2838  if( stateno < YY_MIN_REDUCE ){
2839  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
2840  "'," << (yyminor ? yyminor->name : "<<null>>") <<
2841  "in state " << stateno);
2842  }else{
2843  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
2844  "'," << (yyminor ? yyminor->name : "<<null>>") <<
2845  "with pending reduce " << stateno-YY_MIN_REDUCE);
2846  }
2847  }
2848 #endif
2849 
2850  do{
2851  yyact = yy_find_shift_action(yypParser,static_cast<YYCODETYPE>(yymajor));
2852  if( yyact >= YY_MIN_REDUCE ){
2853  yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,yyminor);
2854  }else if( yyact <= YY_MAX_SHIFTREDUCE ){
2855  yy_shift(yypParser,yyact,yymajor,yyminor);
2856 #ifndef YYNOERRORRECOVERY
2857  yypParser->yyerrcnt--;
2858 #endif
2859  yymajor = YYNOCODE;
2860  }else if( yyact==YY_ACCEPT_ACTION ){
2861  yypParser->yystack.pop_back();
2862  yy_accept(yypParser);
2863  return;
2864  }else{
2865  Assert( yyact == YY_ERROR_ACTION );
2866  yyminorunion.yy0 = yyminor;
2867 #ifdef YYERRORSYMBOL
2868  int yymx;
2869 #endif
2870  LOGLINE(QUERYPARSER, "Syntax Error!");
2871 #ifdef YYERRORSYMBOL
2872  /* A syntax error has occurred.
2873  ** The response to an error depends upon whether or not the
2874  ** grammar defines an error token "ERROR".
2875  **
2876  ** This is what we do if the grammar does define ERROR:
2877  **
2878  ** * Call the %syntax_error function.
2879  **
2880  ** * Begin popping the stack until we enter a state where
2881  ** it is legal to shift the error symbol, then shift
2882  ** the error symbol.
2883  **
2884  ** * Set the error count to three.
2885  **
2886  ** * Begin accepting and shifting new tokens. No new error
2887  ** processing will occur until three tokens have been
2888  ** shifted successfully.
2889  **
2890  */
2891  if( yypParser->yyerrcnt<0 ){
2892  yy_syntax_error(yypParser,yymajor,yyminor);
2893  }
2894  yymx = yypParser->yystack.back().major;
2895  if( yymx==YYERRORSYMBOL || yyerrorhit ){
2896  LOGLINE(QUERYPARSER, "Discard input token " << ParseTokenName(yymajor));
2897  yy_destructor(yypParser, static_cast<YYCODETYPE>(yymajor), &yyminorunion);
2898  yymajor = YYNOCODE;
2899  }else{
2900  while( !yypParser->yystack.empty()
2901  && yymx != YYERRORSYMBOL
2902  && (yyact = yy_find_reduce_action(
2903  yypParser->yystack.back().stateno,
2904  YYERRORSYMBOL)) >= YY_MIN_REDUCE
2905  ){
2906  yy_pop_parser_stack(yypParser);
2907  }
2908  if( yypParser->yystack.empty() || yymajor==0 ){
2909  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2910  yy_parse_failed(yypParser);
2911 #ifndef YYNOERRORRECOVERY
2912  yypParser->yyerrcnt = -1;
2913 #endif
2914  yymajor = YYNOCODE;
2915  }else if( yymx!=YYERRORSYMBOL ){
2916  yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor);
2917  }
2918  }
2919  yypParser->yyerrcnt = 3;
2920  yyerrorhit = 1;
2921 #elif defined(YYNOERRORRECOVERY)
2922  /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
2923  ** do any kind of error recovery. Instead, simply invoke the syntax
2924  ** error routine and continue going as if nothing had happened.
2925  **
2926  ** Applications can set this macro (for example inside %include) if
2927  ** they intend to abandon the parse upon the first syntax error seen.
2928  */
2929  yy_syntax_error(yypParser,yymajor, yyminor);
2930  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2931  yymajor = YYNOCODE;
2932 
2933 #else /* YYERRORSYMBOL is not defined */
2934  /* This is what we do if the grammar does not define ERROR:
2935  **
2936  ** * Report an error message, and throw away the input token.
2937  **
2938  ** * If the input token is $, then fail the parse.
2939  **
2940  ** As before, subsequent error messages are suppressed until
2941  ** three input tokens have been successfully shifted.
2942  */
2943  if( yypParser->yyerrcnt<=0 ){
2944  yy_syntax_error(yypParser,yymajor, yyminor);
2945  }
2946  yypParser->yyerrcnt = 3;
2947  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2948  if( yyendofinput ){
2949  yy_parse_failed(yypParser);
2950 #ifndef YYNOERRORRECOVERY
2951  yypParser->yyerrcnt = -1;
2952 #endif
2953  }
2954  yymajor = YYNOCODE;
2955 #endif
2956  }
2957  }while( yymajor!=YYNOCODE && yypParser->yystack.size() > 1 );
2958 #ifdef XAPIAN_DEBUG_LOG
2959  {
2960  int i;
2961  LOGLINE(QUERYPARSER, "Return. Stack=");
2962  for(i=1; i<=(int)yypParser->yystack.size(); i++)
2963  LOGLINE(QUERYPARSER, yyTokenName[yypParser->yystack[i].major]);
2964  }
2965 #endif
2966  return;
2967 }
2968 
2969 // Select C++ syntax highlighting in vim editor: vim: syntax=cpp
2970 #line 799 "queryparser/queryparser.lemony"
2971 
2972 
/** Lex the query string @a qs and feed the resulting tokens to the
 *  lemon-generated parser via Parse().
 *
 *  This is the hand-written lexer for the query parser. It walks @a qs as
 *  UTF-8 and recognises quotes, '+'/'-', brackets, '~', ".." ranges, field
 *  prefixes, boolean operator words and terms, calling Parse() once per
 *  token. Every Term allocated here with new is passed to Parse()
 *  (presumably the parser takes ownership - confirm against the grammar).
 *
 *  @param qs              The query string to parse.
 *  @param flags           Bitwise-or of FLAG_* values. Several syntax
 *                         features below (phrases, love/hate, boolean
 *                         operators and brackets, synonyms, wildcards,
 *                         partial matching, spelling correction) are only
 *                         acted on when the corresponding flag is set.
 *  @param default_prefix  Prefix for terms with no explicit field. If
 *                         empty, a FieldInfo registered in field_map for
 *                         the empty field name is used when there is one.
 *
 *  @return state.query after all tokens have been fed to the parser.
 *          errmsg is set to state.error, and corrected_query is cleared
 *          on entry and rebuilt if any spelling suggestion is found.
 */
2973 Query
2974 QueryParser::Internal::parse_query(const string &qs, unsigned flags,
2975  const string &default_prefix)
2976 {
2977  bool try_word_break = (flags & FLAG_NGRAMS) || is_ngram_enabled();
2978
2979  // Set ranges if we may have to handle ranges in the query.
2980  bool ranges = !rangeprocs.empty() && (qs.find("..") != string::npos);
2981
2982  termpos term_pos = 1;
2983  Utf8Iterator it(qs), end;
2984
2985  State state(this, flags);
2986
2987  // To successfully apply more than one spelling correction to a query
2988  // string, we must keep track of the offset due to previous corrections.
2989  int correction_offset = 0;
2990  corrected_query.resize(0);
2991
2992  // Stack of prefixes, used for phrases and subexpressions.
2993  list<const FieldInfo *> prefix_stack;
2994
2995  // If default_prefix is specified, use it. Otherwise, use any list
2996  // that has been set for the empty prefix.
2997  const FieldInfo def_pfx(NON_BOOLEAN, default_prefix);
2998  {
2999  const FieldInfo * default_field_info = &def_pfx;
3000  if (default_prefix.empty()) {
3001  auto f = field_map.find(string());
3002  if (f != field_map.end()) default_field_info = &(f->second);
3003  }
3004
3005  // We always have the current prefix on the top of the stack.
3006  prefix_stack.push_back(default_field_info);
3007  }
3008
3009  yyParser parser;
3010
 // newprev records the previous character seen by the lexer; it is
 // read as prev in the non-word-character handling below.
3011  unsigned newprev = ' ';
 // Jumping back to this label (done after a ".." range has been scanned)
 // re-enters the loop with mode reset to DEFAULT.
3012 main_lex_loop:
3013  enum {
3014  DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP,
3015  IN_GROUP2, EXPLICIT_SYNONYM
3016  } mode = DEFAULT;
3017  while (it != end && !state.error) {
3018  bool last_was_operator = false;
3019  bool last_was_operator_needing_term = false;
3020  if (mode == EXPLICIT_SYNONYM) mode = DEFAULT;
 // The two "if (false)" blocks are never entered by falling through.
 // They are only reached via goto after an operator token has been
 // passed to Parse(), to set the last_was_operator* flags for the
 // token that follows without resetting them at the top of the loop.
3021  if (false) {
3022 just_had_operator:
3023  if (it == end) break;
3024  mode = DEFAULT;
3025  last_was_operator_needing_term = false;
3026  last_was_operator = true;
3027  }
3028  if (false) {
3029 just_had_operator_needing_term:
3030  last_was_operator_needing_term = true;
3031  last_was_operator = true;
3032  }
3033  if (mode == IN_PHRASED_TERM) mode = DEFAULT;
3034  if (is_whitespace(*it)) {
3035  newprev = ' ';
3036  ++it;
3037  it = find_if(it, end, is_not_whitespace);
3038  if (it == end) break;
3039  }
3040
3041  if (ranges &&
3042  (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2)) {
3043  // Scan forward to see if this could be the "start of range"
3044  // token. Sadly this has O(n²) tendencies, though at least
3045  // "n" is the number of words in a query which is likely to
3046  // remain fairly small. FIXME: can we tokenise more elegantly?
3047  Utf8Iterator it_initial = it;
3048  Utf8Iterator p = it;
3049  unsigned ch = 0;
3050  while (p != end) {
3051  if (ch == '.' && *p == '.') {
3052  string a;
3053  while (it != p) {
3054  Unicode::append_utf8(a, *it++);
3055  }
3056  // Trim off the trailing ".".
3057  a.resize(a.size() - 1);
3058  ++p;
3059  // Either end of the range can be empty (for an open-ended
3060  // range) but both can't be empty.
3061  if (!a.empty() || (p != end && *p > ' ' && *p != ')')) {
3062  string b;
3063  // Allow any character except whitespace and ')' in the
3064  // upper bound.
3065  while (p != end && *p > ' ' && *p != ')') {
3066  Unicode::append_utf8(b, *p++);
3067  }
3068  Term * range = state.range(a, b);
3069  if (!range) {
3070  state.error = "Unknown range operation";
3071  if (a.find(':', 1) == string::npos) {
3072  goto done;
3073  }
3074  // Might be a boolean filter with ".." in. Leave
3075  // state.error in case it isn't.
3076  it = it_initial;
3077  break;
3078  }
3079  Parse(&parser, RANGE, range, &state);
3080  }
3081  it = p;
3082  goto main_lex_loop;
3083  }
3084  ch = *p;
3085  // Allow any character except whitespace and '(' in the lower
3086  // bound.
3087  if (ch <= ' ' || ch == '(') break;
3088  ++p;
3089  }
3090  }
3091
3092  if (!is_wordchar(*it)) {
3093  unsigned prev = newprev;
3094  unsigned ch = *it++;
3095  newprev = ch;
3096  // Drop out of IN_GROUP mode.
3097  if (mode == IN_GROUP || mode == IN_GROUP2)
3098  mode = DEFAULT;
3099  switch (ch) {
3100  case '"':
3101  case 0x201c: // Left curly double quote.
3102  case 0x201d: // Right curly double quote.
3103  // Quoted phrase.
3104  if (mode == DEFAULT) {
3105  // Skip whitespace.
3106  it = find_if(it, end, is_not_whitespace);
3107  if (it == end) {
3108  // Ignore an unmatched " at the end of the query to
3109  // avoid generating an empty pair of QUOTEs which will
3110  // cause a parse error.
3111  goto done;
3112  }
3113  if (is_double_quote(*it)) {
3114  // Ignore empty "" (but only if we're not already
3115  // IN_QUOTES as we don't merge two adjacent quoted
3116  // phrases!)
3117  newprev = *it++;
3118  break;
3119  }
3120  }
3121  if (flags & QueryParser::FLAG_PHRASE) {
3122  if (ch == '"' && it != end && *it == '"') {
3123  ++it;
3124  // Handle "" inside a quoted phrase as an escaped " for
3125  // consistency with quoted boolean terms.
3126  break;
3127  }
3128  Parse(&parser, QUOTE, NULL, &state);
3129  if (mode == DEFAULT) {
3130  mode = IN_QUOTES;
3131  } else {
3132  // Remove the prefix we pushed for this phrase.
3133  if (mode == IN_PREFIXED_QUOTES)
3134  prefix_stack.pop_back();
3135  mode = DEFAULT;
3136  }
3137  }
3138  break;
3139
3140  case '+': case '-': // Loved or hated term/phrase/subexpression.
3141  // Ignore + or - at the end of the query string.
3142  if (it == end) goto done;
3143  if (prev > ' ' && prev != '(') {
3144  // Or if not after whitespace or an open bracket.
3145  break;
3146  }
3147  if (is_whitespace(*it) || *it == '+' || *it == '-') {
3148  // Ignore + or - followed by a space, or further + or -.
3149  // Postfix + (such as in C++ and H+) is handled as part of
3150  // the term lexing code in parse_term().
3151  newprev = *it++;
3152  break;
3153  }
3154  if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
3155  int token;
3156  if (ch == '+') {
3157  token = LOVE;
3158  } else if (last_was_operator) {
3159  token = HATE_AFTER_AND;
3160  } else {
3161  token = HATE;
3162  }
3163  Parse(&parser, token, NULL, &state);
3164  goto just_had_operator_needing_term;
3165  }
3166  // Need to prevent the term after a LOVE or HATE starting a
3167  // term group...
3168  break;
3169
3170  case '(': // Bracketed subexpression.
3171  // Skip whitespace.
3172  it = find_if(it, end, is_not_whitespace);
3173  // Ignore ( at the end of the query string.
3174  if (it == end) goto done;
3175  if (prev > ' ' && strchr("()+-", prev) == NULL) {
3176  // Or if not after whitespace or a bracket or '+' or '-'.
3177  break;
3178  }
3179  if (*it == ')') {
3180  // Ignore empty ().
3181  newprev = *it++;
3182  break;
3183  }
3184  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3185  prefix_stack.push_back(prefix_stack.back());
3186  Parse(&parser, BRA, NULL, &state);
3187  }
3188  break;
3189
3190  case ')': // End of bracketed subexpression.
3191  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3192  // Remove the prefix we pushed for the corresponding BRA.
3193  // If brackets are unmatched, it's a syntax error, but
3194  // that's no excuse to SEGV!
3195  if (prefix_stack.size() > 1) prefix_stack.pop_back();
3196  Parse(&parser, KET, NULL, &state);
3197  }
3198  break;
3199
3200  case '~': // Synonym expansion.
3201  // Ignore at the end of the query string.
3202  if (it == end) goto done;
3203  if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
3204  if (prev > ' ' && strchr("+-(", prev) == NULL) {
3205  // Or if not after whitespace, +, -, or an open bracket.
3206  break;
3207  }
3208  if (!is_wordchar(*it)) {
3209  // Ignore if not followed by a word character.
3210  break;
3211  }
3212  Parse(&parser, SYNONYM, NULL, &state);
3213  mode = EXPLICIT_SYNONYM;
3214  goto just_had_operator_needing_term;
3215  }
3216  break;
3217  }
3218  // Skip any other characters.
3219  continue;
3220  }
3221
3222  Assert(is_wordchar(*it));
3223
 // Byte offset of this term within qs, used when substituting a
 // spelling suggestion into corrected_query.
3224  size_t term_start_index = it.raw() - qs.data();
3225
3226  newprev = 'A'; // Any letter will do...
3227
3228  // A term, a prefix, or a boolean operator.
3229  const FieldInfo * field_info = NULL;
3230  if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2 || mode == EXPLICIT_SYNONYM) &&
3231  !field_map.empty()) {
3232  // Check for a fieldname prefix (e.g. title:historical).
3233  Utf8Iterator p = find_if(it, end, is_not_wordchar);
3234  if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
3235  string field;
3236  p = it;
3237  while (*p != ':')
3238  Unicode::append_utf8(field, *p++);
3239  map<string, FieldInfo>::const_iterator f;
3240  f = field_map.find(field);
3241  if (f != field_map.end()) {
3242  // Special handling for prefixed fields, depending on the
3243  // type of the prefix.
3244  unsigned ch = *++p;
3245  field_info = &(f->second);
3246
3247  if (field_info->type != NON_BOOLEAN) {
3248  // Drop out of IN_GROUP if we're in it.
3249  if (mode == IN_GROUP || mode == IN_GROUP2)
3250  mode = DEFAULT;
3251  it = p;
3252  string name;
3253  if (it != end && is_double_quote(*it)) {
3254  // Quoted boolean term (can contain any character).
3255  bool fancy = (*it != '"');
3256  ++it;
3257  while (it != end) {
3258  if (*it == '"') {
3259  // Interpret "" as an escaped ".
3260  if (++it == end || *it != '"')
3261  break;
3262  } else if (fancy && is_double_quote(*it)) {
3263  // If the opening quote was ASCII, then the
3264  // closing one must be too - otherwise
3265  // the user can't protect non-ASCII double
3266  // quote characters by quoting or escaping.
3267  ++it;
3268  break;
3269  }
3270  Unicode::append_utf8(name, *it++);
3271  }
3272  } else {
3273  // Can't boolean filter prefix a subexpression, so
3274  // just use anything following the prefix until the
3275  // next space or ')' as part of the boolean filter
3276  // term.
3277  while (it != end && *it > ' ' && *it != ')')
3278  Unicode::append_utf8(name, *it++);
3279  }
3280  // Build the unstemmed form in field.
3281  field += ':';
3282  field += name;
3283  // Clear any pending range error.
3284  state.error = NULL;
3285  Term * token = new Term(&state, name, field_info, field);
3286  Parse(&parser, BOOLEAN_FILTER, token, &state);
3287  continue;
3288  }
3289
3290  if ((flags & FLAG_PHRASE) && is_double_quote(ch)) {
3291  // Prefixed phrase, e.g.: subject:"space flight"
3292  mode = IN_PREFIXED_QUOTES;
3293  Parse(&parser, QUOTE, NULL, &state);
3294  it = p;
3295  newprev = ch;
3296  ++it;
3297  prefix_stack.push_back(field_info);
3298  continue;
3299  }
3300
3301  if (ch == '(' && (flags & FLAG_BOOLEAN)) {
3302  // Prefixed subexpression, e.g.: title:(fast NEAR food)
3303  mode = DEFAULT;
3304  Parse(&parser, BRA, NULL, &state);
3305  it = p;
3306  newprev = ch;
3307  ++it;
3308  prefix_stack.push_back(field_info);
3309  continue;
3310  }
3311
3312  if (ch != ':') {
3313  // Allow 'path:/usr/local' but not 'foo::bar::baz'.
3314  while (is_phrase_generator(ch)) {
3315  if (++p == end)
3316  goto not_prefix;
3317  ch = *p;
3318  }
3319  }
3320
3321  if (is_wordchar(ch)) {
3322  // Prefixed term.
3323  it = p;
3324  } else {
3325 not_prefix:
3326  // It looks like a prefix but isn't, so parse it as
3327  // text instead.
3328  field_info = NULL;
3329  }
3330  }
3331  }
3332  }
3333
 // Re-entered via goto for each further term joined to the previous one
 // by phrase generators; field_info and mode are carried over from the
 // jump site.
3334 phrased_term:
3335  bool was_acronym;
3336  bool needs_word_break = false;
3337  string term = parse_term(it, end, try_word_break,
3338  needs_word_break, was_acronym);
3339
3340  if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) &&
3341  (flags & FLAG_BOOLEAN) &&
3342  // Don't want to interpret A.N.D. as an AND operator.
3343  !was_acronym &&
3344  !field_info &&
3345  term.size() >= 2 && term.size() <= 4 && U_isalpha(term[0])) {
3346  // Boolean operators.
3347  string op = term;
3348  if (flags & FLAG_BOOLEAN_ANY_CASE) {
3349  for (string::iterator i = op.begin(); i != op.end(); ++i) {
3350  *i = C_toupper(*i);
3351  }
3352  }
3353  if (op.size() == 3) {
3354  if (op == "AND") {
3355  Parse(&parser, AND, NULL, &state);
3356  goto just_had_operator;
3357  }
3358  if (op == "NOT") {
3359  Parse(&parser, NOT, NULL, &state);
3360  goto just_had_operator;
3361  }
3362  if (op == "XOR") {
3363  Parse(&parser, XOR, NULL, &state);
3364  goto just_had_operator;
3365  }
3366  if (op == "ADJ") {
 // "ADJ/N" carries an explicit width N. If N is zero, missing, or
 // not followed by whitespace or the end of the query, no operator
 // token is emitted and the word falls through to be handled as an
 // ordinary term below.
3367  if (it != end && *it == '/') {
3368  size_t width = 0;
3369  Utf8Iterator p = it;
3370  while (++p != end && U_isdigit(*p)) {
3371  width = (width * 10) + (*p - '0');
3372  }
3373  if (width && (p == end || is_whitespace(*p))) {
3374  it = p;
3375  Parse(&parser, ADJ, new Term(width), &state);
3376  goto just_had_operator;
3377  }
3378  } else {
3379  Parse(&parser, ADJ, NULL, &state);
3380  goto just_had_operator;
3381  }
3382  }
3383  } else if (op.size() == 2) {
3384  if (op == "OR") {
3385  Parse(&parser, OR, NULL, &state);
3386  goto just_had_operator;
3387  }
3388  } else if (op.size() == 4) {
3389  if (op == "NEAR") {
 // Same "/N" width handling as for ADJ above.
3390  if (it != end && *it == '/') {
3391  size_t width = 0;
3392  Utf8Iterator p = it;
3393  while (++p != end && U_isdigit(*p)) {
3394  width = (width * 10) + (*p - '0');
3395  }
3396  if (width && (p == end || is_whitespace(*p))) {
3397  it = p;
3398  Parse(&parser, NEAR, new Term(width), &state);
3399  goto just_had_operator;
3400  }
3401  } else {
3402  Parse(&parser, NEAR, NULL, &state);
3403  goto just_had_operator;
3404  }
3405  }
3406  }
3407  }
3408
3409  // If no prefix is set, use the default one.
3410  if (!field_info) field_info = prefix_stack.back();
3411
3412  Assert(field_info->type == NON_BOOLEAN);
3413
3414  {
3415  string unstemmed_term(term);
3416  term = Unicode::tolower(term);
3417
3418  // Reuse stem_strategy - STEM_SOME here means "stem terms except
3419  // when used with positional operators".
3420  stem_strategy stem_term = stem_action;
3421  if (stem_term != STEM_NONE) {
3422  if (stemmer.is_none()) {
3423  stem_term = STEM_NONE;
3424  } else if (stem_term == STEM_SOME ||
3425  stem_term == STEM_SOME_FULL_POS) {
3426  if (!should_stem(unstemmed_term) ||
3427  (it != end && is_stem_preventer(*it))) {
3428  // Don't stem this particular term.
3429  stem_term = STEM_NONE;
3430  }
3431  }
3432  }
3433
 // Every path below passes term_obj to exactly one Parse() call.
3434  Term * term_obj = new Term(&state, term, field_info,
3435  unstemmed_term, stem_term, term_pos++);
3436
3437  if (needs_word_break) {
3438  Parse(&parser, UNBROKEN_WORDS, term_obj, &state);
3439  // Drop out of IN_GROUP mode.
3440  if (mode == IN_GROUP || mode == IN_GROUP2)
3441  mode = DEFAULT;
3442  if (it == end) break;
3443  continue;
3444  }
3445
3446  if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3447  if (it != end) {
3448  if ((flags & FLAG_WILDCARD) && *it == '*') {
3449  Utf8Iterator p(it);
3450  ++p;
3451  if (p == end || !is_wordchar(*p)) {
3452  it = p;
3453  if (mode == IN_GROUP || mode == IN_GROUP2) {
3454  // Drop out of IN_GROUP and flag that the group
3455  // can be empty if all members are stopwords.
3456  if (mode == IN_GROUP2)
3457  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3458  mode = DEFAULT;
3459  }
3460  // Wildcard at end of term (also known as
3461  // "right truncation").
3462  Parse(&parser, WILD_TERM, term_obj, &state);
3463  continue;
3464  }
3465  }
3466  } else {
3467  if (flags & FLAG_PARTIAL) {
3468  if (mode == IN_GROUP || mode == IN_GROUP2) {
3469  // Drop out of IN_GROUP and flag that the group
3470  // can be empty if all members are stopwords.
3471  if (mode == IN_GROUP2)
3472  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3473  mode = DEFAULT;
3474  }
3475  // Final term of a partial match query, with no
3476  // following characters - treat as a wildcard.
3477  Parse(&parser, PARTIAL_TERM, term_obj, &state);
3478  continue;
3479  }
3480  }
3481  }
3482
3483  // Check spelling, if we're a normal term, and any of the prefixes
3484  // are empty.
3485  if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
3486  const auto& prefixes = field_info->prefixes;
3487  for (const string& prefix : prefixes) {
3488  if (!prefix.empty())
3489  continue;
3490  const string & suggest = db.get_spelling_suggestion(term);
3491  if (!suggest.empty()) {
3492  if (corrected_query.empty()) corrected_query = qs;
3493  size_t term_end_index = it.raw() - qs.data();
3494  size_t n = term_end_index - term_start_index;
3495  size_t pos = term_start_index + correction_offset;
3496  corrected_query.replace(pos, n, suggest);
3497  correction_offset += suggest.size();
3498  correction_offset -= n;
3499  }
 // Only the first empty prefix triggers a lookup.
3500  break;
3501  }
3502  }
3503
3504  if (mode == IN_PHRASED_TERM) {
3505  Parse(&parser, PHR_TERM, term_obj, &state);
3506  } else {
3507  // See if the next token will be PHR_TERM - if so, this one
3508  // needs to be TERM not GROUP_TERM.
 // NOTE(review): it can equal end here (last term of a group without
 // FLAG_PARTIAL), so *it relies on Utf8Iterator::operator* being safe
 // at the end of input - confirm.
3509  if ((mode == IN_GROUP || mode == IN_GROUP2) &&
3510  is_phrase_generator(*it)) {
3511  // FIXME: can we clean this up?
3512  Utf8Iterator p = it;
3513  do {
3514  ++p;
3515  } while (p != end && is_phrase_generator(*p));
3516  // Don't generate a phrase unless the phrase generators are
3517  // immediately followed by another term.
3518  if (p != end && is_wordchar(*p)) {
3519  mode = DEFAULT;
3520  }
3521  }
3522
3523  int token = TERM;
3524  if (mode == IN_GROUP || mode == IN_GROUP2) {
3525  mode = IN_GROUP2;
3526  token = GROUP_TERM;
3527  }
3528  Parse(&parser, token, term_obj, &state);
3529  if (token == TERM && mode != DEFAULT)
3530  continue;
3531  }
3532  }
3533
3534  if (it == end) break;
3535
3536  if (is_phrase_generator(*it)) {
3537  // Skip multiple phrase generators.
3538  do {
3539  ++it;
3540  } while (it != end && is_phrase_generator(*it));
3541  // Don't generate a phrase unless the phrase generators are
3542  // immediately followed by another term.
3543  if (it != end && is_wordchar(*it)) {
3544  mode = IN_PHRASED_TERM;
3545  term_start_index = it.raw() - qs.data();
3546  goto phrased_term;
3547  }
3548  } else if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3549  int old_mode = mode;
3550  mode = DEFAULT;
3551  if (!last_was_operator_needing_term && is_whitespace(*it)) {
3552  newprev = ' ';
3553  // Skip multiple whitespace.
3554  do {
3555  ++it;
3556  } while (it != end && is_whitespace(*it));
3557  // Don't generate a group unless the terms are only separated
3558  // by whitespace.
3559  if (it != end && is_wordchar(*it)) {
3560  if (old_mode == IN_GROUP || old_mode == IN_GROUP2) {
3561  mode = IN_GROUP2;
3562  } else {
3563  mode = IN_GROUP;
3564  }
3565  }
3566  }
3567  }
3568  }
3569 done:
 // If an error has been recorded, no closing tokens or end-of-input
 // token are sent to the parser.
3570  if (!state.error) {
3571  // Implicitly close any unclosed quotes.
3572  if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
3573  Parse(&parser, QUOTE, NULL, &state);
3574
3575  // Implicitly close all unclosed brackets.
 // NOTE(review): an unclosed prefixed quote (IN_PREFIXED_QUOTES) also
 // leaves an entry on prefix_stack, so this loop emits a KET for it as
 // well as for each open bracket - confirm that is intended.
3576  while (prefix_stack.size() > 1) {
3577  Parse(&parser, KET, NULL, &state);
3578  prefix_stack.pop_back();
3579  }
 // Major token 0 tells the parser the input has ended.
3580  Parse(&parser, 0, NULL, &state);
3581  }
3582
3583  errmsg = state.error;
3584  return state.query;
3585 }
3586 
3587 #line 3588 "queryparser/queryparser_internal.cc"
static void yy_pop_parser_stack(yyParser *pParser)
Unicode and UTF-8 related classes and functions.
bool is_none() const
Return true if this is a no-op stemmer.
Definition: stem.h:166
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
static unsigned int yy_find_shift_action(yyParser *pParser, YYCODETYPE iLookAhead)
void append_utf8(std::string &s, unsigned ch)
Append the UTF-8 representation of a single Unicode character to a std::string.
Definition: unicode.h:332
#define Assert(COND)
Definition: omassert.h:122
ParseARG_SDECL vector< yyStackEntry > yystack
#define XOR
bool is_stopword(const Term *term) const
bool operator==(const SynonymIterator &o) const
Letter, modifier (Lm)
Definition: unicode.h:225
YYCODETYPE lhs
bool U_isalpha(unsigned ch)
size_t stoplist_size() const
Query get_query() const
const char * raw() const
Return the raw const char* pointer for the current position.
Definition: unicode.h:54
#define AssertEq(A, B)
Definition: omassert.h:124
unsigned tolower(unsigned ch)
Convert a Unicode character to lowercase.
Definition: unicode.h:376
signed char nrhs
#define YY_MIN_REDUCE
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::termcount_diff difference_type
Database get_database() const
Letter, other (Lo)
Definition: unicode.h:226
ProbQuery(Query *query_)
Xapian::Query internals.
#define YY_SHIFT_MAX
#define true
Definition: header.h:8
Xapian::TermIterator i
#define BOOLEAN_FILTER
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
bool is_digit(unsigned ch)
static void Parse(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor ParseARG_PDECL)
QueryParser::Internal * qpi
void need_positions()
bool U_isupper(unsigned ch)
#define YY_SHIFT_MIN
#define YYACTIONTYPE
void stoplist_resize(size_t s)
op
Query operators.
Definition: query.h:78
bool is_currency(unsigned ch)
Test if a given Unicode character is a currency symbol.
Definition: unicode.h:371
Number, decimal digit (Nd)
Definition: unicode.h:230
#define ParseARG_SDECL
void append_filter(const string &grouping, const Query &qnew)
#define YY_REDUCE_COUNT
bool is_unbroken_script(unsigned p)
Definition: word-breaker.cc:71
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
Query * as_group(State *state) const
Convert to a Xapian::Query * using default_op.
void append_filter_range(const string &grouping, const Query &range)
#define GROUP_TERM
STL namespace.
Xapian::TermIterator synonyms_end(const std::string &) const
Corresponding end iterator to synonyms_begin(term).
Definition: database.h:447
Convert types to std::string.
#define AND
Query get_query_with_auto_synonyms() const
const char * error
const Xapian::Query operator*() const
Query opwindow_subq(Query::op op, const vector< Query > &v, Xapian::termcount w) const
void add_filter_range(const string &grouping, const Query &range)
void add_positional_term(Term *term)
Add an unstemmed Term object to this Terms object.
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:49
#define ParseARG_STORE
Xapian::Query & reference
#define false
Definition: header.h:9
static Xapian::Stem stemmer
Definition: stemtest.cc:41
Query * as_partial_query(State *state_) const
Build a query for a term at the very end of the query string when FLAG_PARTIAL is in use...
The non-lemon-generated parts of the QueryParser class.
Query as_range_query() const
Range query.
#define PHR_TERM
#define yytestcase(X)
#define NOT
Query * as_adj_query() const
Convert to a Xapian::Query * using OP_PHRASE to implement ADJ.
Xapian::Query * pointer
std::input_iterator_tag iterator_category
#define ADJ
Iterator returning unigrams and bigrams.
Definition: word-breaker.h:52
char C_toupper(char ch)
Definition: stringutils.h:226
bool U_isdigit(unsigned ch)
#define HATE_AFTER_AND
Letter, lowercase (Ll)
Definition: unicode.h:223
Term * range(const string &a, const string &b)
Query get_query_with_synonyms() const
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:72
static void yy_parse_failed(yyParser *)
string unstemmed
#define RANGE
Term(const string &name_, const FieldInfo *field_info_)
void add_term(Term *term)
Add a Term object to this TermGroup object.
static void ParseFinalize(yyParser *pParser)
#define YY_MAX_SHIFTREDUCE
Term(const string &name_)
#define ParseARG_PDECL
Letter, titlecase (Lt)
Definition: unicode.h:224
Information about how to handle a field prefix in the query string.
Query * as_near_query() const
Convert to a Xapian::Query * using OP_NEAR.
const FieldInfo * field_info
#define UNBROKEN_WORDS
static void yy_shift(yyParser *yypParser, int yyNewState, int yyMajor, ParseTOKENTYPE yyMinor)
Base class for field processors.
Definition: queryparser.h:729
Query * as_wildcarded_query(State *state) const
struct yyParser yyParser
Match only documents where all subqueries match near and in order.
Definition: query.h:152
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:719
#define YYCODETYPE
bool is_double_quote(unsigned ch)
SynonymIterator(const Xapian::TermIterator &i_, Xapian::termpos pos_=0, const Xapian::Query *first_=NULL)
Terms(bool no_pos)
#define yyTraceShift(X, Y, Z)
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Definition: queryparser.h:943
#define YY_SHIFT_COUNT
static void ParseInit(yyParser *pParser)
Match only documents where a value slot is >= a given value.
Definition: query.h:223
unsigned flags
Xapian::TermIterator synonym_keys_begin(const std::string &prefix=std::string()) const
An iterator which returns all terms which have synonyms.
Definition: omdatabase.cc:740
#define QUOTE
static const YYACTIONTYPE yy_default[]
const unsigned UNICODE_IGNORE
Value representing "ignore this" when returned by check_infix() or check_infix_digit().
#define NEAR
size_t window
Window size.
bool is_phrase_generator(unsigned ch)
#define SYNONYM
const vector< string > * prefixes
The list of prefixes of the terms added.
Parser State shared between the lexer and the parser.
Match only documents where a value slot is within a given range.
Definition: query.h:158
#define WILD_TERM
Xapian::TermIterator synonym_keys_end(const std::string &=std::string()) const
Corresponding end iterator to synonym_keys_begin(prefix).
Definition: database.h:459
string str(int value)
Convert int to std::string.
Definition: str.cc:90
Xapian::Query value_type
vector< string > prefixes
Field prefix strings.
Match only documents where a value slot is <= a given value.
Definition: query.h:231
Term(const string &name_, termpos pos_)
TermGroup(Term *t1, Term *t2)
void add_to_unstem(const string &term, const string &unstemmed)
vector< Term * > terms
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
Construct an invalid query.
Definition: query.h:263
Query * as_unbroken_query() const
Build a query for a string of words without explicit word breaks.
static int yy_find_reduce_action(int stateno, YYCODETYPE iLookAhead)
unsigned check_infix_digit(unsigned ch)
#define YY_MAX_SHIFT
Query merge_filters() const
bool is_stem_preventer(unsigned ch)
Term(const Xapian::Query &q, const string &grouping)
void add_filter(const string &grouping, const Query &q)
Some terms which form a positional sub-query.
Xapian::TermIterator synonyms_begin(const std::string &term) const
An iterator which returns all the synonyms for a given term.
Definition: omdatabase.cc:722
#define YY_ERROR_ACTION
bool is_positional(Xapian::Query::op op)
A group of terms separated only by whitespace.
#define LOVE
Handle text without explicit word breaks.
bool operator!=(const SynonymIterator &o) const
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
static void yy_syntax_error(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor)
#define YY_ACTTAB_COUNT
bool should_stem(const string &term)
An iterator which returns Unicode character values from a UTF-8 encoded string.
Definition: unicode.h:38
bool empty_ok
Controls how to handle a group where all terms are stopwords.
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
int get_max_partial_type() const
XAPIAN_TERMCOUNT_BASE_TYPE termcount_diff
A signed difference between two counts of terms.
Definition: types.h:79
SynonymIterator & operator++()
#define YYNTOKEN
yyStackEntry(YYACTIONTYPE stateno_, YYCODETYPE major_, ParseTOKENTYPE minor_)
bool is_wordchar(unsigned ch)
Test if a given Unicode character is a "word character".
Definition: unicode.h:343
bool prefix_needs_colon(const string &prefix, unsigned ch)
#define TERM
char name[9]
Definition: dbcheck.cc:55
#define HATE
Term(State *state_, const string &name_, const FieldInfo *field_info_, const string &unstemmed_, QueryParser::stem_strategy stem_=QueryParser::STEM_NONE, termpos pos_=0)
int get_max_wildcard_type() const
Match only documents where all subqueries match near each other.
Definition: query.h:140
vector< Term * > terms
void add_to_stoplist(const Term *term)
string get_grouping() const
#define KET
void adjust_window(size_t alternative_window)
map< string, Query > filter
Value returned by get_type() for a term.
Definition: query.h:266
#define PARTIAL_TERM
bool empty() const
Check if this query is Xapian::Query::MatchNothing.
Definition: query.h:524
bool is_suffix(unsigned ch)
#define YYNSTATE
static void yy_reduce(yyParser *yypParser, unsigned int yyruleno, int yyLookahead, ParseTOKENTYPE yyLookaheadToken)
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
Xapian::termcount get_max_wildcard_expansion() const
Various handy helpers which std::string really should provide.
bool is_whitespace(unsigned ch)
Test if a given Unicode character is a whitespace character.
Definition: unicode.h:361
#define YYSTACKDEPTH
#define EMPTY_GROUP_OK
Xapian::termcount get_max_partial_expansion() const
void get_unbroken(Xapian::Utf8Iterator &it)
Definition: word-breaker.cc:86
Abstract base class for stop-word decision functor.
Definition: queryparser.h:50
bool is_not_wordchar(unsigned ch)
op get_type() const
Get the type of the top level of the query.
Definition: query.cc:212
category get_category(int info)
Definition: unicode.h:271
Xapian::valueno get_slot() const
static const YYCODETYPE yy_lookahead[]
void as_positional_unbroken(Terms *terms) const
Handle text without explicit word breaks in a positional context.
Definition: header.h:151
multimap< string, string > unstem
const Xapian::Query * first
static void add_to_query(Query *&q, Query::op op, Query *term)
QueryParser::stem_strategy stem
static const unsigned short int yy_shift_ofst[]
Various assertion macros.
ParseTOKENTYPE yy0
bool uniform_prefixes
Keep track of whether the terms added all have the same list of prefixes.
#define OR
#define LOGLINE(a, b)
Definition: debuglog.h:494
bool is_not_whitespace(unsigned ch)
static const YYACTIONTYPE yy_action[]
#define YY_MIN_SHIFTREDUCE
bool is_ngram_enabled()
Should we use the n-gram code?
Definition: word-breaker.cc:41
Class representing a query.
Definition: query.h:46
#define BRA
void set_empty_ok()
Set the empty_ok flag.
termpos get_termpos() const
#define ParseARG_FETCH
Query::op default_op() const
static TermGroup * create(Term *t1, Term *t2)
Factory function - ensures heap allocation.
string make_term(const string &prefix) const
static const struct @13 yyRuleInfo[]
static Terms * create(State *state)
Factory function - ensures heap allocation.
unsigned check_infix(unsigned ch)
Xapian::Internal::opt_intrusive_ptr< const Stopper > stopper
#define YY_ACCEPT_ACTION
Class used to pass information about a token from lexer to parser.
#define YYNOCODE
static void yy_accept(yyParser *)
static void yy_destructor(yyParser *yypParser, YYCODETYPE yymajor, YYMINORTYPE *yypminor)
Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const
Convert to a query using the given operator and window size.
string stem_term(const string &term)
Debug logging macros.
Query * as_phrase_query() const
Convert to a Xapian::Query * using adjacent OP_PHRASE.
Query::op effective_default_op
#define VET_BOOL_ARGS(A, B, OP_TXT)
State(QueryParser::Internal *qpi_, unsigned flags_)
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
static const short yy_reduce_ofst[]
Term(termpos pos_)
filter_type type
The type of this field.
const Stopper * get_stopper() const
#define ParseTOKENTYPE