xapian-core  1.4.22
queryparser_internal.cc
1 /*
2 ** 2000-05-29
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** Driver template for the LEMON parser generator.
13 **
14 ** Synced with upstream:
15 ** https://www.sqlite.org/src/artifact/468a155e8729cfbccfe1d85bf60d064f1dab76167a51149ec5c7928a2de63953
16 **
17 ** The "lemon" program processes an LALR(1) input grammar file, then uses
18 ** this template to construct a parser. The "lemon" program inserts text
19 ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
20 ** interstitial "-" characters) contained in this template is changed into
21 ** the value of the %name directive from the grammar. Otherwise, the content
22 ** of this template is copied straight through into the generated parser
23 ** source file.
24 **
25 ** The following is the concatenation of all %include directives from the
26 ** input grammar file:
27 */
28 /************ Begin %include sections from the grammar ************************/
29 #line 1 "queryparser/queryparser.lemony"
30 
34 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2015,2016,2018,2019 Olly Betts
35  * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
36  * Copyright (C) 2010 Adam Sjøgren
37  *
38  * This program is free software; you can redistribute it and/or
39  * modify it under the terms of the GNU General Public License as
40  * published by the Free Software Foundation; either version 2 of the
41  * License, or (at your option) any later version.
42  *
43  * This program is distributed in the hope that it will be useful,
44  * but WITHOUT ANY WARRANTY; without even the implied warranty of
45  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46  * GNU General Public License for more details.
47  *
48  * You should have received a copy of the GNU General Public License
49  * along with this program; if not, write to the Free Software
50  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
51  * USA
52  */
53 
54 #include <config.h>
55 
56 #include "queryparser_internal.h"
57 
58 #include "api/queryinternal.h"
59 #include "omassert.h"
60 #include "str.h"
61 #include "stringutils.h"
62 #include "xapian/error.h"
63 #include "xapian/unicode.h"
64 
65 // Include the list of token values lemon generates.
66 #include "queryparser_token.h"
67 
68 #include "cjk-tokenizer.h"
69 
70 #include <algorithm>
71 #include <cstring>
72 #include <limits>
73 #include <list>
74 #include <string>
75 #include <vector>
76 
77 // We create the yyParser on the stack.
78 #define Parse_ENGINEALWAYSONSTACK
79 
80 using namespace std;
81 
82 using namespace Xapian;
83 
84 inline bool
85 U_isupper(unsigned ch) {
86  return ch < 128 && C_isupper(static_cast<unsigned char>(ch));
87 }
88 
89 inline bool
90 U_isdigit(unsigned ch) {
91  return ch < 128 && C_isdigit(static_cast<unsigned char>(ch));
92 }
93 
94 inline bool
95 U_isalpha(unsigned ch) {
96  return ch < 128 && C_isalpha(static_cast<unsigned char>(ch));
97 }
98 
100 
101 inline bool
102 is_not_whitespace(unsigned ch) {
103  return !is_whitespace(ch);
104 }
105 
107 
108 inline bool
109 is_not_wordchar(unsigned ch) {
110  return !is_wordchar(ch);
111 }
112 
113 inline bool
114  is_digit(unsigned ch) {
115  return (Unicode::get_category(ch) == Unicode::DECIMAL_DIGIT_NUMBER);
116 }
117 
118 // FIXME: we used to keep trailing "-" (e.g. Cl-) but it's of dubious utility
119 // and there's the risk of hyphens getting stuck onto the end of terms...
120 inline bool
121 is_suffix(unsigned ch) {
122  return ch == '+' || ch == '#';
123 }
124 
125 inline bool
126 is_double_quote(unsigned ch) {
127  // We simply treat all double quotes as equivalent, which is a bit crude,
128  // but it isn't clear that it would actually be better to require them to
129  // match up exactly.
130  //
131  // 0x201c is Unicode opening double quote.
132  // 0x201d is Unicode closing double quote.
133  return ch == '"' || ch == 0x201c || ch == 0x201d;
134 }
135 
136 inline bool
137 prefix_needs_colon(const string & prefix, unsigned ch)
138 {
139  if (!U_isupper(ch) && ch != ':') return false;
140  string::size_type len = prefix.length();
141  return (len > 1 && prefix[len - 1] != ':');
142 }
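// Illustrative example of the rule above (not an exhaustive specification):
// with a multi-character prefix such as "XFOO", a term starting with an
// upper-case letter or ':' gets a separating colon ("XFOO:Bar") so the
// boundary between prefix and term stays unambiguous, while a
// single-character prefix like "S" never needs one.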
143 
145 
146 inline bool
147 is_positional(Xapian::Query::op op)
148 {
149  return (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR);
150 }
151 
152 class Terms;
153 
160 class Term {
161  State * state;
162
163  public:
164  string name;
165  const FieldInfo * field_info;
166  string unstemmed;
167  QueryParser::stem_strategy stem;
168  termpos pos;
169  Query query;
170
171  Term(const string &name_, termpos pos_)
172  : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { }
173  explicit Term(const string &name_)
174  : name(name_), stem(QueryParser::STEM_NONE), pos(0) { }
175  Term(const string &name_, const FieldInfo * field_info_)
176  : name(name_), field_info(field_info_),
177  stem(QueryParser::STEM_NONE), pos(0) { }
178  explicit Term(termpos pos_) : stem(QueryParser::STEM_NONE), pos(pos_) { }
179  Term(State * state_, const string &name_, const FieldInfo * field_info_,
180  const string &unstemmed_,
181  QueryParser::stem_strategy stem_ = QueryParser::STEM_NONE,
182  termpos pos_ = 0)
183  : state(state_), name(name_), field_info(field_info_),
184  unstemmed(unstemmed_), stem(stem_), pos(pos_) { }
185  // For RANGE tokens.
186  Term(const Xapian::Query & q, const string & grouping)
187  : name(grouping), query(q) { }
188 
189  string make_term(const string & prefix) const;
190 
191  void need_positions() {
192  if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
193  }
194 
195  termpos get_termpos() const { return pos; }
196 
197  string get_grouping() const {
198  return field_info->grouping;
199  }
200 
201  Query * as_wildcarded_query(State * state) const;
202 
211  Query * as_partial_query(State * state_) const;
212 
214  Query * as_cjk_query() const;
215 
217  void as_positional_cjk_term(Terms * terms) const;
218 
220  Query as_range_query() const;
221 
222  Query get_query() const;
223 
224  Query get_query_with_synonyms() const;
225 
226  Query get_query_with_auto_synonyms() const;
227 };
228 
230 class State {
231  QueryParser::Internal * qpi;
232
233  public:
235  const char* error = NULL;
236  unsigned flags;
237  Query::op effective_default_op;
238
239  State(QueryParser::Internal * qpi_, unsigned flags_)
240  : qpi(qpi_), flags(flags_), effective_default_op(qpi_->default_op)
241  {
242  if ((flags & QueryParser::FLAG_NO_POSITIONS)) {
243  if (is_positional(effective_default_op)) {
244  effective_default_op = Query::OP_AND;
245  }
246  }
247  }
248 
249  string stem_term(const string &term) {
250  return qpi->stemmer(term);
251  }
252 
253  void add_to_stoplist(const Term * term) {
254  qpi->stoplist.push_back(term->name);
255  }
256 
257  void add_to_unstem(const string & term, const string & unstemmed) {
258  qpi->unstem.insert(make_pair(term, unstemmed));
259  }
260 
261  Term * range(const string &a, const string &b) {
262  for (auto i : qpi->rangeprocs) {
263  Xapian::Query range_query = (i.proc)->check_range(a, b);
264  Xapian::Query::op op = range_query.get_type();
265  switch (op) {
267  break;
271  if (i.default_grouping) {
272  Xapian::Internal::QueryValueBase * base =
273  static_cast<Xapian::Internal::QueryValueBase*>(
274  range_query.internal.get());
275  Xapian::valueno slot = base->get_slot();
276  return new Term(range_query, str(slot));
277  }
278  // FALLTHRU
280  return new Term(range_query, i.grouping);
281  default:
282  return new Term(range_query, string());
283  }
284  }
285  return NULL;
286  }
287 
288  Query::op default_op() const {
289  return effective_default_op;
290  }
291 
292  bool is_stopword(const Term *term) const {
293  return qpi->stopper.get() && (*qpi->stopper)(term->name);
294  }
295 
296  const Database & get_database() const {
297  return qpi->db;
298  }
299 
300  const Stopper * get_stopper() const {
301  return qpi->stopper.get();
302  }
303 
304  size_t stoplist_size() const {
305  return qpi->stoplist.size();
306  }
307 
308  void stoplist_resize(size_t s) {
309  qpi->stoplist.resize(s);
310  }
311 
312  Xapian::termcount get_max_wildcard_expansion() const {
313  return qpi->max_wildcard_expansion;
314  }
315 
316  int get_max_wildcard_type() const {
317  return qpi->max_wildcard_type;
318  }
319 
320  Xapian::termcount get_max_partial_expansion() const {
321  return qpi->max_partial_expansion;
322  }
323 
324  int get_max_partial_type() const {
325  return qpi->max_partial_type;
326  }
327 };
328 
329 string
330 Term::make_term(const string & prefix) const
331 {
332  string term;
333  if (stem != QueryParser::STEM_NONE && stem != QueryParser::STEM_ALL)
334  term += 'Z';
335  if (!prefix.empty()) {
336  term += prefix;
337  if (prefix_needs_colon(prefix, name[0])) term += ':';
338  }
339  if (stem != QueryParser::STEM_NONE) {
340  term += state->stem_term(name);
341  } else {
342  term += name;
343  }
344 
345  if (!unstemmed.empty())
346  state->add_to_unstem(term, unstemmed);
347  return term;
348 }
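// Illustrative examples of the term forms produced above (assuming the
// conventional "S" field prefix and an English stemmer): name "cats" with
// STEM_SOME becomes "ZS" + stem("cats") -> "ZScat", with STEM_NONE it is
// "Scats", and a multi-character prefix such as "XFOO" on a capitalised
// name yields "XFOO:Cats" because of the prefix_needs_colon() rule.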
349 
350 // Iterator shim to allow building a synonym query from a TermIterator pair.
351 class SynonymIterator {
352  Xapian::TermIterator i;
353
354  Xapian::termpos pos;
355
356  const Xapian::Query * first;
357
358  public:
359  SynonymIterator(const Xapian::TermIterator & i_,
360  Xapian::termpos pos_ = 0,
361  const Xapian::Query * first_ = NULL)
362  : i(i_), pos(pos_), first(first_) { }
363 
364  SynonymIterator & operator++() {
365  if (first)
366  first = NULL;
367  else
368  ++i;
369  return *this;
370  }
371 
372  const Xapian::Query operator*() const {
373  if (first) return *first;
374  return Xapian::Query(*i, 1, pos);
375  }
376 
377  bool operator==(const SynonymIterator & o) const {
378  return i == o.i && first == o.first;
379  }
380 
381  bool operator!=(const SynonymIterator & o) const {
382  return !(*this == o);
383  }
384 
385  typedef std::input_iterator_tag iterator_category;
390 };
391 
392 Query
393 Term::get_query_with_synonyms() const
394 {
395  // Handle single-word synonyms with each prefix.
396  const auto& prefixes = field_info->prefixes;
397  if (prefixes.empty()) {
398  Assert(field_info->proc.get());
399  return (*field_info->proc)(name);
400  }
401 
402  Query q = get_query();
403 
404  for (auto&& prefix : prefixes) {
405  // First try the unstemmed term:
406  string term;
407  if (!prefix.empty()) {
408  term += prefix;
409  if (prefix_needs_colon(prefix, name[0])) term += ':';
410  }
411  term += name;
412 
413  Xapian::Database db = state->get_database();
414  Xapian::TermIterator syn = db.synonyms_begin(term);
415  Xapian::TermIterator end = db.synonyms_end(term);
416  if (syn == end && stem != QueryParser::STEM_NONE) {
417  // If that has no synonyms, try the stemmed form:
418  term = 'Z';
419  if (!prefix.empty()) {
420  term += prefix;
421  if (prefix_needs_colon(prefix, name[0])) term += ':';
422  }
423  term += state->stem_term(name);
424  syn = db.synonyms_begin(term);
425  end = db.synonyms_end(term);
426  }
427  q = Query(q.OP_SYNONYM,
428  SynonymIterator(syn, pos, &q),
429  SynonymIterator(end));
430  }
431  return q;
432 }
433 
434 Query
435 Term::get_query_with_auto_synonyms() const
436 {
437  const unsigned MASK_ENABLE_AUTO_SYNONYMS =
438  QueryParser::FLAG_AUTO_SYNONYMS |
439  QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS;
440  if (state->flags & MASK_ENABLE_AUTO_SYNONYMS)
441  return get_query_with_synonyms();
442 
443  return get_query();
444 }
445 
446 static void
447 add_to_query(Query *& q, Query::op op, Query * term)
448 {
449  Assert(term);
450  if (q) {
451  if (op == Query::OP_OR) {
452  *q |= *term;
453  } else if (op == Query::OP_AND) {
454  *q &= *term;
455  } else {
456  *q = Query(op, *q, *term);
457  }
458  delete term;
459  } else {
460  q = term;
461  }
462 }
463 
464 static void
465 add_to_query(Query *& q, Query::op op, const Query & term)
466 {
467  if (q) {
468  if (op == Query::OP_OR) {
469  *q |= term;
470  } else if (op == Query::OP_AND) {
471  *q &= term;
472  } else {
473  *q = Query(op, *q, term);
474  }
475  } else {
476  q = new Query(term);
477  }
478 }
479 
480 Query
481 Term::get_query() const
482 {
483  const auto& prefixes = field_info->prefixes;
484  if (prefixes.empty()) {
485  Assert(field_info->proc.get());
486  return (*field_info->proc)(name);
487  }
488  auto piter = prefixes.begin();
489  Query q(make_term(*piter), 1, pos);
490  while (++piter != prefixes.end()) {
491  q |= Query(make_term(*piter), 1, pos);
492  }
493  return q;
494 }
495 
496 Query *
497 Term::as_wildcarded_query(State * state_) const
498 {
499  const auto& prefixes = field_info->prefixes;
500  Xapian::termcount max = state_->get_max_wildcard_expansion();
501  int max_type = state_->get_max_wildcard_type();
502  vector<Query> subqs;
503  subqs.reserve(prefixes.size());
504  for (string root : prefixes) {
505  root += name;
506  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
507  subqs.push_back(Query(Query::OP_WILDCARD, root, max, max_type,
508  Query::OP_OR));
509  }
510  Query * q = new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
511  delete this;
512  return q;
513 }
514 
515 Query *
516 Term::as_partial_query(State * state_) const
517 {
518  Xapian::termcount max = state_->get_max_partial_expansion();
519  int max_type = state_->get_max_partial_type();
520  vector<Query> subqs_partial; // A synonym of all the partial terms.
521  vector<Query> subqs_full; // A synonym of all the full terms.
522 
523  for (const string& prefix : field_info->prefixes) {
524  string root = prefix;
525  root += name;
526  // Combine with OP_OR, and apply OP_SYNONYM afterwards.
527  subqs_partial.push_back(Query(Query::OP_WILDCARD, root, max, max_type,
528  Query::OP_OR));
529  // Add the term, as it would normally be handled, as an alternative.
530  subqs_full.push_back(Query(make_term(prefix), 1, pos));
531  }
532  Query * q = new Query(Query::OP_OR,
533  Query(Query::OP_SYNONYM,
534  subqs_partial.begin(), subqs_partial.end()),
535  Query(Query::OP_SYNONYM,
536  subqs_full.begin(), subqs_full.end()));
537  delete this;
538  return q;
539 }
540 
541 Query *
542 Term::as_cjk_query() const
543 {
544  vector<Query> prefix_subqs;
545  vector<Query> cjk_subqs;
546  const auto& prefixes = field_info->prefixes;
547  for (const string& prefix : prefixes) {
548  for (CJKTokenIterator tk(name); tk != CJKTokenIterator(); ++tk) {
549  cjk_subqs.push_back(Query(prefix + *tk, 1, pos));
550  }
551  prefix_subqs.push_back(Query(Query::OP_AND,
552  cjk_subqs.begin(), cjk_subqs.end()));
553  cjk_subqs.clear();
554  }
555  Query * q = new Query(Query::OP_OR,
556  prefix_subqs.begin(), prefix_subqs.end());
557  delete this;
558  return q;
559 }
560 
561 Query
562 Term::as_range_query() const
563 {
564  Query q = query;
565  delete this;
566  return q;
567 }
568 
569 inline bool
570 is_phrase_generator(unsigned ch)
571 {
572  // These characters generate a phrase search.
573  // Ordered mostly by frequency of calls to this function done when
574  // running the testcases in api_queryparser.cc.
575  return (ch && ch < 128 && strchr(".-/:\\@", ch) != NULL);
576 }
577 
578 inline bool
579 is_stem_preventer(unsigned ch)
580 {
581  return (ch && ch < 128 && strchr("(/\\@<>=*[{\"", ch) != NULL);
582 }
583 
584 inline bool
585 should_stem(const string & term)
586 {
587  const unsigned int SHOULD_STEM_MASK =
588  (1 << Unicode::LOWERCASE_LETTER) |
589  (1 << Unicode::TITLECASE_LETTER) |
590  (1 << Unicode::MODIFIER_LETTER) |
591  (1 << Unicode::OTHER_LETTER);
592  Utf8Iterator u(term);
593  return ((SHOULD_STEM_MASK >> Unicode::get_category(*u)) & 1);
594 }
595 
599 const unsigned UNICODE_IGNORE = numeric_limits<unsigned>::max();
600 
601 inline unsigned check_infix(unsigned ch) {
602  if (ch == '\'' || ch == '&' || ch == 0xb7 || ch == 0x5f4 || ch == 0x2027) {
603  // Unicode includes all these except '&' in its word boundary rules,
604  // as well as 0x2019 (which we handle below) and ':' (for Swedish
605  // apparently, but we ignore this for now as it's problematic in
606  // real world cases).
607  return ch;
608  }
609  if (ch >= 0x200b) {
610  // 0x2019 is Unicode apostrophe and single closing quote.
611  // 0x201b is Unicode single opening quote with the tail rising.
612  if (ch == 0x2019 || ch == 0x201b)
613  return '\'';
614  if (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff)
615  return UNICODE_IGNORE;
616  }
617  return 0;
618 }
619 
620 inline unsigned check_infix_digit(unsigned ch) {
621  // This list of characters comes from Unicode's word identifying algorithm.
622  switch (ch) {
623  case ',':
624  case '.':
625  case ';':
626  case 0x037e: // GREEK QUESTION MARK
627  case 0x0589: // ARMENIAN FULL STOP
628  case 0x060D: // ARABIC DATE SEPARATOR
629  case 0x07F8: // NKO COMMA
630  case 0x2044: // FRACTION SLASH
631  case 0xFE10: // PRESENTATION FORM FOR VERTICAL COMMA
632  case 0xFE13: // PRESENTATION FORM FOR VERTICAL COLON
633  case 0xFE14: // PRESENTATION FORM FOR VERTICAL SEMICOLON
634  return ch;
635  }
636  if (ch >= 0x200b && (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff))
637  return UNICODE_IGNORE;
638  return 0;
639 }
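// Taken together, the two helpers above keep single embedded punctuation
// inside a term instead of splitting on it (illustrative examples, not an
// exhaustive list): check_infix() lets "AT&T" and "Fred's" survive as one
// term and normalises the Unicode apostrophes 0x2019 and 0x201b to ASCII
// "'", while check_infix_digit() keeps "3.14" or "1,000" together when the
// characters on both sides are digits.  Zero-width and joiner characters
// map to UNICODE_IGNORE and are simply dropped.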
640 
641 // Prototype a function lemon generates, but which we want to call before that
642 // in the generated source code file.
643 struct yyParser;
644 static void yy_parse_failed(yyParser *);
645 
646 void
647 QueryParser::Internal::add_prefix(const string &field, const string &prefix)
648 {
649  map<string, FieldInfo>::iterator p = field_map.find(field);
650  if (p == field_map.end()) {
651  field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, prefix)));
652  } else {
653  // Check that this is the same type of filter as the existing one(s).
654  if (p->second.type != NON_BOOLEAN) {
655  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter");
656  }
657  if (p->second.proc.get())
658  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
659  p->second.prefixes.push_back(prefix);
660  }
661 }
662 
663 void
664 QueryParser::Internal::add_prefix(const string &field, FieldProcessor *proc)
665 {
666  map<string, FieldInfo>::iterator p = field_map.find(field);
667  if (p == field_map.end()) {
668  field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, proc)));
669  } else {
670  // Check that this is the same type of filter as the existing one(s).
671  if (p->second.type != NON_BOOLEAN) {
672  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter");
673  }
674  if (!p->second.prefixes.empty())
675  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
676  throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects for the same prefix currently not supported");
677  }
678 }
679 
680 void
681 QueryParser::Internal::add_boolean_prefix(const string &field,
682  const string &prefix,
683  const string* grouping)
684 {
685  // Don't allow the empty prefix to be set as boolean as it doesn't
686  // really make sense.
687  if (field.empty())
688  throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
689  if (!grouping) grouping = &field;
690  filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE;
691  map<string, FieldInfo>::iterator p = field_map.find(field);
692  if (p == field_map.end()) {
693  field_map.insert(make_pair(field, FieldInfo(type, prefix, *grouping)));
694  } else {
695  // Check that this is the same type of filter as the existing one(s).
696  if (p->second.type != type) {
697  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME
698  }
699  if (p->second.proc.get())
700  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
701  p->second.prefixes.push_back(prefix); // FIXME grouping
702  }
703 }
704 
705 void
706 QueryParser::Internal::add_boolean_prefix(const string &field,
707  FieldProcessor *proc,
708  const string* grouping)
709 {
710  // Don't allow the empty prefix to be set as boolean as it doesn't
711  // really make sense.
712  if (field.empty())
713  throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter");
714  if (!grouping) grouping = &field;
715  filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE;
716  map<string, FieldInfo>::iterator p = field_map.find(field);
717  if (p == field_map.end()) {
718  field_map.insert(make_pair(field, FieldInfo(type, proc, *grouping)));
719  } else {
720  // Check that this is the same type of filter as the existing one(s).
721  if (p->second.type != type) {
722  throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME
723  }
724  if (!p->second.prefixes.empty())
725  throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported");
726  throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects for the same prefix currently not supported");
727  }
728 }
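/* Illustrative sketch (left disabled so it does not affect the generated
** parser): how the public QueryParser API reaches the add_prefix() /
** add_boolean_prefix() internals above.  The field names and prefix strings
** below are examples chosen for the sketch, not anything mandated by Xapian.
*/
#if 0
static void
example_prefix_setup()
{
    Xapian::QueryParser qp;
    // "title:dogs" is parsed as a probabilistic term with the "S" prefix.
    qp.add_prefix("title", "S");
    // "site:example.org" becomes a boolean filter term with the "H" prefix;
    // several site: filters share a grouping, so they are OR'd together.
    qp.add_boolean_prefix("site", "H");
    Xapian::Query q = qp.parse_query("title:dogs site:example.org");
}
#endif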
729 
730 string
731 QueryParser::Internal::parse_term(Utf8Iterator &it, const Utf8Iterator &end,
732  bool cjk_ngram, bool & is_cjk_term,
733  bool &was_acronym)
734 {
735  string term;
736  // Look for initials separated by '.' (e.g. P.T.O., U.N.C.L.E).
737  // Don't worry if there's a trailing '.' or not.
738  if (U_isupper(*it)) {
739  string t;
740  Utf8Iterator p = it;
741  do {
742  Unicode::append_utf8(t, *p++);
743  } while (p != end && *p == '.' && ++p != end && U_isupper(*p));
744  // One letter does not make an acronym! If we handled a single
745  // uppercase letter here, we wouldn't catch M&S below.
746  if (t.length() > 1) {
747  // Check there's not a (lower case) letter or digit
748  // immediately after it.
749  // FIXME: should I.B.M..P.T.O be a range search?
750  if (p == end || !is_wordchar(*p)) {
751  it = p;
752  swap(term, t);
753  }
754  }
755  }
756  was_acronym = !term.empty();
757 
758  if (cjk_ngram && term.empty() && CJK::codepoint_is_cjk(*it)) {
759  const char* cjk = it.raw();
760  CJK::get_cjk(it);
761  term.assign(cjk, it.raw() - cjk);
762  is_cjk_term = true;
763  }
764 
765  if (term.empty()) {
766  unsigned prevch = *it;
767  Unicode::append_utf8(term, prevch);
768  while (++it != end) {
769  if (cjk_ngram && CJK::codepoint_is_cjk(*it)) break;
770  unsigned ch = *it;
771  if (!is_wordchar(ch)) {
772  // Treat a single embedded '&' or "'" or similar as a word
773  // character (e.g. AT&T, Fred's). Also, normalise
774  // apostrophes to ASCII apostrophe.
775  Utf8Iterator p = it;
776  ++p;
777  if (p == end || !is_wordchar(*p)) break;
778  unsigned nextch = *p;
779  if (is_digit(prevch) && is_digit(nextch)) {
780  ch = check_infix_digit(ch);
781  } else {
782  ch = check_infix(ch);
783  }
784  if (!ch) break;
785  if (ch == UNICODE_IGNORE)
786  continue;
787  }
788  Unicode::append_utf8(term, ch);
789  prevch = ch;
790  }
791  if (it != end && is_suffix(*it)) {
792  string suff_term = term;
793  Utf8Iterator p = it;
794  // Keep trailing + (e.g. C++, Na+) or # (e.g. C#).
795  do {
796  if (suff_term.size() - term.size() == 3) {
797  suff_term.resize(0);
798  break;
799  }
800  suff_term += *p;
801  } while (is_suffix(*++p));
802  if (!suff_term.empty() && (p == end || !is_wordchar(*p))) {
803  // If the suffixed term doesn't exist, check that the
804  // non-suffixed term does. This also takes care of
805  // the case when QueryParser::set_database() hasn't
806  // been called.
807  bool use_suff_term = false;
808  string lc = Unicode::tolower(suff_term);
809  if (db.term_exists(lc)) {
810  use_suff_term = true;
811  } else {
812  lc = Unicode::tolower(term);
813  if (!db.term_exists(lc)) use_suff_term = true;
814  }
815  if (use_suff_term) {
816  term = suff_term;
817  it = p;
818  }
819  }
820  }
821  }
822  return term;
823 }
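// A few illustrative inputs for parse_term() based on the cases handled
// above (assuming a database where the suffixed forms exist, or no database
// at all): "P.T.O." collapses to the acronym "PTO"; "AT&T" and "Fred's"
// stay single terms via check_infix(); "C++", "C#" and "Na+" keep their
// trailing suffix characters; and when cjk_ngram is enabled a run of CJK
// text is returned as one chunk with is_cjk_term set.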
824 
825 #line 1412 "queryparser/queryparser.lemony"
826 
827 
828 struct ProbQuery {
829  Query* query = NULL;
830  Query* love = NULL;
831  Query* hate = NULL;
832  // filter is a map from prefix to a query for that prefix. Queries with
833  // the same prefix are combined with OR, and the results of this are
834  // combined with AND to get the full filter.
835  map<string, Query> filter;
836 
838 
839  explicit
840  ProbQuery(Query* query_) : query(query_) {}
841 
842  ~ProbQuery() {
843  delete query;
844  delete love;
845  delete hate;
846  }
847 
848  void add_filter(const string& grouping, const Query & q) {
849  filter[grouping] = q;
850  }
851 
852  void append_filter(const string& grouping, const Query & qnew) {
853  auto it = filter.find(grouping);
854  if (it == filter.end()) {
855  filter.insert(make_pair(grouping, qnew));
856  } else {
857  Query & q = it->second;
858  // We OR multiple filters with the same prefix if they're
859  // exclusive, otherwise we AND them.
860  bool exclusive = !grouping.empty();
861  if (exclusive) {
862  q |= qnew;
863  } else {
864  q &= qnew;
865  }
866  }
867  }
868 
869  void add_filter_range(const string& grouping, const Query & range) {
870  filter[grouping] = range;
871  }
872 
873  void append_filter_range(const string& grouping, const Query & range) {
874  Query & q = filter[grouping];
875  q |= range;
876  }
877 
878  Query merge_filters() const {
879  auto i = filter.begin();
880  Assert(i != filter.end());
881  Query q = i->second;
882  while (++i != filter.end()) {
883  q &= i->second;
884  }
885  return q;
886  }
887 };
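// Worked example of the filter handling above (the field names are purely
// illustrative): for a query like "type:book site:a.org site:b.org", the
// two site: filters share a non-empty grouping and so are OR'd by
// append_filter(), while merge_filters() ANDs the per-grouping queries
// together, giving (site-a OR site-b) AND type-book.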
888 
890 class TermGroup {
891  vector<Term *> terms;
892 
898  bool empty_ok;
899 
900  TermGroup(Term* t1, Term* t2) : empty_ok(false) {
901  add_term(t1);
902  add_term(t2);
903  }
904 
905  public:
907  static TermGroup* create(Term* t1, Term* t2) {
908  return new TermGroup(t1, t2);
909  }
910 
911  ~TermGroup() {
912  for (auto&& t : terms) {
913  delete t;
914  }
915  }
916 
918  void add_term(Term * term) {
919  terms.push_back(term);
920  }
921 
923  void set_empty_ok() { empty_ok = true; }
924 
926  Query * as_group(State *state) const;
927 };
928 
929 Query *
930 TermGroup::as_group(State *state) const
931 {
932  const Xapian::Stopper * stopper = state->get_stopper();
933  size_t stoplist_size = state->stoplist_size();
934  bool default_op_is_positional = is_positional(state->default_op());
935 reprocess:
936  Query::op default_op = state->default_op();
937  vector<Query> subqs;
938  subqs.reserve(terms.size());
939  if (state->flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
940  // Check for multi-word synonyms.
941  Database db = state->get_database();
942 
943  string key;
944  vector<Term*>::size_type begin = 0;
945  vector<Term*>::size_type i = begin;
946  while (terms.size() - i > 0) {
947  size_t longest_match = 0;
948  // This value is never used, but GCC 4.8 warns with
949  // -Wmaybe-uninitialized (GCC 5.4 doesn't).
950  vector<Term*>::size_type longest_match_end = 0;
951  if (terms.size() - i >= 2) {
952  // Greedily try to match as many consecutive words as possible.
953  key = terms[i]->name;
954  key += ' ';
955  key += terms[i + 1]->name;
956  TermIterator synkey(db.synonym_keys_begin(key));
957  TermIterator synend(db.synonym_keys_end(key));
958  if (synkey != synend) {
959  longest_match = key.size();
960  longest_match_end = i + 2;
961  for (auto j = i + 2; j < terms.size(); ++j) {
962  key += ' ';
963  key += terms[j]->name;
964  synkey.skip_to(key);
965  if (synkey == synend)
966  break;
967  const string& found = *synkey;
968  if (!startswith(found, key))
969  break;
970  if (found.size() == key.size()) {
971  longest_match = key.size();
972  longest_match_end = j + 1;
973  }
974  }
975  }
976  }
977  if (longest_match == 0) {
978  // No multi-synonym matches at position i.
979  if (stopper && (*stopper)(terms[i]->name)) {
980  state->add_to_stoplist(terms[i]);
981  } else {
982  if (default_op_is_positional)
983  terms[i]->need_positions();
984  subqs.push_back(terms[i]->get_query_with_auto_synonyms());
985  }
986  begin = ++i;
987  continue;
988  }
989  i = longest_match_end;
990  key.resize(longest_match);
991 
992  vector<Query> subqs2;
993  for (auto j = begin; j != i; ++j) {
994  if (stopper && (*stopper)(terms[j]->name)) {
995  state->add_to_stoplist(terms[j]);
996  } else {
997  if (default_op_is_positional)
998  terms[j]->need_positions();
999  subqs2.push_back(terms[j]->get_query());
1000  }
1001  }
1002  Query q_original_terms;
1003  if (default_op_is_positional) {
1004  q_original_terms = Query(default_op,
1005  subqs2.begin(), subqs2.end(),
1006  subqs2.size() + 9);
1007  } else {
1008  q_original_terms = Query(default_op,
1009  subqs2.begin(), subqs2.end());
1010  }
1011  subqs2.clear();
1012 
1013  // Use the position of the first term for the synonyms.
1014  TermIterator syn = db.synonyms_begin(key);
1015  Query q(Query::OP_SYNONYM,
1016  SynonymIterator(syn, terms[begin]->pos, &q_original_terms),
1017  SynonymIterator(db.synonyms_end(key)));
1018  subqs.push_back(q);
1019 
1020  begin = i;
1021  }
1022  } else {
1023  vector<Term*>::const_iterator i;
1024  for (i = terms.begin(); i != terms.end(); ++i) {
1025  if (stopper && (*stopper)((*i)->name)) {
1026  state->add_to_stoplist(*i);
1027  } else {
1028  if (default_op_is_positional)
1029  (*i)->need_positions();
1030  subqs.push_back((*i)->get_query_with_auto_synonyms());
1031  }
1032  }
1033  }
1034 
1035  if (!empty_ok && stopper && subqs.empty() &&
1036  stoplist_size < state->stoplist_size()) {
1037  // This group is all stopwords, so roll-back, disable stopper
1038  // temporarily, and reprocess this group.
1039  state->stoplist_resize(stoplist_size);
1040  stopper = NULL;
1041  goto reprocess;
1042  }
1043 
1044  Query * q = NULL;
1045  if (!subqs.empty()) {
1046  if (default_op_is_positional) {
1047  q = new Query(default_op, subqs.begin(), subqs.end(),
1048  subqs.size() + 9);
1049  } else {
1050  q = new Query(default_op, subqs.begin(), subqs.end());
1051  }
1052  }
1053  delete this;
1054  return q;
1055 }
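// Illustrative walk-through of the synonym matching above: with
// FLAG_AUTO_MULTIWORD_SYNONYMS set and a synonym entry keyed on
// "north pole", the consecutive terms "north" and "pole" are greedily
// matched (longer keys are preferred when present), the original terms are
// combined with the default operator, and that query is then combined with
// OP_SYNONYM against the expansions returned by synonyms_begin("north pole").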
1056 
1058 class Terms {
1059  vector<Term *> terms;
1060 
1066  size_t window;
1067 
1078 
1082  const vector<string>* prefixes;
1083 
1084  Query opwindow_subq(Query::op op,
1085  const vector<Query>& v,
1086  Xapian::termcount w) const {
1087  if (op == Query::OP_AND) {
1088  return Query(op, v.begin(), v.end());
1089  }
1090  return Query(op, v.begin(), v.end(), w);
1091  }
1092 
1094  Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const {
1095  if (window == size_t(-1)) op = Query::OP_AND;
1096  Query * q = NULL;
1097  size_t n_terms = terms.size();
1098  Xapian::termcount w = w_delta + terms.size();
1099  if (uniform_prefixes) {
1100  if (prefixes) {
1101  for (auto&& prefix : *prefixes) {
1102  vector<Query> subqs;
1103  subqs.reserve(n_terms);
1104  for (Term* t : terms) {
1105  subqs.push_back(Query(t->make_term(prefix), 1, t->pos));
1106  }
1107  add_to_query(q, Query::OP_OR, opwindow_subq(op, subqs, w));
1108  }
1109  }
1110  } else {
1111  vector<Query> subqs;
1112  subqs.reserve(n_terms);
1113  for (Term* t : terms) {
1114  subqs.push_back(t->get_query());
1115  }
1116  q = new Query(opwindow_subq(op, subqs, w));
1117  }
1118 
1119  delete this;
1120  return q;
1121  }
1122 
1123  explicit Terms(bool no_pos)
1124  : window(no_pos ? size_t(-1) : 0),
1125  uniform_prefixes(true),
1126  prefixes(NULL) { }
1127 
1128  public:
1130  static Terms* create(State* state) {
1131  return new Terms(state->flags & QueryParser::FLAG_NO_POSITIONS);
1132  }
1133 
1134  ~Terms() {
1135  for (auto&& t : terms) {
1136  delete t;
1137  }
1138  }
1139 
1141  void add_positional_term(Term * term) {
1142  const auto& term_prefixes = term->field_info->prefixes;
1143  if (terms.empty()) {
1144  prefixes = &term_prefixes;
1145  } else if (uniform_prefixes && prefixes != &term_prefixes) {
1146  if (*prefixes != term_prefixes) {
1147  prefixes = NULL;
1148  uniform_prefixes = false;
1149  }
1150  }
1151  term->need_positions();
1152  terms.push_back(term);
1153  }
1154 
1155  void adjust_window(size_t alternative_window) {
1156  if (alternative_window > window) window = alternative_window;
1157  }
1158 
1160  Query * as_phrase_query() const {
1161  return as_opwindow_query(Query::OP_PHRASE, 0);
1162  }
1163 
1165  Query * as_near_query() const {
1166  // The common meaning of 'a NEAR b' is "a within 10 terms of b", which
1167  // means a window size of 11. For more than 2 terms, we just add one
1168  // to the window size for each extra term.
1169  size_t w = window;
1170  if (w == 0) w = 10;
1171  return as_opwindow_query(Query::OP_NEAR, w - 1);
1172  }
1173 
1175  Query * as_adj_query() const {
1176  // The common meaning of 'a ADJ b' is "a at most 10 terms before b",
1177  // which means a window size of 11. For more than 2 terms, we just add
1178  // one to the window size for each extra term.
1179  size_t w = window;
1180  if (w == 0) w = 10;
1181  return as_opwindow_query(Query::OP_PHRASE, w - 1);
1182  }
1183 };
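// Window arithmetic for the class above, worked through for the default
// case: as_near_query() and as_adj_query() turn the default window of 10
// into w_delta = 9, and as_opwindow_query() adds the number of terms, so
// "a NEAR b" gets an OP_NEAR window of 11 and "a NEAR b NEAR c" one of 12.
// adjust_window() only ever widens the window, and with FLAG_NO_POSITIONS
// the sentinel window of size_t(-1) degrades everything to OP_AND.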
1184 
1185 void
1186 Term::as_positional_cjk_term(Terms * terms) const
1187 {
1188  // Add each individual CJK character to the phrase.
1189  string t;
1190  for (Utf8Iterator it(name); it != Utf8Iterator(); ++it) {
1191  Unicode::append_utf8(t, *it);
1192  Term * c = new Term(state, t, field_info, unstemmed, stem, pos);
1193  terms->add_positional_term(c);
1194  t.resize(0);
1195  }
1196 
1197  // FIXME: we want to add the n-grams as filters too for efficiency.
1198 
1199  delete this;
1200 }
1201 
1202 // Helper macro to check for missing arguments to a boolean operator.
1203 #define VET_BOOL_ARGS(A, B, OP_TXT) \
1204  do {\
1205  if (!A || !B) {\
1206  state->error = "Syntax: <expression> " OP_TXT " <expression>";\
1207  yy_parse_failed(yypParser);\
1208  return;\
1209  }\
1210  } while (0)
1211 
1212 #line 1213 "queryparser/queryparser_internal.cc"
1213 /**************** End of %include directives **********************************/
1214 /* These constants specify the various numeric values for terminal symbols
1215 ** in a format understandable to "makeheaders". This section is blank unless
1216 ** "lemon" is run with the "-m" command-line option.
1217 ***************** Begin makeheaders token definitions *************************/
1218 /**************** End makeheaders token definitions ***************************/
1219 
1220 /* The next section is a series of control #defines governing
1221 ** various aspects of the generated parser.
1222 ** YYCODETYPE is the data type used to store the integer codes
1223 ** that represent terminal and non-terminal symbols.
1224 ** "unsigned char" is used if there are fewer than
1225 ** 256 symbols. Larger types otherwise.
1226 ** YYNOCODE is a number of type YYCODETYPE that is not used for
1227 ** any terminal or nonterminal symbol.
1228 ** YYFALLBACK If defined, this indicates that one or more tokens
1229 ** (also known as: "terminal symbols") have fall-back
1230 ** values which should be used if the original symbol
1231 ** would not parse. This permits keywords to sometimes
1232 ** be used as identifiers, for example.
1233 ** YYACTIONTYPE is the data type used for "action codes" - numbers
1234 ** that indicate what to do in response to the next
1235 ** token.
1236 ** ParseTOKENTYPE is the data type used for minor type for terminal
1237 ** symbols. Background: A "minor type" is a semantic
1238 ** value associated with a terminal or non-terminal
1239 ** symbols. For example, for an "ID" terminal symbol,
1240 ** the minor type might be the name of the identifier.
1241 ** Each non-terminal can have a different minor type.
1242 ** Terminal symbols all have the same minor type, though.
1243 ** This macros defines the minor type for terminal
1244 ** symbols.
1245 ** YYMINORTYPE is the data type used for all minor types.
1246 ** This is typically a union of many types, one of
1247 ** which is ParseTOKENTYPE. The entry in the union
1248 ** for terminal symbols is called "yy0".
1249 ** YYSTACKDEPTH is the maximum depth of the parser's stack. If
1250 ** zero the stack is dynamically sized using realloc()
1251 ** ParseARG_SDECL A static variable declaration for the %extra_argument
1252 ** ParseARG_PDECL A parameter declaration for the %extra_argument
1253 ** ParseARG_STORE Code to store %extra_argument into yypParser
1254 ** ParseARG_FETCH Code to extract %extra_argument from yypParser
1255 ** YYERRORSYMBOL is the code number of the error symbol. If not
1256 ** defined, then do no error processing.
1257 ** YYNSTATE the combined number of states.
1258 ** YYNRULE the number of rules in the grammar
1259 ** YYNTOKEN Number of terminal symbols
1260 ** YY_MAX_SHIFT Maximum value for shift actions
1261 ** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1262 ** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1263 ** YY_ERROR_ACTION The yy_action[] code for syntax error
1264 ** YY_ACCEPT_ACTION The yy_action[] code for accept
1265 ** YY_NO_ACTION The yy_action[] code for no-op
1266 ** YY_MIN_REDUCE Minimum value for reduce actions
1267 ** YY_MAX_REDUCE Maximum value for reduce actions
1268 */
1269 #ifndef INTERFACE
1270 # define INTERFACE 1
1271 #endif
1272 /************* Begin control #defines *****************************************/
1273 #define YYCODETYPE unsigned char
1274 #define YYNOCODE 40
1275 #define YYACTIONTYPE unsigned char
1276 #define ParseTOKENTYPE Term *
1277 typedef union {
1278  int yyinit;
1279  ParseTOKENTYPE yy0;
1280  TermGroup * yy14;
1281  Terms * yy32;
1282  Query * yy39;
1283  ProbQuery * yy40;
1284  int yy46;
1285 } YYMINORTYPE;
1286 #ifndef YYSTACKDEPTH
1287 #define YYSTACKDEPTH 100
1288 #endif
1289 #define ParseARG_SDECL State * state;
1290 #define ParseARG_PDECL ,State * state
1291 #define ParseARG_FETCH State * state = yypParser->state
1292 #define ParseARG_STORE yypParser->state = state
1293 #define YYNSTATE 35
1294 #define YYNRULE 56
1295 #define YYNTOKEN 24
1296 #define YY_MAX_SHIFT 34
1297 #define YY_MIN_SHIFTREDUCE 77
1298 #define YY_MAX_SHIFTREDUCE 132
1299 #define YY_ERROR_ACTION 133
1300 #define YY_ACCEPT_ACTION 134
1301 #define YY_NO_ACTION 135
1302 #define YY_MIN_REDUCE 136
1303 #define YY_MAX_REDUCE 191
1304 /************* End control #defines *******************************************/
1305 
1306 /* Define the yytestcase() macro to be a no-op if it is not already defined
1307 ** otherwise.
1308 **
1309 ** Applications can choose to define yytestcase() in the %include section
1310 ** to a macro that can assist in verifying code coverage. For production
1311 ** code the yytestcase() macro should be turned off. But it is useful
1312 ** for testing.
1313 */
1314 #ifndef yytestcase
1315 # define yytestcase(X)
1316 #endif
1317 
1318 
1319 /* Next are the tables used to determine what action to take based on the
1320 ** current state and lookahead token. These tables are used to implement
1321 ** functions that take a state number and lookahead value and return an
1322 ** action integer.
1323 **
1324 ** Suppose the action integer is N. Then the action is determined as
1325 ** follows
1326 **
1327 ** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead
1328 ** token onto the stack and goto state N.
1329 **
1330 ** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
1331 ** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE.
1332 **
1333 ** N == YY_ERROR_ACTION A syntax error has occurred.
1334 **
1335 ** N == YY_ACCEPT_ACTION The parser accepts its input.
1336 **
1337 ** N == YY_NO_ACTION No such action. Denotes unused
1338 ** slots in the yy_action[] table.
1339 **
1340 ** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE
1341 ** and YY_MAX_REDUCE
1342 **
1343 ** The action table is constructed as a single large table named yy_action[].
1344 ** Given state S and lookahead X, the action is computed as either:
1345 **
1346 ** (A) N = yy_action[ yy_shift_ofst[S] + X ]
1347 ** (B) N = yy_default[S]
1348 **
1349 ** The (A) formula is preferred. The (B) formula is used instead if
1350 ** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
1351 **
1352 ** The formulas above are for computing the action when the lookahead is
1353 ** a terminal symbol. If the lookahead is a non-terminal (as occurs after
1354 ** a reduce action) then the yy_reduce_ofst[] array is used in place of
1355 ** the yy_shift_ofst[] array.
1356 **
1357 ** The following are the tables generated in this section:
1358 **
1359 ** yy_action[] A single table containing all actions.
1360 ** yy_lookahead[] A table containing the lookahead for each entry in
1361 ** yy_action. Used to detect hash collisions.
1362 ** yy_shift_ofst[] For each state, the offset into yy_action for
1363 ** shifting terminals.
1364 ** yy_reduce_ofst[] For each state, the offset into yy_action for
1365 ** shifting non-terminals after a reduce.
1366 ** yy_default[] Default action for each state.
1367 **
1368 *********** Begin parsing tables **********************************************/
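/* Illustrative sketch of formulas (A) and (B) above (left disabled; the real
** lookup, with its bounds checks, assertions and fallback handling, is
** yy_find_shift_action() further down in this file):
*/
#if 0
static YYACTIONTYPE lookup_action(int stateno, YYCODETYPE iLookAhead){
  int i = yy_shift_ofst[stateno] + iLookAhead;
  if( yy_lookahead[i]==iLookAhead ){
    return yy_action[i];        /* formula (A): table hit */
  }
  return yy_default[stateno];   /* formula (B): fall back to the default */
}
#endif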
1369 #define YY_ACTTAB_COUNT (326)
1370 static const YYACTIONTYPE yy_action[] = {
1371  /* 0 */ 134, 34, 34, 20, 8, 34, 18, 13, 16, 27,
1372  /* 10 */ 31, 23, 30, 28, 3, 21, 112, 10, 9, 2,
1373  /* 20 */ 25, 15, 111, 114, 104, 105, 97, 87, 14, 4,
1374  /* 30 */ 137, 113, 126, 115, 12, 11, 1, 7, 10, 9,
1375  /* 40 */ 124, 25, 15, 98, 88, 104, 105, 97, 87, 14,
1376  /* 50 */ 4, 29, 113, 138, 138, 138, 8, 138, 18, 13,
1377  /* 60 */ 16, 119, 31, 23, 30, 28, 141, 141, 141, 8,
1378  /* 70 */ 141, 18, 13, 16, 125, 31, 23, 30, 28, 140,
1379  /* 80 */ 140, 140, 8, 140, 18, 13, 16, 123, 31, 23,
1380  /* 90 */ 30, 28, 26, 26, 20, 8, 26, 18, 13, 16,
1381  /* 100 */ 136, 31, 23, 30, 28, 24, 24, 24, 8, 24,
1382  /* 110 */ 18, 13, 16, 135, 31, 23, 30, 28, 22, 22,
1383  /* 120 */ 22, 8, 22, 18, 13, 16, 135, 31, 23, 30,
1384  /* 130 */ 28, 139, 139, 139, 8, 139, 18, 13, 16, 121,
1385  /* 140 */ 31, 23, 30, 28, 10, 9, 135, 25, 15, 122,
1386  /* 150 */ 135, 104, 105, 97, 87, 14, 4, 135, 113, 135,
1387  /* 160 */ 189, 189, 135, 25, 19, 135, 135, 104, 105, 189,
1388  /* 170 */ 189, 14, 4, 162, 113, 162, 162, 162, 162, 33,
1389  /* 180 */ 32, 33, 32, 116, 135, 135, 120, 118, 120, 118,
1390  /* 190 */ 106, 25, 17, 117, 162, 104, 105, 95, 135, 14,
1391  /* 200 */ 4, 135, 113, 25, 17, 135, 135, 104, 105, 99,
1392  /* 210 */ 135, 14, 4, 135, 113, 25, 17, 135, 135, 104,
1393  /* 220 */ 105, 96, 135, 14, 4, 135, 113, 25, 17, 135,
1394  /* 230 */ 135, 104, 105, 100, 135, 14, 4, 135, 113, 25,
1395  /* 240 */ 19, 135, 135, 104, 105, 135, 135, 14, 4, 135,
1396  /* 250 */ 113, 135, 149, 149, 135, 31, 23, 30, 28, 152,
1397  /* 260 */ 135, 135, 152, 135, 31, 23, 30, 28, 135, 150,
1398  /* 270 */ 135, 135, 150, 135, 31, 23, 30, 28, 153, 135,
1399  /* 280 */ 135, 153, 135, 31, 23, 30, 28, 151, 135, 135,
1400  /* 290 */ 151, 135, 31, 23, 30, 28, 135, 148, 148, 135,
1401  /* 300 */ 31, 23, 30, 28, 191, 135, 191, 191, 191, 191,
1402  /* 310 */ 6, 5, 1, 7, 5, 1, 7, 135, 135, 135,
1403  /* 320 */ 135, 135, 135, 135, 135, 191,
1404 };
1405 static const YYCODETYPE yy_lookahead[] = {
1406  /* 0 */ 25, 26, 27, 28, 29, 30, 31, 32, 33, 7,
1407  /* 10 */ 35, 36, 37, 38, 5, 34, 12, 8, 9, 10,
1408  /* 20 */ 11, 12, 21, 12, 15, 16, 17, 18, 19, 20,
1409  /* 30 */ 0, 22, 12, 22, 8, 9, 4, 5, 8, 9,
1410  /* 40 */ 12, 11, 12, 17, 18, 15, 16, 17, 18, 19,
1411  /* 50 */ 20, 6, 22, 26, 27, 28, 29, 30, 31, 32,
1412  /* 60 */ 33, 14, 35, 36, 37, 38, 26, 27, 28, 29,
1413  /* 70 */ 30, 31, 32, 33, 12, 35, 36, 37, 38, 26,
1414  /* 80 */ 27, 28, 29, 30, 31, 32, 33, 12, 35, 36,
1415  /* 90 */ 37, 38, 26, 27, 28, 29, 30, 31, 32, 33,
1416  /* 100 */ 0, 35, 36, 37, 38, 26, 27, 28, 29, 30,
1417  /* 110 */ 31, 32, 33, 39, 35, 36, 37, 38, 26, 27,
1418  /* 120 */ 28, 29, 30, 31, 32, 33, 39, 35, 36, 37,
1419  /* 130 */ 38, 26, 27, 28, 29, 30, 31, 32, 33, 13,
1420  /* 140 */ 35, 36, 37, 38, 8, 9, 39, 11, 12, 23,
1421  /* 150 */ 39, 15, 16, 17, 18, 19, 20, 39, 22, 39,
1422  /* 160 */ 8, 9, 39, 11, 12, 39, 39, 15, 16, 17,
1423  /* 170 */ 18, 19, 20, 0, 22, 2, 3, 4, 5, 6,
1424  /* 180 */ 7, 6, 7, 12, 39, 39, 13, 14, 13, 14,
1425  /* 190 */ 19, 11, 12, 22, 21, 15, 16, 17, 39, 19,
1426  /* 200 */ 20, 39, 22, 11, 12, 39, 39, 15, 16, 17,
1427  /* 210 */ 39, 19, 20, 39, 22, 11, 12, 39, 39, 15,
1428  /* 220 */ 16, 17, 39, 19, 20, 39, 22, 11, 12, 39,
1429  /* 230 */ 39, 15, 16, 17, 39, 19, 20, 39, 22, 11,
1430  /* 240 */ 12, 39, 39, 15, 16, 39, 39, 19, 20, 39,
1431  /* 250 */ 22, 39, 32, 33, 39, 35, 36, 37, 38, 30,
1432  /* 260 */ 39, 39, 33, 39, 35, 36, 37, 38, 39, 30,
1433  /* 270 */ 39, 39, 33, 39, 35, 36, 37, 38, 30, 39,
1434  /* 280 */ 39, 33, 39, 35, 36, 37, 38, 30, 39, 39,
1435  /* 290 */ 33, 39, 35, 36, 37, 38, 39, 32, 33, 39,
1436  /* 300 */ 35, 36, 37, 38, 0, 39, 2, 3, 4, 5,
1437  /* 310 */ 2, 3, 4, 5, 3, 4, 5, 39, 39, 39,
1438  /* 320 */ 39, 39, 39, 39, 39, 21, 39, 39, 39, 39,
1439  /* 330 */ 39, 39, 39, 39, 39, 39, 39, 39, 39,
1440 };
1441 #define YY_SHIFT_COUNT (34)
1442 #define YY_SHIFT_MIN (0)
1443 #define YY_SHIFT_MAX (311)
1444 static const unsigned short int yy_shift_ofst[] = {
1445  /* 0 */ 30, 9, 136, 136, 136, 136, 136, 136, 152, 180,
1446  /* 10 */ 192, 204, 216, 228, 11, 173, 304, 175, 26, 175,
1447  /* 20 */ 308, 171, 311, 126, 32, 4, 1, 20, 2, 28,
1448  /* 30 */ 45, 47, 62, 75, 100,
1449 };
1450 #define YY_REDUCE_COUNT (14)
1451 #define YY_REDUCE_MIN (-25)
1452 #define YY_REDUCE_MAX (265)
1453 static const short yy_reduce_ofst[] = {
1454  /* 0 */ -25, 27, 40, 53, 66, 79, 92, 105, 220, 229,
1455  /* 10 */ 239, 248, 257, 265, -19,
1456 };
1457 static const YYACTIONTYPE yy_default[] = {
1458  /* 0 */ 144, 144, 144, 144, 144, 144, 144, 144, 145, 133,
1459  /* 10 */ 133, 133, 133, 160, 133, 161, 190, 162, 133, 161,
1460  /* 20 */ 133, 133, 142, 167, 143, 133, 187, 133, 169, 133,
1461  /* 30 */ 168, 166, 133, 133, 187,
1462 };
1463 /********** End of lemon-generated parsing tables *****************************/
1464 
1465 /* The next table maps tokens (terminal symbols) into fallback tokens.
1466 ** If a construct like the following:
1467 **
1468 ** %fallback ID X Y Z.
1469 **
1470 ** appears in the grammar, then ID becomes a fallback token for X, Y,
1471 ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser
1472 ** but it does not parse, the type of the token is changed to ID and
1473 ** the parse is retried before an error is thrown.
1474 **
1475 ** This feature can be used, for example, to cause some keywords in a language
1476 ** to revert to identifiers if the keyword does not apply in the context where
1477 ** it appears.
1478 */
1479 #ifdef YYFALLBACK
1480 static const YYCODETYPE yyFallback[] = {
1481 };
1482 #endif /* YYFALLBACK */
1483 
1484 /* The following structure represents a single element of the
1485 ** parser's stack. Information stored includes:
1486 **
1487 ** + The state number for the parser at this level of the stack.
1488 **
1489 ** + The value of the token stored at this level of the stack.
1490 ** (In other words, the "major" token.)
1491 **
1492 ** + The semantic value stored at this level of the stack. This is
1493 ** the information used by the action routines in the grammar.
1494 ** It is sometimes called the "minor" token.
1495 **
1496 ** After the "shift" half of a SHIFTREDUCE action, the stateno field
1497 ** actually contains the reduce action for the second half of the
1498 ** SHIFTREDUCE.
1499 */
1500 struct yyStackEntry {
1501  yyStackEntry() {
1502  stateno = 0;
1503  major = 0;
1504  }
1505  yyStackEntry(YYACTIONTYPE stateno_, YYCODETYPE major_, ParseTOKENTYPE minor_) {
1506  stateno = stateno_;
1507  major = major_;
1508  minor.yy0 = minor_;
1509  }
1510  YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */
1511  YYCODETYPE major; /* The major token value. This is the code
1512  ** number for the token at this stack level */
1513  YYMINORTYPE minor; /* The user-supplied minor token value. This
1514  ** is the value of the token */
1515 };
1516 
1517 static void ParseInit(yyParser *pParser);
1518 static void ParseFinalize(yyParser *pParser);
1519 
1520 /* The state of the parser is completely contained in an instance of
1521 ** the following structure */
1522 struct yyParser {
1523 #ifdef YYTRACKMAXSTACKDEPTH
1524  int yyhwm; /* High-water mark of the stack */
1525 #endif
1526 #ifndef YYNOERRORRECOVERY
1527  int yyerrcnt; /* Shifts left before leaving error-recovery mode */
1528 #endif
1529  ParseARG_SDECL /* A place to hold %extra_argument */
1530  vector<yyStackEntry> yystack; /* The parser's stack */
1531  yyParser() {
1532  ParseInit(this);
1533  }
1534  ~yyParser() {
1535  ParseFinalize(this);
1536  }
1537 };
1538 typedef struct yyParser yyParser;
1539 
1540 #include "omassert.h"
1541 #include "debuglog.h"
1542 
1543 #if defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG)
1544 /* For tracing shifts, the names of all terminals and nonterminals
1545 ** are required. The following table supplies these names */
1546 static const char *const yyTokenName[] = {
1547  /* 0 */ "$",
1548  /* 1 */ "ERROR",
1549  /* 2 */ "OR",
1550  /* 3 */ "XOR",
1551  /* 4 */ "AND",
1552  /* 5 */ "NOT",
1553  /* 6 */ "NEAR",
1554  /* 7 */ "ADJ",
1555  /* 8 */ "LOVE",
1556  /* 9 */ "HATE",
1557  /* 10 */ "HATE_AFTER_AND",
1558  /* 11 */ "SYNONYM",
1559  /* 12 */ "TERM",
1560  /* 13 */ "GROUP_TERM",
1561  /* 14 */ "PHR_TERM",
1562  /* 15 */ "WILD_TERM",
1563  /* 16 */ "PARTIAL_TERM",
1564  /* 17 */ "BOOLEAN_FILTER",
1565  /* 18 */ "RANGE",
1566  /* 19 */ "QUOTE",
1567  /* 20 */ "BRA",
1568  /* 21 */ "KET",
1569  /* 22 */ "CJKTERM",
1570  /* 23 */ "EMPTY_GROUP_OK",
1571  /* 24 */ "error",
1572  /* 25 */ "query",
1573  /* 26 */ "expr",
1574  /* 27 */ "prob_expr",
1575  /* 28 */ "bool_arg",
1576  /* 29 */ "prob",
1577  /* 30 */ "term",
1578  /* 31 */ "stop_prob",
1579  /* 32 */ "stop_term",
1580  /* 33 */ "compound_term",
1581  /* 34 */ "phrase",
1582  /* 35 */ "phrased_term",
1583  /* 36 */ "group",
1584  /* 37 */ "near_expr",
1585  /* 38 */ "adj_expr",
1586 };
1587 
1588 /* For tracing reduce actions, the names of all rules are required.
1589 */
1590 static const char *const yyRuleName[] = {
1591  /* 0 */ "query ::= expr",
1592  /* 1 */ "query ::=",
1593  /* 2 */ "expr ::= bool_arg AND bool_arg",
1594  /* 3 */ "expr ::= bool_arg NOT bool_arg",
1595  /* 4 */ "expr ::= bool_arg AND NOT bool_arg",
1596  /* 5 */ "expr ::= bool_arg AND HATE_AFTER_AND bool_arg",
1597  /* 6 */ "expr ::= bool_arg OR bool_arg",
1598  /* 7 */ "expr ::= bool_arg XOR bool_arg",
1599  /* 8 */ "bool_arg ::=",
1600  /* 9 */ "prob_expr ::= prob",
1601  /* 10 */ "prob ::= RANGE",
1602  /* 11 */ "prob ::= stop_prob RANGE",
1603  /* 12 */ "prob ::= stop_term stop_term",
1604  /* 13 */ "prob ::= prob stop_term",
1605  /* 14 */ "prob ::= LOVE term",
1606  /* 15 */ "prob ::= stop_prob LOVE term",
1607  /* 16 */ "prob ::= HATE term",
1608  /* 17 */ "prob ::= stop_prob HATE term",
1609  /* 18 */ "prob ::= HATE BOOLEAN_FILTER",
1610  /* 19 */ "prob ::= stop_prob HATE BOOLEAN_FILTER",
1611  /* 20 */ "prob ::= BOOLEAN_FILTER",
1612  /* 21 */ "prob ::= stop_prob BOOLEAN_FILTER",
1613  /* 22 */ "prob ::= LOVE BOOLEAN_FILTER",
1614  /* 23 */ "prob ::= stop_prob LOVE BOOLEAN_FILTER",
1615  /* 24 */ "stop_prob ::= stop_term",
1616  /* 25 */ "stop_term ::= TERM",
1617  /* 26 */ "term ::= TERM",
1618  /* 27 */ "compound_term ::= WILD_TERM",
1619  /* 28 */ "compound_term ::= PARTIAL_TERM",
1620  /* 29 */ "compound_term ::= QUOTE phrase QUOTE",
1621  /* 30 */ "compound_term ::= phrased_term",
1622  /* 31 */ "compound_term ::= group",
1623  /* 32 */ "compound_term ::= near_expr",
1624  /* 33 */ "compound_term ::= adj_expr",
1625  /* 34 */ "compound_term ::= BRA expr KET",
1626  /* 35 */ "compound_term ::= SYNONYM TERM",
1627  /* 36 */ "compound_term ::= CJKTERM",
1628  /* 37 */ "phrase ::= TERM",
1629  /* 38 */ "phrase ::= CJKTERM",
1630  /* 39 */ "phrase ::= phrase TERM",
1631  /* 40 */ "phrase ::= phrase CJKTERM",
1632  /* 41 */ "phrased_term ::= TERM PHR_TERM",
1633  /* 42 */ "phrased_term ::= phrased_term PHR_TERM",
1634  /* 43 */ "group ::= TERM GROUP_TERM",
1635  /* 44 */ "group ::= group GROUP_TERM",
1636  /* 45 */ "group ::= group EMPTY_GROUP_OK",
1637  /* 46 */ "near_expr ::= TERM NEAR TERM",
1638  /* 47 */ "near_expr ::= near_expr NEAR TERM",
1639  /* 48 */ "adj_expr ::= TERM ADJ TERM",
1640  /* 49 */ "adj_expr ::= adj_expr ADJ TERM",
1641  /* 50 */ "expr ::= prob_expr",
1642  /* 51 */ "bool_arg ::= expr",
1643  /* 52 */ "prob_expr ::= term",
1644  /* 53 */ "stop_prob ::= prob",
1645  /* 54 */ "stop_term ::= compound_term",
1646  /* 55 */ "term ::= compound_term",
1647 };
1648 
1649 /*
1650 ** This function returns the symbolic name associated with a token
1651 ** value.
1652 */
1653 static const char *ParseTokenName(int tokenType){
1654  if( tokenType>=0 && tokenType<(int)(sizeof(yyTokenName)/sizeof(yyTokenName[0])) ){
1655  return yyTokenName[tokenType];
1656  }
1657  return "Unknown";
1658 }
1659 
1660 /*
1661 ** This function returns the symbolic name associated with a rule
1662 ** value.
1663 */
1664 static const char *ParseRuleName(int ruleNum){
1665  if( ruleNum>=0 && ruleNum<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
1666  return yyRuleName[ruleNum];
1667  }
1668  return "Unknown";
1669 }
1670 #endif /* defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG) */
1671 
1672 /* Datatype of the argument to the memory allocator passed as the
1673 ** second argument to ParseAlloc() below. This can be changed by
1674 ** putting an appropriate #define in the %include section of the input
1675 ** grammar.
1676 */
1677 #ifndef YYMALLOCARGTYPE
1678 # define YYMALLOCARGTYPE size_t
1679 #endif
1680 
1681 /* Initialize a new parser that has already been allocated.
1682 */
1683 static
1684 void ParseInit(yyParser *pParser){
1685 #ifdef YYTRACKMAXSTACKDEPTH
1686  pParser->yyhwm = 0;
1687 #endif
1688 #if 0
1689 #if YYSTACKDEPTH<=0
1690  pParser->yytos = NULL;
1691  pParser->yystack = NULL;
1692  pParser->yystksz = 0;
1693  if( yyGrowStack(pParser) ){
1694  pParser->yystack = &pParser->yystk0;
1695  pParser->yystksz = 1;
1696  }
1697 #endif
1698 #endif
1699 #ifndef YYNOERRORRECOVERY
1700  pParser->yyerrcnt = -1;
1701 #endif
1702 #if 0
1703  pParser->yytos = pParser->yystack;
1704  pParser->yystack[0].stateno = 0;
1705  pParser->yystack[0].major = 0;
1706 #if YYSTACKDEPTH>0
1707  pParser->yystackEnd = &pParser->yystack[YYSTACKDEPTH-1];
1708 #endif
1709 #else
1710  pParser->yystack.push_back(yyStackEntry());
1711 #endif
1712 }
1713 
1714 #ifndef Parse_ENGINEALWAYSONSTACK
1715 /*
1716 ** This function allocates a new parser.
1717 **
1718 ** Inputs:
1719 ** None.
1720 **
1721 ** Outputs:
1722 ** A pointer to a parser. This pointer is used in subsequent calls
1723 ** to Parse and ParseFree.
1724 */
1725 static yyParser *ParseAlloc(void){
1726  return new yyParser;
1727 }
1728 #endif /* Parse_ENGINEALWAYSONSTACK */
1729 
1730 
1731 /* The following function deletes the "minor type" or semantic value
1732 ** associated with a symbol. The symbol can be either a terminal
1733 ** or nonterminal. "yymajor" is the symbol code, and "yypminor" is
1734 ** a pointer to the value to be deleted. The code used to do the
1735 ** deletions is derived from the %destructor and/or %token_destructor
1736 ** directives of the input grammar.
1737 */
1738 static void yy_destructor(
1739  yyParser *yypParser, /* The parser */
1740  YYCODETYPE yymajor, /* Type code for object to destroy */
1741  YYMINORTYPE *yypminor /* The object to be destroyed */
1742 ){
1743  ParseARG_FETCH;
1744  switch( yymajor ){
1745  /* Here is inserted the actions which take place when a
1746  ** terminal or non-terminal is destroyed. This can happen
1747  ** when the symbol is popped from the stack during a
1748  ** reduce or during error processing or when a parser is
1749  ** being destroyed before it is finished parsing.
1750  **
1751  ** Note: during a reduce, the only symbols destroyed are those
1752  ** which appear on the RHS of the rule, but which are *not* used
1753  ** inside the C code.
1754  */
1755 /********* Begin destructor definitions ***************************************/
1756  /* TERMINAL Destructor */
1757  case 1: /* ERROR */
1758  case 2: /* OR */
1759  case 3: /* XOR */
1760  case 4: /* AND */
1761  case 5: /* NOT */
1762  case 6: /* NEAR */
1763  case 7: /* ADJ */
1764  case 8: /* LOVE */
1765  case 9: /* HATE */
1766  case 10: /* HATE_AFTER_AND */
1767  case 11: /* SYNONYM */
1768  case 12: /* TERM */
1769  case 13: /* GROUP_TERM */
1770  case 14: /* PHR_TERM */
1771  case 15: /* WILD_TERM */
1772  case 16: /* PARTIAL_TERM */
1773  case 17: /* BOOLEAN_FILTER */
1774  case 18: /* RANGE */
1775  case 19: /* QUOTE */
1776  case 20: /* BRA */
1777  case 21: /* KET */
1778  case 22: /* CJKTERM */
1779  case 23: /* EMPTY_GROUP_OK */
1780 {
1781 #line 1801 "queryparser/queryparser.lemony"
1782  delete (yypminor->yy0);
1783 #line 1784 "queryparser/queryparser_internal.cc"
1784 }
1785  break;
1786  case 26: /* expr */
1787  case 27: /* prob_expr */
1788  case 28: /* bool_arg */
1789  case 30: /* term */
1790  case 32: /* stop_term */
1791  case 33: /* compound_term */
1792 {
1793 #line 1876 "queryparser/queryparser.lemony"
1794  delete (yypminor->yy39);
1795 #line 1796 "queryparser/queryparser_internal.cc"
1796 }
1797  break;
1798  case 29: /* prob */
1799  case 31: /* stop_prob */
1800 {
1801 #line 1985 "queryparser/queryparser.lemony"
1802  delete (yypminor->yy40);
1803 #line 1804 "queryparser/queryparser_internal.cc"
1804 }
1805  break;
1806  case 34: /* phrase */
1807  case 35: /* phrased_term */
1808  case 37: /* near_expr */
1809  case 38: /* adj_expr */
1810 {
1811 #line 2176 "queryparser/queryparser.lemony"
1812  delete (yypminor->yy32);
1813 #line 1814 "queryparser/queryparser_internal.cc"
1814 }
1815  break;
1816  case 36: /* group */
1817 {
1818 #line 2217 "queryparser/queryparser.lemony"
1819  delete (yypminor->yy14);
1820 #line 1821 "queryparser/queryparser_internal.cc"
1821 }
1822  break;
1823 /********* End destructor definitions *****************************************/
1824  default: break; /* If no destructor action specified: do nothing */
1825  }
1826  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
1827 }
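// The YYMINORTYPE members destroyed above map onto the query parser's C++
// types as follows (inferred from how the reduce actions below create and
// delete them):
//   yy0  -> Term*       terminal tokens
//   yy39 -> Query*      expr, prob_expr, bool_arg, term, stop_term, compound_term
//   yy40 -> ProbQuery*  prob, stop_prob
//   yy32 -> Terms*      phrase, phrased_term, near_expr, adj_expr
//   yy14 -> TermGroup*  group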
1828 
1829 /*
1830 ** Pop the parser's stack once.
1831 **
1832 ** If there is a destructor routine associated with the token which
1833 ** is popped from the stack, then call it.
1834 */
1835 static void yy_pop_parser_stack(yyParser *pParser){
1836  Assert( pParser->yystack.size() > 1 );
1837  yyStackEntry *yytos = &pParser->yystack.back();
1838 
1839  LOGLINE(QUERYPARSER, "Popping " << ParseTokenName(yytos->major));
1840  yy_destructor(pParser, yytos->major, &yytos->minor);
1841  pParser->yystack.pop_back();
1842 }
1843 
1844 /*
1845 ** Clear all secondary memory allocations from the parser
1846 */
1847 static
1848 void ParseFinalize(yyParser *pParser){
1849  while( pParser->yystack.size() > 1 ) yy_pop_parser_stack(pParser);
1850 }
1851 
1852 #ifndef Parse_ENGINEALWAYSONSTACK
1853 /*
1854 ** Deallocate and destroy a parser. Destructors are called for
1855 ** all stack elements before shutting the parser down.
1856 **
1857 ** If the YYPARSEFREENEVERNULL macro exists (for example because it
1858 ** is defined in a %include section of the input grammar) then it is
1859 ** assumed that the input pointer is never NULL.
1860 */
1861 static
1862 void ParseFree(
1863  yyParser *pParser /* The parser to be deleted */
1864 ){
1865  delete pParser;
1866 }
1867 #endif /* Parse_ENGINEALWAYSONSTACK */
1868 
1869 /*
1870 ** Return the peak depth of the stack for a parser.
1871 */
1872 #ifdef YYTRACKMAXSTACKDEPTH
1873 int ParseStackPeak(yyParser *pParser){
1874  return pParser->yyhwm;
1875 }
1876 #endif
1877 
1878 /* This array of booleans keeps track of the parser statement
1879 ** coverage. The element yycoverage[X][Y] is set when the parser
1880 ** is in state X and has a lookahead token Y. In a well-tested
1881 ** system, every element of this matrix should end up being set.
1882 */
1883 #if defined(YYCOVERAGE)
1884 static unsigned char yycoverage[YYNSTATE][YYNTOKEN];
1885 #endif
1886 
1887 /*
1888 ** Write into out a description of every state/lookahead combination that
1889 **
1890 ** (1) has not been used by the parser, and
1891 ** (2) is not a syntax error.
1892 **
1893 ** Return the number of missed state/lookahead combinations.
1894 */
1895 #if defined(YYCOVERAGE)
1896 int ParseCoverage(FILE *out){
1897  int stateno, iLookAhead, i;
1898  int nMissed = 0;
1899  for(stateno=0; stateno<YYNSTATE; stateno++){
1900  i = yy_shift_ofst[stateno];
1901  for(iLookAhead=0; iLookAhead<YYNTOKEN; iLookAhead++){
1902  if( yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
1903  if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
1904  if( out ){
1905  fprintf(out,"State %d lookahead %s %s\n", stateno,
1906  yyTokenName[iLookAhead],
1907  yycoverage[stateno][iLookAhead] ? "ok" : "missed");
1908  }
1909  }
1910  }
1911  return nMissed;
1912 }
1913 #endif
1914 
1915 /*
1916 ** Find the appropriate action for a parser given the terminal
1917 ** look-ahead token iLookAhead.
1918 */
1919 static unsigned int yy_find_shift_action(
1920  yyParser *pParser, /* The parser */
1921  YYCODETYPE iLookAhead /* The look-ahead token */
1922 ){
1923  int i;
1924  int stateno = pParser->yystack.back().stateno;
1925 
1926  if( stateno>YY_MAX_SHIFT ) return stateno;
1927  Assert( stateno <= YY_SHIFT_COUNT );
1928 #if defined(YYCOVERAGE)
1929  yycoverage[stateno][iLookAhead] = 1;
1930 #endif
1931  do{
1932  i = yy_shift_ofst[stateno];
1933  Assert( i>=0 );
1934  Assert( i+YYNTOKEN<=(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) );
1935  Assert( iLookAhead!=YYNOCODE );
1936  Assert( iLookAhead < YYNTOKEN );
1937  i += iLookAhead;
1938  if( yy_lookahead[i]!=iLookAhead ){
1939 #ifdef YYFALLBACK
1940  YYCODETYPE iFallback; /* Fallback token */
1941  if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0])
1942  && (iFallback = yyFallback[iLookAhead])!=0 ){
1943  LOGLINE(QUERYPARSER,
1944  "FALLBACK " << ParseTokenName(iLookAhead) << " => " <<
1945  ParseTokenName(iFallback));
1946  Assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
1947  iLookAhead = iFallback;
1948  continue;
1949  }
1950 #endif
1951 #ifdef YYWILDCARD
1952  {
1953  int j = i - iLookAhead + YYWILDCARD;
1954  if(
1955 #if YY_SHIFT_MIN+YYWILDCARD<0
1956  j>=0 &&
1957 #endif
1958 #if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT
1959  j<YY_ACTTAB_COUNT &&
1960 #endif
1961  yy_lookahead[j]==YYWILDCARD && iLookAhead>0
1962  ){
1963  LOGLINE(QUERYPARSER,
1964  "WILDCARD " << ParseTokenName(iLookAhead) << " => " <<
1965  ParseTokenName(YYWILDCARD));
1966  return yy_action[j];
1967  }
1968  }
1969 #endif /* YYWILDCARD */
1970  return yy_default[stateno];
1971  }else{
1972  return yy_action[i];
1973  }
1974  }while(1);
1975 }
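// The action code returned here is decoded by Parse() below: values >= YY_MIN_REDUCE
// request a reduce, values <= YY_MAX_SHIFTREDUCE a shift (or shift-reduce),
// YY_ACCEPT_ACTION accepts the input and YY_ERROR_ACTION starts error handling.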
1976 
1977 /*
1978 ** Find the appropriate action for a parser given the non-terminal
1979 ** look-ahead token iLookAhead.
1980 */
1981 static int yy_find_reduce_action(
1982  int stateno, /* Current state number */
1983  YYCODETYPE iLookAhead /* The look-ahead token */
1984 ){
1985  int i;
1986 #ifdef YYERRORSYMBOL
1987  if( stateno>YY_REDUCE_COUNT ){
1988  return yy_default[stateno];
1989  }
1990 #else
1991  Assert( stateno<=YY_REDUCE_COUNT );
1992 #endif
1993  i = yy_reduce_ofst[stateno];
1994  Assert( iLookAhead!=YYNOCODE );
1995  i += iLookAhead;
1996 #ifdef YYERRORSYMBOL
1997  if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
1998  return yy_default[stateno];
1999  }
2000 #else
2001  Assert( i>=0 && i<YY_ACTTAB_COUNT );
2002  Assert( yy_lookahead[i]==iLookAhead );
2003 #endif
2004  return yy_action[i];
2005 }
2006 
2007 /*
2008 ** The following routine is called if the stack overflows.
2009 ** In Xapian this can never happen as we use std::vector to provide a stack
2010 ** of indefinite size.
2011 */
2012 #if 0
2013 static void yyStackOverflow(yyParser *yypParser){
2014  ParseARG_FETCH;
2015  yypParser->yyidx--;
2016 #ifndef NDEBUG
2017  if( yyTraceFILE ){
2018  fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt);
2019  }
2020 #endif
2021  while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser);
2022  /* Here code is inserted which will execute if the parser
2023  ** stack ever overflows */
2024 /******** Begin %stack_overflow code ******************************************/
2025 /******** End %stack_overflow code ********************************************/
2026  ParseARG_STORE; /* Suppress warning about unused %extra_argument var */
2027 }
2028 #endif
2029 
2030 /*
2031 ** Print tracing information for a SHIFT action
2032 */
2033 #ifdef XAPIAN_DEBUG_LOG
2034 static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){
2035  if( yyNewState<YYNSTATE ){
2036  LOGLINE(QUERYPARSER, zTag << " '" <<
2037  yyTokenName[yypParser->yystack.back().major] <<
2038  "', go to state " << yyNewState);
2039  }else{
2040  LOGLINE(QUERYPARSER, zTag << " '" <<
2041  yyTokenName[yypParser->yystack.back().major] <<
2042  "', pending reduce " << yyNewState - YY_MIN_REDUCE);
2043  }
2044 }
2045 #else
2046 # define yyTraceShift(X,Y,Z)
2047 #endif
2048 
2049 /*
2050 ** Perform a shift action.
2051 */
2052 static void yy_shift(
2053  yyParser *yypParser, /* The parser to be shifted */
2054  int yyNewState, /* The new state to shift in */
2055  int yyMajor, /* The major token to shift in */
2056  ParseTOKENTYPE yyMinor /* The minor token to shift in */
2057 ){
2058  if( yyNewState > YY_MAX_SHIFT ){
2059  yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE;
2060  }
2061  yypParser->yystack.push_back(yyStackEntry(yyNewState, yyMajor, yyMinor));
2062 #ifdef YYTRACKMAXSTACKDEPTH
2063  if( (int)(yypParser->yystack.size())>yypParser->yyhwm ){
2064  yypParser->yyhwm++;
2065  Assert( yypParser->yyhwm == (int)(yypParser->yystack.size()) );
2066  }
2067 #endif
2068  yyTraceShift(yypParser, yyNewState, "Shift");
2069 }
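// A shift-reduce action (yyNewState > YY_MAX_SHIFT) is rebased into the
// YY_MIN_REDUCE range before being stored as the new entry's state number, so
// the next yy_find_shift_action() call returns it unchanged and the pending
// reduce (as yyTraceShift reports it) is performed then.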
2070 
2071 /* The following table contains information about every rule that
2072 ** is used during the reduce.
2073 */
2074 static const struct {
2075  YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */
2076  signed char nrhs; /* Negative of the number of RHS symbols in the rule */
2077 } yyRuleInfo[] = {
2078  { 25, -1 }, /* (0) query ::= expr */
2079  { 25, 0 }, /* (1) query ::= */
2080  { 26, -3 }, /* (2) expr ::= bool_arg AND bool_arg */
2081  { 26, -3 }, /* (3) expr ::= bool_arg NOT bool_arg */
2082  { 26, -4 }, /* (4) expr ::= bool_arg AND NOT bool_arg */
2083  { 26, -4 }, /* (5) expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
2084  { 26, -3 }, /* (6) expr ::= bool_arg OR bool_arg */
2085  { 26, -3 }, /* (7) expr ::= bool_arg XOR bool_arg */
2086  { 28, 0 }, /* (8) bool_arg ::= */
2087  { 27, -1 }, /* (9) prob_expr ::= prob */
2088  { 29, -1 }, /* (10) prob ::= RANGE */
2089  { 29, -2 }, /* (11) prob ::= stop_prob RANGE */
2090  { 29, -2 }, /* (12) prob ::= stop_term stop_term */
2091  { 29, -2 }, /* (13) prob ::= prob stop_term */
2092  { 29, -2 }, /* (14) prob ::= LOVE term */
2093  { 29, -3 }, /* (15) prob ::= stop_prob LOVE term */
2094  { 29, -2 }, /* (16) prob ::= HATE term */
2095  { 29, -3 }, /* (17) prob ::= stop_prob HATE term */
2096  { 29, -2 }, /* (18) prob ::= HATE BOOLEAN_FILTER */
2097  { 29, -3 }, /* (19) prob ::= stop_prob HATE BOOLEAN_FILTER */
2098  { 29, -1 }, /* (20) prob ::= BOOLEAN_FILTER */
2099  { 29, -2 }, /* (21) prob ::= stop_prob BOOLEAN_FILTER */
2100  { 29, -2 }, /* (22) prob ::= LOVE BOOLEAN_FILTER */
2101  { 29, -3 }, /* (23) prob ::= stop_prob LOVE BOOLEAN_FILTER */
2102  { 31, -1 }, /* (24) stop_prob ::= stop_term */
2103  { 32, -1 }, /* (25) stop_term ::= TERM */
2104  { 30, -1 }, /* (26) term ::= TERM */
2105  { 33, -1 }, /* (27) compound_term ::= WILD_TERM */
2106  { 33, -1 }, /* (28) compound_term ::= PARTIAL_TERM */
2107  { 33, -3 }, /* (29) compound_term ::= QUOTE phrase QUOTE */
2108  { 33, -1 }, /* (30) compound_term ::= phrased_term */
2109  { 33, -1 }, /* (31) compound_term ::= group */
2110  { 33, -1 }, /* (32) compound_term ::= near_expr */
2111  { 33, -1 }, /* (33) compound_term ::= adj_expr */
2112  { 33, -3 }, /* (34) compound_term ::= BRA expr KET */
2113  { 33, -2 }, /* (35) compound_term ::= SYNONYM TERM */
2114  { 33, -1 }, /* (36) compound_term ::= CJKTERM */
2115  { 34, -1 }, /* (37) phrase ::= TERM */
2116  { 34, -1 }, /* (38) phrase ::= CJKTERM */
2117  { 34, -2 }, /* (39) phrase ::= phrase TERM */
2118  { 34, -2 }, /* (40) phrase ::= phrase CJKTERM */
2119  { 35, -2 }, /* (41) phrased_term ::= TERM PHR_TERM */
2120  { 35, -2 }, /* (42) phrased_term ::= phrased_term PHR_TERM */
2121  { 36, -2 }, /* (43) group ::= TERM GROUP_TERM */
2122  { 36, -2 }, /* (44) group ::= group GROUP_TERM */
2123  { 36, -2 }, /* (45) group ::= group EMPTY_GROUP_OK */
2124  { 37, -3 }, /* (46) near_expr ::= TERM NEAR TERM */
2125  { 37, -3 }, /* (47) near_expr ::= near_expr NEAR TERM */
2126  { 38, -3 }, /* (48) adj_expr ::= TERM ADJ TERM */
2127  { 38, -3 }, /* (49) adj_expr ::= adj_expr ADJ TERM */
2128  { 26, -1 }, /* (50) expr ::= prob_expr */
2129  { 28, -1 }, /* (51) bool_arg ::= expr */
2130  { 27, -1 }, /* (52) prob_expr ::= term */
2131  { 31, -1 }, /* (53) stop_prob ::= prob */
2132  { 32, -1 }, /* (54) stop_term ::= compound_term */
2133  { 30, -1 }, /* (55) term ::= compound_term */
2134 };
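// yy_reduce() below consumes this table: with yysize set to the (negative)
// nrhs, yymsp[yysize] is the stack entry just below the rule's RHS symbols,
// whose stateno is fed to yy_find_reduce_action(), and lhs becomes the major
// code of the entry that replaces the popped RHS.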
2135 
2136 static void yy_accept(yyParser*); /* Forward Declaration */
2137 
2138 /*
2139 ** Perform a reduce action and the shift that must immediately
2140 ** follow the reduce.
2141 **
2142 ** The yyLookahead and yyLookaheadToken parameters provide reduce actions
2143 ** access to the lookahead token (if any). The yyLookahead will be YYNOCODE
2144 ** if the lookahead token has already been consumed. As this procedure is
2145 ** only called from one place, optimizing compilers will in-line it, which
2146 ** means that the extra parameters have no performance impact.
2147 */
2148 static void yy_reduce(
2149  yyParser *yypParser, /* The parser */
2150  unsigned int yyruleno, /* Number of the rule by which to reduce */
2151  int yyLookahead, /* Lookahead token, or YYNOCODE if none */
2152  ParseTOKENTYPE yyLookaheadToken /* Value of the lookahead token */
2153 ){
2154  int yygoto; /* The next state */
2155  int yyact; /* The next action */
2156  yyStackEntry *yymsp; /* The top of the parser's stack */
2157  int yysize; /* Amount to pop the stack */
2158  ParseARG_FETCH;
2159  (void)yyLookahead;
2160  (void)yyLookaheadToken;
2161  yymsp = &yypParser->yystack.back();
2162 #ifdef XAPIAN_DEBUG_LOG
2163  if( yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
2164  yysize = yyRuleInfo[yyruleno].nrhs;
2165  if( yysize ){
2166  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2167  ParseRuleName(yyruleno) << "], go to state " <<
2168  yymsp[yysize].stateno);
2169  } else {
2170  LOGLINE(QUERYPARSER, "Reduce " << yyruleno << " [" <<
2171  ParseRuleName(yyruleno) << "].");
2172  }
2173  }
2174 #endif /* XAPIAN_DEBUG_LOG */
2175  /* yygotominor = yyzerominor; */
2176 
2177  /* Check that the stack is large enough to grow by a single entry
2178  ** if the RHS of the rule is empty. This ensures that there is room
2179  ** enough on the stack to push the LHS value without invalidating
2180  ** pointers into the stack. */
2181  if( yyRuleInfo[yyruleno].nrhs==0 ){
2182 #if 1
2183  yypParser->yystack.resize(yypParser->yystack.size() + 1);
2184  yymsp = &(yypParser->yystack.back()) - 1;
2185 #else
2186 #ifdef YYTRACKMAXSTACKDEPTH
2187  if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){
2188  yypParser->yyhwm++;
2189  Assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack));
2190  }
2191 #endif
2192 #if YYSTACKDEPTH>0
2193  if( yypParser->yytos>=yypParser->yystackEnd ){
2194  yyStackOverflow(yypParser);
2195  return;
2196  }
2197 #else
2198  if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){
2199  if( yyGrowStack(yypParser) ){
2200  yyStackOverflow(yypParser);
2201  return;
2202  }
2203  yymsp = yypParser->yytos;
2204  }
2205 #endif
2206 #endif
2207  }
2208 
2209  switch( yyruleno ){
2210  /* Beginning here are the reduction cases. A typical example
2211  ** follows:
2212  ** case 0:
2213  ** #line <lineno> <grammarfile>
2214  ** { ... } // User supplied code
2215  ** #line <lineno> <thisfile>
2216  ** break;
2217  */
2218 /********** Begin reduce actions **********************************************/
2219  YYMINORTYPE yylhsminor;
2220  case 0: /* query ::= expr */
2221 #line 1858 "queryparser/queryparser.lemony"
2222 {
2223  // Save the parsed query in the State structure so we can return it.
2224  if (yymsp[0].minor.yy39) {
2225  state->query = *yymsp[0].minor.yy39;
2226  delete yymsp[0].minor.yy39;
2227  } else {
2228  state->query = Query();
2229  }
2230 }
2231 #line 2232 "queryparser/queryparser_internal.cc"
2232  break;
2233  case 1: /* query ::= */
2234 #line 1868 "queryparser/queryparser.lemony"
2235 {
2236  // Handle a query string with no terms in.
2237  state->query = Query();
2238 }
2239 #line 2240 "queryparser/queryparser_internal.cc"
2240  break;
2241  case 2: /* expr ::= bool_arg AND bool_arg */
2242 #line 1880 "queryparser/queryparser.lemony"
2243 {
2244  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "AND");
2245  *yymsp[-2].minor.yy39 &= *yymsp[0].minor.yy39;
2246  delete yymsp[0].minor.yy39;
2247 }
2248 #line 2249 "queryparser/queryparser_internal.cc"
2249  yy_destructor(yypParser,4,&yymsp[-1].minor);
2250  break;
2251  case 3: /* expr ::= bool_arg NOT bool_arg */
2252 #line 1886 "queryparser/queryparser.lemony"
2253 {
2254  // 'NOT foo' -> '<alldocuments> NOT foo'
2255  if (!yymsp[-2].minor.yy39 && (state->flags & QueryParser::FLAG_PURE_NOT)) {
2256  yymsp[-2].minor.yy39 = new Query("", 1, 0);
2257  }
2258  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "NOT");
2259  *yymsp[-2].minor.yy39 &= ~*yymsp[0].minor.yy39;
2260  delete yymsp[0].minor.yy39;
2261 }
2262 #line 2263 "queryparser/queryparser_internal.cc"
2263  yy_destructor(yypParser,5,&yymsp[-1].minor);
2264  break;
2265  case 4: /* expr ::= bool_arg AND NOT bool_arg */
2266 #line 1896 "queryparser/queryparser.lemony"
2267 {
2268  VET_BOOL_ARGS(yymsp[-3].minor.yy39, yymsp[0].minor.yy39, "AND NOT");
2269  *yymsp[-3].minor.yy39 &= ~*yymsp[0].minor.yy39;
2270  delete yymsp[0].minor.yy39;
2271 }
2272 #line 2273 "queryparser/queryparser_internal.cc"
2273  yy_destructor(yypParser,4,&yymsp[-2].minor);
2274  yy_destructor(yypParser,5,&yymsp[-1].minor);
2275  break;
2276  case 5: /* expr ::= bool_arg AND HATE_AFTER_AND bool_arg */
2277 #line 1902 "queryparser/queryparser.lemony"
2278 {
2279  VET_BOOL_ARGS(yymsp[-3].minor.yy39, yymsp[0].minor.yy39, "AND");
2280  *yymsp[-3].minor.yy39 &= ~*yymsp[0].minor.yy39;
2281  delete yymsp[0].minor.yy39;
2282 }
2283 #line 2284 "queryparser/queryparser_internal.cc"
2284  yy_destructor(yypParser,4,&yymsp[-2].minor);
2285  yy_destructor(yypParser,10,&yymsp[-1].minor);
2286  break;
2287  case 6: /* expr ::= bool_arg OR bool_arg */
2288 #line 1908 "queryparser/queryparser.lemony"
2289 {
2290  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "OR");
2291  *yymsp[-2].minor.yy39 |= *yymsp[0].minor.yy39;
2292  delete yymsp[0].minor.yy39;
2293 }
2294 #line 2295 "queryparser/queryparser_internal.cc"
2295  yy_destructor(yypParser,2,&yymsp[-1].minor);
2296  break;
2297  case 7: /* expr ::= bool_arg XOR bool_arg */
2298 #line 1914 "queryparser/queryparser.lemony"
2299 {
2300  VET_BOOL_ARGS(yymsp[-2].minor.yy39, yymsp[0].minor.yy39, "XOR");
2301  *yymsp[-2].minor.yy39 ^= *yymsp[0].minor.yy39;
2302  delete yymsp[0].minor.yy39;
2303 }
2304 #line 2305 "queryparser/queryparser_internal.cc"
2305  yy_destructor(yypParser,3,&yymsp[-1].minor);
2306  break;
2307  case 8: /* bool_arg ::= */
2308 #line 1927 "queryparser/queryparser.lemony"
2309 {
2310  // Set the argument to NULL, which enables the bool_arg-using rules in
2311  // expr above to report uses of AND, OR, etc which don't have two
2312  // arguments.
2313  yymsp[1].minor.yy39 = NULL;
2314 }
2315 #line 2316 "queryparser/queryparser_internal.cc"
2316  break;
2317  case 9: /* prob_expr ::= prob */
2318 #line 1939 "queryparser/queryparser.lemony"
2319 {
2320  yylhsminor.yy39 = yymsp[0].minor.yy40->query;
2321  yymsp[0].minor.yy40->query = NULL;
2322  // Handle any "+ terms".
2323  if (yymsp[0].minor.yy40->love) {
2324  if (yymsp[0].minor.yy40->love->empty()) {
2325  // +<nothing>.
2326  delete yylhsminor.yy39;
2327  yylhsminor.yy39 = yymsp[0].minor.yy40->love;
2328  } else if (yylhsminor.yy39) {
2329  swap(yylhsminor.yy39, yymsp[0].minor.yy40->love);
2330  add_to_query(yylhsminor.yy39, Query::OP_AND_MAYBE, yymsp[0].minor.yy40->love);
2331  } else {
2332  yylhsminor.yy39 = yymsp[0].minor.yy40->love;
2333  }
2334  yymsp[0].minor.yy40->love = NULL;
2335  }
2336  // Handle any boolean filters.
2337  if (!yymsp[0].minor.yy40->filter.empty()) {
2338  if (yylhsminor.yy39) {
2339  add_to_query(yylhsminor.yy39, Query::OP_FILTER, yymsp[0].minor.yy40->merge_filters());
2340  } else {
2341  // Make the query a boolean one.
2342  yylhsminor.yy39 = new Query(Query::OP_SCALE_WEIGHT, yymsp[0].minor.yy40->merge_filters(), 0.0);
2343  }
2344  }
2345  // Handle any "- terms".
2346  if (yymsp[0].minor.yy40->hate && !yymsp[0].minor.yy40->hate->empty()) {
2347  if (!yylhsminor.yy39) {
2348  // Can't just hate!
2349  yy_parse_failed(yypParser);
2350  return;
2351  }
2352  *yylhsminor.yy39 = Query(Query::OP_AND_NOT, *yylhsminor.yy39, *yymsp[0].minor.yy40->hate);
2353  }
2354  delete yymsp[0].minor.yy40;
2355 }
2356 #line 2357 "queryparser/queryparser_internal.cc"
2357  yymsp[0].minor.yy39 = yylhsminor.yy39;
2358  break;
2359  case 10: /* prob ::= RANGE */
2360 #line 1987 "queryparser/queryparser.lemony"
2361 {
2362  string grouping = yymsp[0].minor.yy0->name;
2363  const Query & range = yymsp[0].minor.yy0->as_range_query();
2364  yymsp[0].minor.yy40 = new ProbQuery; /*P-overwrites-R*/
2365  yymsp[0].minor.yy40->add_filter_range(grouping, range);
2366 }
2367 #line 2368 "queryparser/queryparser_internal.cc"
2368  break;
2369  case 11: /* prob ::= stop_prob RANGE */
2370 #line 1994 "queryparser/queryparser.lemony"
2371 {
2372  string grouping = yymsp[0].minor.yy0->name;
2373  const Query & range = yymsp[0].minor.yy0->as_range_query();
2374  yymsp[-1].minor.yy40->append_filter_range(grouping, range);
2375 }
2376 #line 2377 "queryparser/queryparser_internal.cc"
2377  break;
2378  case 12: /* prob ::= stop_term stop_term */
2379 #line 2000 "queryparser/queryparser.lemony"
2380 {
2381  yymsp[-1].minor.yy40 = new ProbQuery(yymsp[-1].minor.yy39); /*P-overwrites-T*/
2382  if (yymsp[0].minor.yy39) {
2383  Query::op op = state->default_op();
2384  if (yymsp[-1].minor.yy40->query && is_positional(op)) {
2385  // If default_op is OP_NEAR or OP_PHRASE, set the window size to
2386  // 11 for the first pair of terms and it will automatically grow
2387  // by one for each subsequent term.
2388  Query * subqs[2] = { yymsp[-1].minor.yy40->query, yymsp[0].minor.yy39 };
2389  *(yymsp[-1].minor.yy40->query) = Query(op, subqs, subqs + 2, 11);
2390  delete yymsp[0].minor.yy39;
2391  } else {
2392  add_to_query(yymsp[-1].minor.yy40->query, op, yymsp[0].minor.yy39);
2393  }
2394  }
2395 }
2396 #line 2397 "queryparser/queryparser_internal.cc"
2397  break;
2398  case 13: /* prob ::= prob stop_term */
2399 #line 2017 "queryparser/queryparser.lemony"
2400 {
2401  // If yymsp[0].minor.yy39 is a stopword, there's nothing to do here.
2402  if (yymsp[0].minor.yy39) add_to_query(yymsp[-1].minor.yy40->query, state->default_op(), yymsp[0].minor.yy39);
2403 }
2404 #line 2405 "queryparser/queryparser_internal.cc"
2405  break;
2406  case 14: /* prob ::= LOVE term */
2407 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2408 #line 2022 "queryparser/queryparser.lemony"
2409 {
2410  yymsp[-1].minor.yy40 = new ProbQuery;
2411  if (state->default_op() == Query::OP_AND) {
2412  yymsp[-1].minor.yy40->query = yymsp[0].minor.yy39;
2413  } else {
2414  yymsp[-1].minor.yy40->love = yymsp[0].minor.yy39;
2415  }
2416 }
2417 #line 2418 "queryparser/queryparser_internal.cc"
2418 }
2419  break;
2420  case 15: /* prob ::= stop_prob LOVE term */
2421 #line 2031 "queryparser/queryparser.lemony"
2422 {
2423  if (state->default_op() == Query::OP_AND) {
2424  /* The default op is AND, so we just put loved terms into the query
2425  * (in this case the only effect of love is to ignore the stopword
2426  * list). */
2427  add_to_query(yymsp[-2].minor.yy40->query, Query::OP_AND, yymsp[0].minor.yy39);
2428  } else {
2429  add_to_query(yymsp[-2].minor.yy40->love, Query::OP_AND, yymsp[0].minor.yy39);
2430  }
2431 }
2432 #line 2433 "queryparser/queryparser_internal.cc"
2433  yy_destructor(yypParser,8,&yymsp[-1].minor);
2434  break;
2435  case 16: /* prob ::= HATE term */
2436 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2437 #line 2042 "queryparser/queryparser.lemony"
2438 {
2439  yymsp[-1].minor.yy40 = new ProbQuery;
2440  yymsp[-1].minor.yy40->hate = yymsp[0].minor.yy39;
2441 }
2442 #line 2443 "queryparser/queryparser_internal.cc"
2443 }
2444  break;
2445  case 17: /* prob ::= stop_prob HATE term */
2446 #line 2047 "queryparser/queryparser.lemony"
2447 {
2448  add_to_query(yymsp[-2].minor.yy40->hate, Query::OP_OR, yymsp[0].minor.yy39);
2449 }
2450 #line 2451 "queryparser/queryparser_internal.cc"
2451  yy_destructor(yypParser,9,&yymsp[-1].minor);
2452  break;
2453  case 18: /* prob ::= HATE BOOLEAN_FILTER */
2454 { yy_destructor(yypParser,9,&yymsp[-1].minor);
2455 #line 2051 "queryparser/queryparser.lemony"
2456 {
2457  yymsp[-1].minor.yy40 = new ProbQuery;
2458  yymsp[-1].minor.yy40->hate = new Query(yymsp[0].minor.yy0->get_query());
2459  delete yymsp[0].minor.yy0;
2460 }
2461 #line 2462 "queryparser/queryparser_internal.cc"
2462 }
2463  break;
2464  case 19: /* prob ::= stop_prob HATE BOOLEAN_FILTER */
2465 #line 2057 "queryparser/queryparser.lemony"
2466 {
2467  add_to_query(yymsp[-2].minor.yy40->hate, Query::OP_OR, yymsp[0].minor.yy0->get_query());
2468  delete yymsp[0].minor.yy0;
2469 }
2470 #line 2471 "queryparser/queryparser_internal.cc"
2471  yy_destructor(yypParser,9,&yymsp[-1].minor);
2472  break;
2473  case 20: /* prob ::= BOOLEAN_FILTER */
2474 #line 2062 "queryparser/queryparser.lemony"
2475 {
2476  yylhsminor.yy40 = new ProbQuery;
2477  yylhsminor.yy40->add_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2478  delete yymsp[0].minor.yy0;
2479 }
2480 #line 2481 "queryparser/queryparser_internal.cc"
2481  yymsp[0].minor.yy40 = yylhsminor.yy40;
2482  break;
2483  case 21: /* prob ::= stop_prob BOOLEAN_FILTER */
2484 #line 2068 "queryparser/queryparser.lemony"
2485 {
2486  yymsp[-1].minor.yy40->append_filter(yymsp[0].minor.yy0->get_grouping(), yymsp[0].minor.yy0->get_query());
2487  delete yymsp[0].minor.yy0;
2488 }
2489 #line 2490 "queryparser/queryparser_internal.cc"
2490  break;
2491  case 22: /* prob ::= LOVE BOOLEAN_FILTER */
2492 { yy_destructor(yypParser,8,&yymsp[-1].minor);
2493 #line 2073 "queryparser/queryparser.lemony"
2494 {
2495  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2496  yymsp[-1].minor.yy40 = new ProbQuery;
2497  yymsp[-1].minor.yy40->filter[yymsp[0].minor.yy0->get_grouping()] = yymsp[0].minor.yy0->get_query();
2498  delete yymsp[0].minor.yy0;
2499 }
2500 #line 2501 "queryparser/queryparser_internal.cc"
2501 }
2502  break;
2503  case 23: /* prob ::= stop_prob LOVE BOOLEAN_FILTER */
2504 #line 2080 "queryparser/queryparser.lemony"
2505 {
2506  // LOVE BOOLEAN_FILTER(yymsp[0].minor.yy0) is just the same as BOOLEAN_FILTER
2507  // We OR filters with the same prefix...
2508  Query & q = yymsp[-2].minor.yy40->filter[yymsp[0].minor.yy0->get_grouping()];
2509  q |= yymsp[0].minor.yy0->get_query();
2510  delete yymsp[0].minor.yy0;
2511 }
2512 #line 2513 "queryparser/queryparser_internal.cc"
2513  yy_destructor(yypParser,8,&yymsp[-1].minor);
2514  break;
2515  case 24: /* stop_prob ::= stop_term */
2516 #line 2095 "queryparser/queryparser.lemony"
2517 {
2518  yymsp[0].minor.yy40 = new ProbQuery(yymsp[0].minor.yy39); /*P-overwrites-T*/
2519 }
2520 #line 2521 "queryparser/queryparser_internal.cc"
2521  break;
2522  case 25: /* stop_term ::= TERM */
2523 #line 2108 "queryparser/queryparser.lemony"
2524 {
2525  if (state->is_stopword(yymsp[0].minor.yy0)) {
2526  yylhsminor.yy39 = NULL;
2527  state->add_to_stoplist(yymsp[0].minor.yy0);
2528  } else {
2529  yylhsminor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2530  }
2531  delete yymsp[0].minor.yy0;
2532 }
2533 #line 2534 "queryparser/queryparser_internal.cc"
2534  yymsp[0].minor.yy39 = yylhsminor.yy39;
2535  break;
2536  case 26: /* term ::= TERM */
2537 #line 2125 "queryparser/queryparser.lemony"
2538 {
2539  yylhsminor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2540  delete yymsp[0].minor.yy0;
2541 }
2542 #line 2543 "queryparser/queryparser_internal.cc"
2543  yymsp[0].minor.yy39 = yylhsminor.yy39;
2544  break;
2545  case 27: /* compound_term ::= WILD_TERM */
2546 #line 2140 "queryparser/queryparser.lemony"
2547 { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_wildcarded_query(state); /*T-overwrites-U*/ }
2548 #line 2549 "queryparser/queryparser_internal.cc"
2549  break;
2550  case 28: /* compound_term ::= PARTIAL_TERM */
2551 #line 2143 "queryparser/queryparser.lemony"
2552 { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_partial_query(state); /*T-overwrites-U*/ }
2553 #line 2554 "queryparser/queryparser_internal.cc"
2554  break;
2555  case 29: /* compound_term ::= QUOTE phrase QUOTE */
2556 { yy_destructor(yypParser,19,&yymsp[-2].minor);
2557 #line 2146 "queryparser/queryparser.lemony"
2558 { yymsp[-2].minor.yy39 = yymsp[-1].minor.yy32->as_phrase_query(); }
2559 #line 2560 "queryparser/queryparser_internal.cc"
2560  yy_destructor(yypParser,19,&yymsp[0].minor);
2561 }
2562  break;
2563  case 30: /* compound_term ::= phrased_term */
2564 #line 2149 "queryparser/queryparser.lemony"
2565 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_phrase_query(); /*T-overwrites-P*/ }
2566 #line 2567 "queryparser/queryparser_internal.cc"
2567  break;
2568  case 31: /* compound_term ::= group */
2569 #line 2152 "queryparser/queryparser.lemony"
2570 { yymsp[0].minor.yy39 = yymsp[0].minor.yy14->as_group(state); /*T-overwrites-P*/ }
2571 #line 2572 "queryparser/queryparser_internal.cc"
2572  break;
2573  case 32: /* compound_term ::= near_expr */
2574 #line 2155 "queryparser/queryparser.lemony"
2575 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_near_query(); /*T-overwrites-P*/ }
2576 #line 2577 "queryparser/queryparser_internal.cc"
2577  break;
2578  case 33: /* compound_term ::= adj_expr */
2579 #line 2158 "queryparser/queryparser.lemony"
2580 { yymsp[0].minor.yy39 = yymsp[0].minor.yy32->as_adj_query(); /*T-overwrites-P*/ }
2581 #line 2582 "queryparser/queryparser_internal.cc"
2582  break;
2583  case 34: /* compound_term ::= BRA expr KET */
2584 { yy_destructor(yypParser,20,&yymsp[-2].minor);
2585 #line 2161 "queryparser/queryparser.lemony"
2586 { yymsp[-2].minor.yy39 = yymsp[-1].minor.yy39; }
2587 #line 2588 "queryparser/queryparser_internal.cc"
2588  yy_destructor(yypParser,21,&yymsp[0].minor);
2589 }
2590  break;
2591  case 35: /* compound_term ::= SYNONYM TERM */
2592 { yy_destructor(yypParser,11,&yymsp[-1].minor);
2593 #line 2163 "queryparser/queryparser.lemony"
2594 {
2595  yymsp[-1].minor.yy39 = new Query(yymsp[0].minor.yy0->get_query_with_synonyms());
2596  delete yymsp[0].minor.yy0;
2597 }
2598 #line 2599 "queryparser/queryparser_internal.cc"
2599 }
2600  break;
2601  case 36: /* compound_term ::= CJKTERM */
2602 #line 2168 "queryparser/queryparser.lemony"
2603 {
2604  { yymsp[0].minor.yy39 = yymsp[0].minor.yy0->as_cjk_query(); /*T-overwrites-U*/ }
2605 }
2606 #line 2607 "queryparser/queryparser_internal.cc"
2607  break;
2608  case 37: /* phrase ::= TERM */
2609 #line 2178 "queryparser/queryparser.lemony"
2610 {
2611  yylhsminor.yy32 = Terms::create(state);
2612  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2613 }
2614 #line 2615 "queryparser/queryparser_internal.cc"
2615  yymsp[0].minor.yy32 = yylhsminor.yy32;
2616  break;
2617  case 38: /* phrase ::= CJKTERM */
2618 #line 2183 "queryparser/queryparser.lemony"
2619 {
2620  yylhsminor.yy32 = Terms::create(state);
2621  yymsp[0].minor.yy0->as_positional_cjk_term(yylhsminor.yy32);
2622 }
2623 #line 2624 "queryparser/queryparser_internal.cc"
2624  yymsp[0].minor.yy32 = yylhsminor.yy32;
2625  break;
2626  case 39: /* phrase ::= phrase TERM */
2627  case 42: /* phrased_term ::= phrased_term PHR_TERM */ yytestcase(yyruleno==42);
2628 #line 2188 "queryparser/queryparser.lemony"
2629 {
2630  yymsp[-1].minor.yy32->add_positional_term(yymsp[0].minor.yy0);
2631 }
2632 #line 2633 "queryparser/queryparser_internal.cc"
2633  break;
2634  case 40: /* phrase ::= phrase CJKTERM */
2635 #line 2192 "queryparser/queryparser.lemony"
2636 {
2637  yymsp[0].minor.yy0->as_positional_cjk_term(yymsp[-1].minor.yy32);
2638 }
2639 #line 2640 "queryparser/queryparser_internal.cc"
2640  break;
2641  case 41: /* phrased_term ::= TERM PHR_TERM */
2642 #line 2203 "queryparser/queryparser.lemony"
2643 {
2644  yylhsminor.yy32 = Terms::create(state);
2645  yylhsminor.yy32->add_positional_term(yymsp[-1].minor.yy0);
2646  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2647 }
2648 #line 2649 "queryparser/queryparser_internal.cc"
2649  yymsp[-1].minor.yy32 = yylhsminor.yy32;
2650  break;
2651  case 43: /* group ::= TERM GROUP_TERM */
2652 #line 2219 "queryparser/queryparser.lemony"
2653 {
2654  yymsp[-1].minor.yy14 = TermGroup::create(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); /*P-overwrites-T*/
2655 }
2656 #line 2657 "queryparser/queryparser_internal.cc"
2657  break;
2658  case 44: /* group ::= group GROUP_TERM */
2659 #line 2223 "queryparser/queryparser.lemony"
2660 {
2661  yymsp[-1].minor.yy14->add_term(yymsp[0].minor.yy0);
2662 }
2663 #line 2664 "queryparser/queryparser_internal.cc"
2664  break;
2665  case 45: /* group ::= group EMPTY_GROUP_OK */
2666 #line 2227 "queryparser/queryparser.lemony"
2667 {
2668  yymsp[-1].minor.yy14->set_empty_ok();
2669 }
2670 #line 2671 "queryparser/queryparser_internal.cc"
2671  yy_destructor(yypParser,23,&yymsp[0].minor);
2672  break;
2673  case 46: /* near_expr ::= TERM NEAR TERM */
2674  case 48: /* adj_expr ::= TERM ADJ TERM */ yytestcase(yyruleno==48);
2675 #line 2237 "queryparser/queryparser.lemony"
2676 {
2677  yylhsminor.yy32 = Terms::create(state);
2678  yylhsminor.yy32->add_positional_term(yymsp[-2].minor.yy0);
2679  yylhsminor.yy32->add_positional_term(yymsp[0].minor.yy0);
2680  if (yymsp[-1].minor.yy0) {
2681  yylhsminor.yy32->adjust_window(yymsp[-1].minor.yy0->get_termpos());
2682  delete yymsp[-1].minor.yy0;
2683  }
2684 }
2685 #line 2686 "queryparser/queryparser_internal.cc"
2686  yymsp[-2].minor.yy32 = yylhsminor.yy32;
2687  break;
2688  case 47: /* near_expr ::= near_expr NEAR TERM */
2689  case 49: /* adj_expr ::= adj_expr ADJ TERM */ yytestcase(yyruleno==49);
2690 #line 2247 "queryparser/queryparser.lemony"
2691 {
2692  yymsp[-2].minor.yy32->add_positional_term(yymsp[0].minor.yy0);
2693  if (yymsp[-1].minor.yy0) {
2694  yymsp[-2].minor.yy32->adjust_window(yymsp[-1].minor.yy0->get_termpos());
2695  delete yymsp[-1].minor.yy0;
2696  }
2697 }
2698 #line 2699 "queryparser/queryparser_internal.cc"
2699  break;
2700  default:
2701  /* (50) expr ::= prob_expr (OPTIMIZED OUT) */ Assert(yyruleno!=50);
2702  /* (51) bool_arg ::= expr */ yytestcase(yyruleno==51);
2703  /* (52) prob_expr ::= term (OPTIMIZED OUT) */ Assert(yyruleno!=52);
2704  /* (53) stop_prob ::= prob */ yytestcase(yyruleno==53);
2705  /* (54) stop_term ::= compound_term */ yytestcase(yyruleno==54);
2706  /* (55) term ::= compound_term */ yytestcase(yyruleno==55);
2707  break;
2708 /********** End reduce actions ************************************************/
2709  }
2710  Assert( yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
2711  yygoto = yyRuleInfo[yyruleno].lhs;
2712  yysize = yyRuleInfo[yyruleno].nrhs;
2713  yyact = yy_find_reduce_action(yymsp[yysize].stateno,static_cast<YYCODETYPE>(yygoto));
2714 
2715  /* There are no SHIFTREDUCE actions on nonterminals because the table
2716  ** generator has simplified them to pure REDUCE actions. */
2717  Assert( !(yyact>YY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) );
2718 
2719  /* It is not possible for a REDUCE to be followed by an error */
2720  Assert( yyact!=YY_ERROR_ACTION );
2721 
2722  yymsp += yysize+1;
2723  if (yysize) {
2724  yypParser->yystack.resize(yypParser->yystack.size() + yysize+1);
2725  }
2726  yymsp->stateno = static_cast<YYACTIONTYPE>(yyact);
2727  yymsp->major = static_cast<YYCODETYPE>(yygoto);
2728  yyTraceShift(yypParser, yyact, "... then shift");
2729 }
2730 
2731 /*
2732 ** The following code executes when the parse fails
2733 */
2734 #ifndef YYNOERRORRECOVERY
2735 static void yy_parse_failed(
2736  yyParser *yypParser /* The parser */
2737 ){
2738  ParseARG_FETCH;
2739  LOGLINE(QUERYPARSER, "Fail!");
2740  while( yypParser->yystack.size() > 1 ) yy_pop_parser_stack(yypParser);
2741  /* Here code is inserted which will be executed whenever the
2742  ** parser fails */
2743 /************ Begin %parse_failure code ***************************************/
2744 #line 1805 "queryparser/queryparser.lemony"
2745 
2746  // If we've not already set an error message, set a default one.
2747  if (!state->error) state->error = "parse error";
2748 #line 2749 "queryparser/queryparser_internal.cc"
2749 /************ End %parse_failure code *****************************************/
2750  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2751 }
2752 #endif /* YYNOERRORRECOVERY */
2753 
2754 /*
2755 ** The following code executes when a syntax error first occurs.
2756 */
2757 static void yy_syntax_error(
2758  yyParser *yypParser, /* The parser */
2759  int yymajor, /* The major type of the error token */
2760  ParseTOKENTYPE yyminor /* The minor type of the error token */
2761 ){
2762  ParseARG_FETCH;
2763  (void)yymajor;
2764  (void)yyminor;
2765 #define TOKEN yyminor
2766 /************ Begin %syntax_error code ****************************************/
2767 #line 1810 "queryparser/queryparser.lemony"
2768 
2769  yy_parse_failed(yypParser);
2770 #line 2771 "queryparser/queryparser_internal.cc"
2771 /************ End %syntax_error code ******************************************/
2772  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2773 }
2774 
2775 /*
2776 ** The following is executed when the parser accepts
2777 */
2778 static void yy_accept(
2779  yyParser *yypParser /* The parser */
2780 ){
2781  ParseARG_FETCH;
2782  LOGLINE(QUERYPARSER, "Accept!");
2783 #ifndef YYNOERRORRECOVERY
2784  yypParser->yyerrcnt = -1;
2785 #endif
2786  AssertEq( yypParser->yystack.size(), 1 );
2787  /* Here code is inserted which will be executed whenever the
2788  ** parser accepts */
2789 /*********** Begin %parse_accept code *****************************************/
2790 /*********** End %parse_accept code *******************************************/
2791  ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */
2792 }
2793 
2794 /* The main parser program.
2795 ** The first argument is a pointer to a structure obtained from
2796 ** "ParseAlloc" which describes the current state of the parser.
2797 ** The second argument is the major token number. The third is
2798 ** the minor token. The fourth optional argument is whatever the
2799 ** user wants (and specified in the grammar) and is available for
2800 ** use by the action routines.
2801 **
2802 ** Inputs:
2803 ** <ul>
2804 ** <li> A pointer to the parser (an opaque structure).
2805 ** <li> The major token number.
2806 ** <li> The minor token number.
2807 ** <li> An optional argument of a grammar-specified type.
2808 ** </ul>
2809 **
2810 ** Outputs:
2811 ** None.
2812 */
2813 static
2814 void Parse(
2815  yyParser *yypParser, /* The parser */
2816  int yymajor, /* The major token code number */
2817  ParseTOKENTYPE yyminor /* The value for the token */
2818  ParseARG_PDECL /* Optional %extra_argument parameter */
2819 ){
2820  YYMINORTYPE yyminorunion;
2821  unsigned int yyact; /* The parser action. */
2822 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
2823  int yyendofinput; /* True if we are at the end of input */
2824 #endif
2825 #ifdef YYERRORSYMBOL
2826  int yyerrorhit = 0; /* True if yymajor has invoked an error */
2827 #endif
2828 
2829 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
2830  yyendofinput = (yymajor==0);
2831 #endif
2832  ParseARG_STORE;
2833 
2834 #ifdef XAPIAN_DEBUG_LOG
2835  {
2836  int stateno = yypParser->yystack.back().stateno;
2837  if( stateno < YY_MIN_REDUCE ){
2838  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
2839  "'," << (yyminor ? yyminor->name : "<<null>>") <<
2840  "in state " << stateno);
2841  }else{
2842  LOGLINE(QUERYPARSER, "Input '" << ParseTokenName(yymajor) <<
2843  "'," << (yyminor ? yyminor->name : "<<null>>") <<
2844  "with pending reduce " << stateno-YY_MIN_REDUCE);
2845  }
2846  }
2847 #endif
2848 
2849  do{
2850  yyact = yy_find_shift_action(yypParser,static_cast<YYCODETYPE>(yymajor));
2851  if( yyact >= YY_MIN_REDUCE ){
2852  yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,yyminor);
2853  }else if( yyact <= YY_MAX_SHIFTREDUCE ){
2854  yy_shift(yypParser,yyact,yymajor,yyminor);
2855 #ifndef YYNOERRORRECOVERY
2856  yypParser->yyerrcnt--;
2857 #endif
2858  yymajor = YYNOCODE;
2859  }else if( yyact==YY_ACCEPT_ACTION ){
2860  yypParser->yystack.pop_back();
2861  yy_accept(yypParser);
2862  return;
2863  }else{
2864  Assert( yyact == YY_ERROR_ACTION );
2865  yyminorunion.yy0 = yyminor;
2866 #ifdef YYERRORSYMBOL
2867  int yymx;
2868 #endif
2869  LOGLINE(QUERYPARSER, "Syntax Error!");
2870 #ifdef YYERRORSYMBOL
2871  /* A syntax error has occurred.
2872  ** The response to an error depends upon whether or not the
2873  ** grammar defines an error token "ERROR".
2874  **
2875  ** This is what we do if the grammar does define ERROR:
2876  **
2877  ** * Call the %syntax_error function.
2878  **
2879  ** * Begin popping the stack until we enter a state where
2880  ** it is legal to shift the error symbol, then shift
2881  ** the error symbol.
2882  **
2883  ** * Set the error count to three.
2884  **
2885  ** * Begin accepting and shifting new tokens. No new error
2886  ** processing will occur until three tokens have been
2887  ** shifted successfully.
2888  **
2889  */
2890  if( yypParser->yyerrcnt<0 ){
2891  yy_syntax_error(yypParser,yymajor,yyminor);
2892  }
2893  yymx = yypParser->yystack.back().major;
2894  if( yymx==YYERRORSYMBOL || yyerrorhit ){
2895  LOGLINE(QUERYPARSER, "Discard input token " << ParseTokenName(yymajor));
2896  yy_destructor(yypParser, static_cast<YYCODETYPE>(yymajor), &yyminorunion);
2897  yymajor = YYNOCODE;
2898  }else{
2899  while( !yypParser->yystack.empty()
2900  && yymx != YYERRORSYMBOL
2901  && (yyact = yy_find_reduce_action(
2902  yypParser->yystack.back().stateno,
2903  YYERRORSYMBOL)) >= YY_MIN_REDUCE
2904  ){
2905  yy_pop_parser_stack(yypParser);
2906  }
2907  if( yypParser->yystack.empty() || yymajor==0 ){
2908  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2909  yy_parse_failed(yypParser);
2910 #ifndef YYNOERRORRECOVERY
2911  yypParser->yyerrcnt = -1;
2912 #endif
2913  yymajor = YYNOCODE;
2914  }else if( yymx!=YYERRORSYMBOL ){
2915  yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor);
2916  }
2917  }
2918  yypParser->yyerrcnt = 3;
2919  yyerrorhit = 1;
2920 #elif defined(YYNOERRORRECOVERY)
2921  /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
2922  ** do any kind of error recovery. Instead, simply invoke the syntax
2923  ** error routine and continue going as if nothing had happened.
2924  **
2925  ** Applications can set this macro (for example inside %include) if
2926  ** they intend to abandon the parse upon the first syntax error seen.
2927  */
2928  yy_syntax_error(yypParser,yymajor, yyminor);
2929  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2930  yymajor = YYNOCODE;
2931 
2932 #else /* YYERRORSYMBOL is not defined */
2933  /* This is what we do if the grammar does not define ERROR:
2934  **
2935  ** * Report an error message, and throw away the input token.
2936  **
2937  ** * If the input token is $, then fail the parse.
2938  **
2939  ** As before, subsequent error messages are suppressed until
2940  ** three input tokens have been successfully shifted.
2941  */
2942  if( yypParser->yyerrcnt<=0 ){
2943  yy_syntax_error(yypParser,yymajor, yyminor);
2944  }
2945  yypParser->yyerrcnt = 3;
2946  yy_destructor(yypParser,static_cast<YYCODETYPE>(yymajor),&yyminorunion);
2947  if( yyendofinput ){
2948  yy_parse_failed(yypParser);
2949 #ifndef YYNOERRORRECOVERY
2950  yypParser->yyerrcnt = -1;
2951 #endif
2952  }
2953  yymajor = YYNOCODE;
2954 #endif
2955  }
2956  }while( yymajor!=YYNOCODE && yypParser->yystack.size() > 1 );
2957 #ifdef XAPIAN_DEBUG_LOG
2958  {
2959  int i;
2960  LOGLINE(QUERYPARSER, "Return. Stack=");
2961  for(i=1; i<(int)yypParser->yystack.size(); i++)
2962  LOGLINE(QUERYPARSER, yyTokenName[yypParser->yystack[i].major]);
2963  }
2964 #endif
2965  return;
2966 }
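// parse_query() below feeds this routine one token at a time, for example
// Parse(&parser, AND, NULL, &state) for an operator or
// Parse(&parser, RANGE, range, &state) for a range token; a major token of 0
// signals end of input (see yyendofinput above).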
2967 
2968 // Select C++ syntax highlighting in vim editor: vim: syntax=cpp
2969 #line 799 "queryparser/queryparser.lemony"
2970 
2971 
2972 Query
2973 QueryParser::Internal::parse_query(const string &qs, unsigned flags,
2974  const string &default_prefix)
2975 {
2976  bool cjk_ngram = (flags & FLAG_CJK_NGRAM) || CJK::is_cjk_enabled();
2977 
2978  // Set ranges if we may have to handle ranges in the query.
2979  bool ranges = !rangeprocs.empty() && (qs.find("..") != string::npos);
2980 
2981  termpos term_pos = 1;
2982  Utf8Iterator it(qs), end;
2983 
2984  State state(this, flags);
2985 
2986  // To successfully apply more than one spelling correction to a query
2987  // string, we must keep track of the offset due to previous corrections.
2988  int correction_offset = 0;
2989  corrected_query.resize(0);
2990 
2991  // Stack of prefixes, used for phrases and subexpressions.
2992  list<const FieldInfo *> prefix_stack;
2993 
2994  // If default_prefix is specified, use it. Otherwise, use any list
2995  // that has been set for the empty prefix.
2996  const FieldInfo def_pfx(NON_BOOLEAN, default_prefix);
2997  {
2998  const FieldInfo * default_field_info = &def_pfx;
2999  if (default_prefix.empty()) {
3000  auto f = field_map.find(string());
3001  if (f != field_map.end()) default_field_info = &(f->second);
3002  }
3003 
3004  // We always have the current prefix on the top of the stack.
3005  prefix_stack.push_back(default_field_info);
3006  }
3007 
3008  yyParser parser;
3009 
3010  unsigned newprev = ' ';
3011 main_lex_loop:
3012  enum {
3013  DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP,
3014  IN_GROUP2, EXPLICIT_SYNONYM
3015  } mode = DEFAULT;
3016  while (it != end && !state.error) {
3017  bool last_was_operator = false;
3018  bool last_was_operator_needing_term = false;
3019  if (mode == EXPLICIT_SYNONYM) mode = DEFAULT;
3020  if (false) {
3021 just_had_operator:
3022  if (it == end) break;
3023  mode = DEFAULT;
3024  last_was_operator_needing_term = false;
3025  last_was_operator = true;
3026  }
3027  if (false) {
3028 just_had_operator_needing_term:
3029  last_was_operator_needing_term = true;
3030  last_was_operator = true;
3031  }
3032  if (mode == IN_PHRASED_TERM) mode = DEFAULT;
3033  if (is_whitespace(*it)) {
3034  newprev = ' ';
3035  ++it;
3036  it = find_if(it, end, is_not_whitespace);
3037  if (it == end) break;
3038  }
3039 
3040  if (ranges &&
3041  (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2)) {
3042  // Scan forward to see if this could be the "start of range"
3043  // token. Sadly this has O(n²) tendencies, though at least
3044  // "n" is the number of words in a query which is likely to
3045  // remain fairly small. FIXME: can we tokenise more elegantly?
3046  Utf8Iterator it_initial = it;
3047  Utf8Iterator p = it;
3048  unsigned ch = 0;
3049  while (p != end) {
3050  if (ch == '.' && *p == '.') {
3051  string a;
3052  while (it != p) {
3053  Unicode::append_utf8(a, *it++);
3054  }
3055  // Trim off the trailing ".".
3056  a.resize(a.size() - 1);
3057  ++p;
3058  // Either end of the range can be empty (for an open-ended
3059  // range) but both can't be empty.
3060  if (!a.empty() || (p != end && *p > ' ' && *p != ')')) {
3061  string b;
3062  // Allow any character except whitespace and ')' in the
3063  // upper bound.
3064  while (p != end && *p > ' ' && *p != ')') {
3065  Unicode::append_utf8(b, *p++);
3066  }
3067  Term * range = state.range(a, b);
3068  if (!range) {
3069  state.error = "Unknown range operation";
3070  if (a.find(':', 1) == string::npos) {
3071  goto done;
3072  }
3073  // Might be a boolean filter with ".." in. Leave
3074  // state.error in case it isn't.
3075  it = it_initial;
3076  break;
3077  }
3078  Parse(&parser, RANGE, range, &state);
3079  }
3080  it = p;
3081  goto main_lex_loop;
3082  }
3083  ch = *p;
3084  // Allow any character except whitespace and '(' in the lower
3085  // bound.
3086  if (ch <= ' ' || ch == '(') break;
3087  ++p;
3088  }
3089  }
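 // For example, scanning "10..20" here gives a == "10" and b == "20", and an
 // open-ended range such as "..20" leaves a empty; state.range(a, b) returns a
 // RANGE token, or NULL (presumably when no entry in rangeprocs accepts the
 // pair), in which case "Unknown range operation" is reported.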
3090 
3091  if (!is_wordchar(*it)) {
3092  unsigned prev = newprev;
3093  unsigned ch = *it++;
3094  newprev = ch;
3095  // Drop out of IN_GROUP mode.
3096  if (mode == IN_GROUP || mode == IN_GROUP2)
3097  mode = DEFAULT;
3098  switch (ch) {
3099  case '"':
3100  case 0x201c: // Left curly double quote.
3101  case 0x201d: // Right curly double quote.
3102  // Quoted phrase.
3103  if (mode == DEFAULT) {
3104  // Skip whitespace.
3105  it = find_if(it, end, is_not_whitespace);
3106  if (it == end) {
3107  // Ignore an unmatched " at the end of the query to
3108  // avoid generating an empty pair of QUOTEs which will
3109  // cause a parse error.
3110  goto done;
3111  }
3112  if (is_double_quote(*it)) {
3113  // Ignore empty "" (but only if we're not already
3114  // IN_QUOTES as we don't merge two adjacent quoted
3115  // phrases!)
3116  newprev = *it++;
3117  break;
3118  }
3119  }
3120  if (flags & QueryParser::FLAG_PHRASE) {
3121  if (ch == '"' && it != end && *it == '"') {
3122  ++it;
3123  // Handle "" inside a quoted phrase as an escaped " for
3124  // consistency with quoted boolean terms.
3125  break;
3126  }
3127  Parse(&parser, QUOTE, NULL, &state);
3128  if (mode == DEFAULT) {
3129  mode = IN_QUOTES;
3130  } else {
3131  // Remove the prefix we pushed for this phrase.
3132  if (mode == IN_PREFIXED_QUOTES)
3133  prefix_stack.pop_back();
3134  mode = DEFAULT;
3135  }
3136  }
3137  break;
3138 
3139  case '+': case '-': // Loved or hated term/phrase/subexpression.
3140  // Ignore + or - at the end of the query string.
3141  if (it == end) goto done;
3142  if (prev > ' ' && prev != '(') {
3143  // Or if not after whitespace or an open bracket.
3144  break;
3145  }
3146  if (is_whitespace(*it) || *it == '+' || *it == '-') {
3147  // Ignore + or - followed by a space, or further + or -.
3148  // Postfix + (such as in C++ and H+) is handled as part of
3149  // the term lexing code in parse_term().
3150  newprev = *it++;
3151  break;
3152  }
3153  if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
3154  int token;
3155  if (ch == '+') {
3156  token = LOVE;
3157  } else if (last_was_operator) {
3158  token = HATE_AFTER_AND;
3159  } else {
3160  token = HATE;
3161  }
3162  Parse(&parser, token, NULL, &state);
3163  goto just_had_operator_needing_term;
3164  }
3165  // Need to prevent the term after a LOVE or HATE starting a
3166  // term group...
3167  break;
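 // So with FLAG_LOVEHATE, "+term" is sent as LOVE and "-term" as HATE, while a
 // '-' straight after an operator (e.g. "foo AND -bar") is sent as
 // HATE_AFTER_AND, which grammar rule 5 above reduces just like "foo AND NOT bar".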
3168 
3169  case '(': // Bracketed subexpression.
3170  // Skip whitespace.
3171  it = find_if(it, end, is_not_whitespace);
3172  // Ignore ( at the end of the query string.
3173  if (it == end) goto done;
3174  if (prev > ' ' && strchr("()+-", prev) == NULL) {
3175  // Or if not after whitespace or a bracket or '+' or '-'.
3176  break;
3177  }
3178  if (*it == ')') {
3179  // Ignore empty ().
3180  newprev = *it++;
3181  break;
3182  }
3183  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3184  prefix_stack.push_back(prefix_stack.back());
3185  Parse(&parser, BRA, NULL, &state);
3186  }
3187  break;
3188 
3189  case ')': // End of bracketed subexpression.
3190  if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3191  // Remove the prefix we pushed for the corresponding BRA.
3192  // If brackets are unmatched, it's a syntax error, but
3193  // that's no excuse to SEGV!
3194  if (prefix_stack.size() > 1) prefix_stack.pop_back();
3195  Parse(&parser, KET, NULL, &state);
3196  }
3197  break;
3198 
3199  case '~': // Synonym expansion.
3200  // Ignore at the end of the query string.
3201  if (it == end) goto done;
3202  if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
3203  if (prev > ' ' && strchr("+-(", prev) == NULL) {
3204  // Or if not after whitespace, +, -, or an open bracket.
3205  break;
3206  }
3207  if (!is_wordchar(*it)) {
3208  // Ignore if not followed by a word character.
3209  break;
3210  }
3211  Parse(&parser, SYNONYM, NULL, &state);
3212  mode = EXPLICIT_SYNONYM;
3213  goto just_had_operator_needing_term;
3214  }
3215  break;
3216  }
3217  // Skip any other characters.
3218  continue;
3219  }
3220 
3221  Assert(is_wordchar(*it));
3222 
3223  size_t term_start_index = it.raw() - qs.data();
3224 
3225  newprev = 'A'; // Any letter will do...
3226 
3227  // A term, a prefix, or a boolean operator.
3228  const FieldInfo * field_info = NULL;
3229  if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2 || mode == EXPLICIT_SYNONYM) &&
3230  !field_map.empty()) {
3231  // Check for a fieldname prefix (e.g. title:historical).
3232  Utf8Iterator p = find_if(it, end, is_not_wordchar);
3233  if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
3234  string field;
3235  p = it;
3236  while (*p != ':')
3237  Unicode::append_utf8(field, *p++);
3238  map<string, FieldInfo>::const_iterator f;
3239  f = field_map.find(field);
3240  if (f != field_map.end()) {
3241  // Special handling for prefixed fields, depending on the
3242  // type of the prefix.
3243  unsigned ch = *++p;
3244  field_info = &(f->second);
3245 
3246  if (field_info->type != NON_BOOLEAN) {
3247  // Drop out of IN_GROUP if we're in it.
3248  if (mode == IN_GROUP || mode == IN_GROUP2)
3249  mode = DEFAULT;
3250  it = p;
3251  string name;
3252  if (it != end && is_double_quote(*it)) {
3253  // Quoted boolean term (can contain any character).
3254  bool fancy = (*it != '"');
3255  ++it;
3256  while (it != end) {
3257  if (*it == '"') {
3258  // Interpret "" as an escaped ".
3259  if (++it == end || *it != '"')
3260  break;
3261  } else if (fancy && is_double_quote(*it)) {
3262  // If the opening quote was ASCII, then the
3263  // closing one must be too - otherwise
3264  // the user can't protect non-ASCII double
3265  // quote characters by quoting or escaping.
3266  ++it;
3267  break;
3268  }
3269  Unicode::append_utf8(name, *it++);
3270  }
3271  } else {
3272  // Can't boolean filter prefix a subexpression, so
3273  // just use anything following the prefix until the
3274  // next space or ')' as part of the boolean filter
3275  // term.
3276  while (it != end && *it > ' ' && *it != ')')
3277  Unicode::append_utf8(name, *it++);
3278  }
3279  // Build the unstemmed form in field.
3280  field += ':';
3281  field += name;
3282  // Clear any pending range error.
3283  state.error = NULL;
3284  Term * token = new Term(&state, name, field_info, field);
3285  Parse(&parser, BOOLEAN_FILTER, token, &state);
3286  continue;
3287  }
3288 
3289  if ((flags & FLAG_PHRASE) && is_double_quote(ch)) {
3290  // Prefixed phrase, e.g.: subject:"space flight"
3291  mode = IN_PREFIXED_QUOTES;
3292  Parse(&parser, QUOTE, NULL, &state);
3293  it = p;
3294  newprev = ch;
3295  ++it;
3296  prefix_stack.push_back(field_info);
3297  continue;
3298  }
3299 
3300  if (ch == '(' && (flags & FLAG_BOOLEAN)) {
3301  // Prefixed subexpression, e.g.: title:(fast NEAR food)
3302  mode = DEFAULT;
3303  Parse(&parser, BRA, NULL, &state);
3304  it = p;
3305  newprev = ch;
3306  ++it;
3307  prefix_stack.push_back(field_info);
3308  continue;
3309  }
3310 
3311  if (ch != ':') {
3312  // Allow 'path:/usr/local' but not 'foo::bar::baz'.
3313  while (is_phrase_generator(ch)) {
3314  if (++p == end)
3315  goto not_prefix;
3316  ch = *p;
3317  }
3318  }
3319 
3320  if (is_wordchar(ch)) {
3321  // Prefixed term.
3322  it = p;
3323  } else {
3324 not_prefix:
3325  // It looks like a prefix but isn't, so parse it as
3326  // text instead.
3327  field_info = NULL;
3328  }
3329  }
3330  }
3331  }
3332 
3333 phrased_term:
3334  bool was_acronym;
3335  bool is_cjk_term = false;
3336  string term = parse_term(it, end, cjk_ngram, is_cjk_term, was_acronym);
3337 
3338  if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) &&
3339  (flags & FLAG_BOOLEAN) &&
3340  // Don't want to interpret A.N.D. as an AND operator.
3341  !was_acronym &&
3342  !field_info &&
3343  term.size() >= 2 && term.size() <= 4 && U_isalpha(term[0])) {
3344  // Boolean operators.
3345  string op = term;
3346  if (flags & FLAG_BOOLEAN_ANY_CASE) {
3347  for (string::iterator i = op.begin(); i != op.end(); ++i) {
3348  *i = C_toupper(*i);
3349  }
3350  }
3351  if (op.size() == 3) {
3352  if (op == "AND") {
3353  Parse(&parser, AND, NULL, &state);
3354  goto just_had_operator;
3355  }
3356  if (op == "NOT") {
3357  Parse(&parser, NOT, NULL, &state);
3358  goto just_had_operator;
3359  }
3360  if (op == "XOR") {
3361  Parse(&parser, XOR, NULL, &state);
3362  goto just_had_operator;
3363  }
3364  if (op == "ADJ") {
3365  if (it != end && *it == '/') {
3366  size_t width = 0;
3367  Utf8Iterator p = it;
3368  while (++p != end && U_isdigit(*p)) {
3369  width = (width * 10) + (*p - '0');
3370  }
3371  if (width && (p == end || is_whitespace(*p))) {
3372  it = p;
3373  Parse(&parser, ADJ, new Term(width), &state);
3374  goto just_had_operator;
3375  }
3376  } else {
3377  Parse(&parser, ADJ, NULL, &state);
3378  goto just_had_operator;
3379  }
3380  }
3381  } else if (op.size() == 2) {
3382  if (op == "OR") {
3383  Parse(&parser, OR, NULL, &state);
3384  goto just_had_operator;
3385  }
3386  } else if (op.size() == 4) {
3387  if (op == "NEAR") {
3388  if (it != end && *it == '/') {
3389  size_t width = 0;
3390  Utf8Iterator p = it;
3391  while (++p != end && U_isdigit(*p)) {
3392  width = (width * 10) + (*p - '0');
3393  }
3394  if (width && (p == end || is_whitespace(*p))) {
3395  it = p;
3396  Parse(&parser, NEAR, new Term(width), &state);
3397  goto just_had_operator;
3398  }
3399  } else {
3400  Parse(&parser, NEAR, NULL, &state);
3401  goto just_had_operator;
3402  }
3403  }
3404  }
3405  }
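 // The optional "/<digits>" suffix parsed above (e.g. "fast NEAR/3 food") is
 // passed as the NEAR/ADJ token's value via new Term(width), and the
 // near_expr/adj_expr reduce actions forward that width to adjust_window()
 // through Term::get_termpos().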
3406 
3407  // If no prefix is set, use the default one.
3408  if (!field_info) field_info = prefix_stack.back();
3409 
3410  Assert(field_info->type == NON_BOOLEAN);
3411 
3412  {
3413  string unstemmed_term(term);
3414  term = Unicode::tolower(term);
3415 
3416  // Reuse stem_strategy - STEM_SOME here means "stem terms except
3417  // when used with positional operators".
3418  stem_strategy stem_term = stem_action;
3419  if (stem_term != STEM_NONE) {
3420  if (stemmer.is_none()) {
3421  stem_term = STEM_NONE;
3422  } else if (stem_term == STEM_SOME ||
3423  stem_term == STEM_SOME_FULL_POS) {
3424  if (!should_stem(unstemmed_term) ||
3425  (it != end && is_stem_preventer(*it))) {
3426  // Don't stem this particular term.
3427  stem_term = STEM_NONE;
3428  }
3429  }
3430  }
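 // e.g. a term with a capitalised first letter, or one immediately
 // followed by a stem-preventer character, is left unstemmed under
 // STEM_SOME.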
3431 
3432  Term * term_obj = new Term(&state, term, field_info,
3433  unstemmed_term, stem_term, term_pos++);
3434 
3435  if (is_cjk_term) {
3436  Parse(&parser, CJKTERM, term_obj, &state);
3437  if (it == end) break;
3438  continue;
3439  }
3440 
3441  if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3442  if (it != end) {
3443  if ((flags & FLAG_WILDCARD) && *it == '*') {
3444  Utf8Iterator p(it);
3445  ++p;
3446  if (p == end || !is_wordchar(*p)) {
3447  it = p;
3448  if (mode == IN_GROUP || mode == IN_GROUP2) {
3449  // Drop out of IN_GROUP and flag that the group
3450  // can be empty if all members are stopwords.
3451  if (mode == IN_GROUP2)
3452  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3453  mode = DEFAULT;
3454  }
3455  // Wildcard at end of term (also known as
3456  // "right truncation").
3457  Parse(&parser, WILD_TERM, term_obj, &state);
3458  continue;
3459  }
3460  }
3461  } else {
3462  if (flags & FLAG_PARTIAL) {
3463  if (mode == IN_GROUP || mode == IN_GROUP2) {
3464  // Drop out of IN_GROUP and flag that the group
3465  // can be empty if all members are stopwords.
3466  if (mode == IN_GROUP2)
3467  Parse(&parser, EMPTY_GROUP_OK, NULL, &state);
3468  mode = DEFAULT;
3469  }
3470  // Final term of a partial match query, with no
3471  // following characters - treat as a wildcard.
3472  Parse(&parser, PARTIAL_TERM, term_obj, &state);
3473  continue;
3474  }
3475  }
3476  }
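 // e.g. with FLAG_WILDCARD, "xap*" is passed through as WILD_TERM for
 // right truncation; with FLAG_PARTIAL, a bare trailing "xap" at the very
 // end of the query string is handled similarly to support incremental
 // ("search as you type") searching.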
3477 
3478  // Check spelling, if we're a normal term, and any of the prefixes
3479  // are empty.
3480  if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
3481  const auto& prefixes = field_info->prefixes;
3482  for (const string& prefix : prefixes) {
3483  if (!prefix.empty())
3484  continue;
3485  const string & suggest = db.get_spelling_suggestion(term);
3486  if (!suggest.empty()) {
3487  if (corrected_query.empty()) corrected_query = qs;
3488  size_t term_end_index = it.raw() - qs.data();
3489  size_t n = term_end_index - term_start_index;
3490  size_t pos = term_start_index + correction_offset;
3491  corrected_query.replace(pos, n, suggest);
3492  correction_offset += suggest.size();
3493  correction_offset -= n;
3494  }
3495  break;
3496  }
3497  }
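 // Each accepted suggestion is spliced into a copy of the raw query
 // string; correction_offset tracks how much earlier replacements have
 // shifted the positions of later terms.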
3498 
3499  if (mode == IN_PHRASED_TERM) {
3500  Parse(&parser, PHR_TERM, term_obj, &state);
3501  } else {
3502  // See if the next token will be PHR_TERM - if so, this one
3503  // needs to be TERM not GROUP_TERM.
3504  if ((mode == IN_GROUP || mode == IN_GROUP2) &&
3505  is_phrase_generator(*it)) {
3506  // FIXME: can we clean this up?
3507  Utf8Iterator p = it;
3508  do {
3509  ++p;
3510  } while (p != end && is_phrase_generator(*p));
3511  // Don't generate a phrase unless the phrase generators are
3512  // immediately followed by another term.
3513  if (p != end && is_wordchar(*p)) {
3514  mode = DEFAULT;
3515  }
3516  }
3517 
3518  int token = TERM;
3519  if (mode == IN_GROUP || mode == IN_GROUP2) {
3520  mode = IN_GROUP2;
3521  token = GROUP_TERM;
3522  }
3523  Parse(&parser, token, term_obj, &state);
3524  if (token == TERM && mode != DEFAULT)
3525  continue;
3526  }
3527  }
3528 
3529  if (it == end) break;
3530 
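 // Joining characters such as '.', '-' and '/' act as phrase generators,
 // so e.g. "e.e.cummings" or "fish-and-chips" parses as an implicit phrase.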
3531  if (is_phrase_generator(*it)) {
3532  // Skip multiple phrase generators.
3533  do {
3534  ++it;
3535  } while (it != end && is_phrase_generator(*it));
3536  // Don't generate a phrase unless the phrase generators are
3537  // immediately followed by another term.
3538  if (it != end && is_wordchar(*it)) {
3539  mode = IN_PHRASED_TERM;
3540  term_start_index = it.raw() - qs.data();
3541  goto phrased_term;
3542  }
3543  } else if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3544  int old_mode = mode;
3545  mode = DEFAULT;
3546  if (!last_was_operator_needing_term && is_whitespace(*it)) {
3547  newprev = ' ';
3548  // Skip multiple whitespace.
3549  do {
3550  ++it;
3551  } while (it != end && is_whitespace(*it));
3552  // Don't generate a group unless the terms are only separated
3553  // by whitespace.
3554  if (it != end && is_wordchar(*it)) {
3555  if (old_mode == IN_GROUP || old_mode == IN_GROUP2) {
3556  mode = IN_GROUP2;
3557  } else {
3558  mode = IN_GROUP;
3559  }
3560  }
3561  }
3562  }
3563  }
3564 done:
3565  if (!state.error) {
3566  // Implicitly close any unclosed quotes.
3567  if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
3568  Parse(&parser, QUOTE, NULL, &state);
3569 
3570  // Implicitly close all unclosed brackets.
3571  while (prefix_stack.size() > 1) {
3572  Parse(&parser, KET, NULL, &state);
3573  prefix_stack.pop_back();
3574  }
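 // Passing major token 0 tells the lemon-generated parser that the input
 // has ended, so it can perform any outstanding reductions and accept.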
3575  Parse(&parser, 0, NULL, &state);
3576  }
3577 
3578  errmsg = state.error;
3579  return state.query;
3580 }
3581 
3582 #line 3583 "queryparser/queryparser_internal.cc"