29 #line 1 "queryparser/queryparser.lemony"
74 #include <string_view>
78 #define Parse_ENGINEALWAYSONSTACK
89 return ch < 128 && C_isupper(static_cast<unsigned char>(ch));
94 return ch < 128 && C_isdigit(static_cast<unsigned char>(ch));
99 return ch < 128 && C_isalpha(static_cast<unsigned char>(ch));
128 return ch ==
'+' || ch ==
'#';
139 return ch ==
'"' || ch == 0x201c || ch == 0x201d;
145 if (!
U_isupper(ch) && ch !=
':')
return false;
146 string::size_type len = prefix.length();
147 return (len > 1 && prefix[len - 1] !=
':');
180 explicit Term(
const string &name_)
183 :
name(name_), field_info(field_info_),
187 const string &unstemmed_,
191 : state(state_),
name(name_), field_info(field_info_),
192 unstemmed(unstemmed_), stem(stem_),
pos(pos_),
193 edit_distance(edit_distance_) { }
198 string make_term(
const string & prefix)
const;
201 if (stem == QueryParser::STEM_SOME) stem = QueryParser::STEM_NONE;
212 Query * as_wildcarded_query(
State * state)
const;
222 Query * as_partial_query(
State * state_)
const;
225 Query* as_unbroken_query()
const;
228 void as_positional_unbroken(
Terms* terms)
const;
231 Query as_range_query()
const;
233 Query get_query()
const;
235 Query get_query_with_synonyms()
const;
237 Query get_query_with_auto_synonyms()
const;
246 const char* error = NULL;
248 unsigned int should_stem_mask =
256 : qpi(qpi_), flags(flags_), effective_default_op(qpi_->default_op)
258 if ((flags & QueryParser::FLAG_NO_PROPER_NOUN_HEURISTIC) ||
263 if ((flags & QueryParser::FLAG_NO_POSITIONS)) {
265 effective_default_op = Query::OP_AND;
279 qpi->
unstem.insert(make_pair(
term, unstemmed));
292 if (i.default_grouping) {
297 return new Term(range_query,
str(slot));
301 return new Term(range_query, i.grouping);
303 return new Term(range_query,
string());
310 return effective_default_op;
373 if (state->get_stopper_strategy() == QueryParser::STOP_ALL) {
375 if (stopper && (*stopper)(
name)) {
381 if (stem != QueryParser::STEM_NONE && stem != QueryParser::STEM_ALL)
383 if (!prefix.empty()) {
387 if (stem != QueryParser::STEM_NONE) {
393 if (!unstemmed.empty())
394 state->add_to_unstem(
term, unstemmed);
410 : i(i_),
pos(pos_), first(first_) { }
421 if (first)
return *first;
426 return i == o.
i && first == o.
first;
430 return !(*
this == o);
444 const auto& prefixes = field_info->prefixes;
445 if (prefixes.empty()) {
447 return (*field_info->proc)(
name);
450 Query q = get_query();
452 for (
auto&& prefix : prefixes) {
455 if (!prefix.empty()) {
464 if (syn == end && stem != QueryParser::STEM_NONE) {
467 if (!prefix.empty()) {
485 const unsigned MASK_ENABLE_AUTO_SYNONYMS =
486 QueryParser::FLAG_AUTO_SYNONYMS |
487 QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS;
488 if (state->flags & MASK_ENABLE_AUTO_SYNONYMS)
489 return get_query_with_synonyms();
499 if (op == Query::OP_OR) {
501 }
else if (op == Query::OP_AND) {
516 if (op == Query::OP_OR) {
518 }
else if (op == Query::OP_AND) {
531 const auto& prefixes = field_info->prefixes;
532 if (prefixes.empty()) {
534 return (*field_info->proc)(
name);
536 auto piter = prefixes.begin();
537 const string&
term = make_term(*piter);
540 while (++piter != prefixes.end()) {
541 q |=
Query(make_term(*piter), 1,
pos);
549 const auto& prefixes = field_info->prefixes;
553 subqs.reserve(prefixes.size());
554 for (
auto&& prefix : prefixes) {
556 subqs.emplace_back(Query::OP_EDIT_DISTANCE,
564 Query* q =
new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
572 const auto& prefixes = field_info->prefixes;
575 if (state_->
flags & QueryParser::FLAG_WILDCARD_SINGLE)
576 query_flags |= Query::WILDCARD_PATTERN_SINGLE;
577 if (state_->
flags & QueryParser::FLAG_WILDCARD_MULTI)
578 query_flags |= Query::WILDCARD_PATTERN_MULTI;
580 subqs.reserve(prefixes.size());
581 for (
string root : prefixes) {
584 subqs.push_back(
Query(Query::OP_WILDCARD, root, max, query_flags,
587 Query * q =
new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
597 vector<Query> subqs_partial;
598 vector<Query> subqs_full;
600 for (
const string& prefix : field_info->prefixes) {
601 string root = prefix;
604 subqs_partial.push_back(
Query(Query::OP_WILDCARD, root, max, max_type,
606 if (!state->is_stopword(
this)) {
609 subqs_full.push_back(
Query(make_term(prefix), 1,
pos));
613 Query(Query::OP_SYNONYM,
614 subqs_partial.begin(), subqs_partial.end()),
615 Query(Query::OP_SYNONYM,
616 subqs_full.begin(), subqs_full.end()));
624 const auto& prefixes = field_info->prefixes;
626 vector<Query> prefix_subqs;
629 if (state->flags & QueryParser::FLAG_WORD_BREAKS) {
630 for (WordIterator tk(
name); tk != WordIterator(); ++tk) {
631 const string& token = *tk;
632 for (
const string& prefix : prefixes) {
633 prefix_subqs.push_back(
Query(prefix + token, 1,
pos));
637 q =
new Query(Query::OP_AND, prefix_subqs.begin(), prefix_subqs.end());
644 vector<Query> ngram_subqs;
646 for (
const string& prefix : prefixes) {
648 ngram_subqs.push_back(
Query(prefix + *tk, 1,
pos));
650 prefix_subqs.push_back(
Query(Query::OP_AND,
651 ngram_subqs.begin(), ngram_subqs.end()));
654 q =
new Query(Query::OP_OR, prefix_subqs.begin(), prefix_subqs.end());
674 return (ch && ch < 128 && strchr(
".-/:\\@", ch) != NULL);
680 return (ch && ch < 128 && strchr(
"(/\\@<>=*[{\"", ch) != NULL);
696 if (ch ==
'\'' || ch ==
'&' || ch == 0xb7 || ch == 0x5f4 || ch == 0x2027) {
708 if (ch == 0x2019 || ch == 0x201b)
713 if (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff)
738 if (ch >= 0x200b && (ch <= 0x200d || ch == 0x2060 || ch == 0xfeff))
749 QueryParser::Internal::add_prefix(string_view field, string_view prefix)
753 if (!field.empty() && field.back() ==
':') {
754 field = field.substr(0, field.size() - 1);
756 #ifdef __cpp_lib_associative_heterogeneous_insertion
757 auto [it, inserted] = field_map.try_emplace(field,
NON_BOOLEAN);
759 auto [it, inserted] = field_map.try_emplace(
string(field),
NON_BOOLEAN);
761 auto&&
p = it->second;
770 "add_boolean_prefix() on the "
771 "same field name, or "
772 "add_boolean_prefix() with "
773 "different values of the "
774 "'exclusive' parameter");
778 "and string prefixes currently "
785 auto& prefixes =
p.prefixes;
786 if (find(prefixes.begin(), prefixes.end(), prefix) == prefixes.end()) {
796 if (!field.empty() && field.back() ==
':') {
797 field = field.substr(0, field.size() - 1);
799 #ifdef __cpp_lib_associative_heterogeneous_insertion
800 auto [it, inserted] = field_map.try_emplace(field,
803 auto [it, inserted] = field_map.try_emplace(
string(field),
809 auto&&
p = it->second;
813 "add_boolean_prefix() on the "
814 "same field name, or "
815 "add_boolean_prefix() with "
816 "different values of the "
817 "'exclusive' parameter");
819 if (!
p.prefixes.empty())
821 "and string prefixes currently "
824 "for the same prefix currently not "
829 QueryParser::Internal::add_boolean_prefix(string_view field,
835 if (!field.empty() && field.back() ==
':') {
836 field = field.substr(0, field.size() - 1);
845 #ifdef __cpp_lib_associative_heterogeneous_insertion
846 auto [it, inserted] = field_map.try_emplace(field, type,
849 auto [it, inserted] = field_map.try_emplace(
string(field), type,
852 auto&&
p = it->second;
859 if (
p.type != type) {
861 "add_boolean_prefix() on the "
862 "same field name, or "
863 "add_boolean_prefix() with "
864 "different values of the "
865 "'exclusive' parameter");
869 "and string prefixes currently "
876 auto& prefixes =
p.prefixes;
877 if (find(prefixes.begin(), prefixes.end(), prefix) == prefixes.end()) {
878 prefixes.emplace_back(prefix);
883 QueryParser::Internal::add_boolean_prefix(string_view field,
889 if (!field.empty() && field.back() ==
':') {
890 field = field.substr(0, field.size() - 1);
899 #ifdef __cpp_lib_associative_heterogeneous_insertion
900 auto [it, inserted] = field_map.try_emplace(field, type, proc,
903 auto [it, inserted] = field_map.try_emplace(
string(field), type, proc,
909 auto&&
p = it->second;
911 if (
p.type != type) {
913 "add_boolean_prefix() on the "
914 "same field name, or "
915 "add_boolean_prefix() with "
916 "different values of the "
917 "'exclusive' parameter");
919 if (!
p.prefixes.empty())
921 "and string prefixes currently "
924 "for the same prefix currently not "
931 if (ch ==
'*')
return (flags & QueryParser::FLAG_WILDCARD_MULTI);
932 if (ch ==
'?')
return (flags & QueryParser::FLAG_WILDCARD_SINGLE);
938 bool try_word_break,
unsigned flags,
939 bool& needs_word_break,
bool& was_acronym,
940 size_t& first_wildcard,
942 unsigned& edit_distance)
954 }
while (
p != end && *
p ==
'.' && ++
p != end &&
U_isupper(*
p));
957 if (t.length() > 1) {
969 was_acronym = !
term.empty();
972 const char* start = it.
raw();
974 term.assign(start, it.
raw() - start);
975 needs_word_break =
true;
979 unsigned prevch = *it;
980 if (first_wildcard ==
term.npos &&
987 while (++it != end) {
991 if (first_wildcard ==
term.npos) {
992 first_wildcard = char_count;
1000 if (
p == end)
break;
1001 unsigned nextch = *
p;
1004 unsigned ch_orig = ch;
1027 string suff_term =
term;
1032 if (suff_term.size() -
term.size() == 3) {
1033 suff_term.resize(0);
1043 bool use_suff_term =
false;
1045 if (db.term_exists(lc)) {
1046 use_suff_term =
true;
1049 if (!db.term_exists(lc)) use_suff_term =
true;
1051 if (use_suff_term) {
1053 char_count += (suff_term.size() -
term.size());
1059 if (first_wildcard ==
term.npos &&
1060 (flags & QueryParser::FLAG_WILDCARD)) {
1062 if (it != end && *it ==
'*') {
1064 first_wildcard = char_count;
1068 (flags & QueryParser::FLAG_FUZZY) &&
1070 first_wildcard == string::npos &&
1079 unsigned distance = ch -
'0';
1081 distance = distance * 10 + (*
p -
'0');
1083 if (
p != end && *
p ==
'.') {
1084 if (distance == 0)
goto fractional;
1090 edit_distance = distance;
1092 }
else if (ch ==
'.') {
1094 double fraction = 0.0;
1097 fraction += digit * (*
p -
'0');
1102 unsigned codepoints = 0;
1106 edit_distance = unsigned(codepoints * fraction);
1114 #line 1771 "queryparser/queryparser.lemony"
1143 if (it == filter.end()) {
1144 filter.insert(make_pair(
grouping, qnew));
1146 Query & q = it->second;
1149 bool exclusive = !
grouping.empty();
1168 auto i = filter.begin();
1169 Assert(i != filter.end());
1170 Query q = i->second;
1171 while (++i != filter.end()) {
1201 for (
auto&& t : terms) {
1208 terms.push_back(
term);
1226 vector<Query> subqs;
1227 subqs.reserve(terms.size());
1228 if (state->
flags & QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS) {
1233 vector<Term*>::size_type begin = 0;
1234 vector<Term*>::size_type i = begin;
1235 while (terms.size() - i > 0) {
1236 size_t longest_match = 0;
1239 vector<Term*>::size_type longest_match_end = 0;
1240 if (terms.size() - i >= 2) {
1242 key = terms[i]->name;
1244 key += terms[i + 1]->name;
1247 if (synkey != synend) {
1248 longest_match = key.size();
1249 longest_match_end = i + 2;
1250 for (
auto j = i + 2; j < terms.size(); ++j) {
1252 key += terms[j]->name;
1254 if (synkey == synend)
1256 const string& found = *synkey;
1259 if (found.size() == key.size()) {
1260 longest_match = key.size();
1261 longest_match_end = j + 1;
1266 if (longest_match == 0) {
1268 if (stopper && (*stopper)(terms[i]->
name)) {
1271 if (default_op_is_positional)
1272 terms[i]->need_positions();
1273 subqs.push_back(terms[i]->get_query_with_auto_synonyms());
1278 i = longest_match_end;
1279 key.resize(longest_match);
1281 vector<Query> subqs2;
1282 for (
auto j = begin; j != i; ++j) {
1283 if (stopper && (*stopper)(terms[j]->
name)) {
1286 if (default_op_is_positional)
1287 terms[i]->need_positions();
1288 subqs2.push_back(terms[j]->get_query());
1291 Query q_original_terms;
1292 if (default_op_is_positional) {
1293 q_original_terms =
Query(default_op,
1294 subqs2.begin(), subqs2.end(),
1297 q_original_terms =
Query(default_op,
1298 subqs2.begin(), subqs2.end());
1304 Query q(Query::OP_SYNONYM,
1312 vector<Term*>::const_iterator i;
1313 for (i = terms.begin(); i != terms.end(); ++i) {
1314 if (stopper && (*stopper)((*i)->name)) {
1317 if (default_op_is_positional)
1318 (*i)->need_positions();
1319 subqs.push_back((*i)->get_query_with_auto_synonyms());
1324 if (!empty_ok && stopper &&
1327 stoplist_size < state->stoplist_size()) {
1336 if (!subqs.empty()) {
1337 if (default_op_is_positional) {
1338 q =
new Query(default_op, subqs.begin(), subqs.end(),
1341 q =
new Query(default_op, subqs.begin(), subqs.end());
1376 const vector<Query>& v,
1378 if (op == Query::OP_AND) {
1379 return Query(op, v.begin(), v.end());
1381 return Query(op, v.begin(), v.end(), w);
1386 if (window ==
size_t(-1)) op = Query::OP_AND;
1388 size_t n_terms = terms.size();
1390 if (uniform_prefixes) {
1392 for (
auto&& prefix : *prefixes) {
1393 vector<Query> subqs;
1394 subqs.reserve(n_terms);
1395 for (
Term* t : terms) {
1396 const string&
term = t->make_term(prefix);
1397 if (
term.empty())
continue;
1398 subqs.push_back(
Query(
term, 1, t->pos));
1400 add_to_query(q, Query::OP_OR, opwindow_subq(op, subqs, w));
1404 vector<Query> subqs;
1405 subqs.reserve(n_terms);
1406 for (
Term* t : terms) {
1409 subqs.push_back(
query);
1411 q =
new Query(opwindow_subq(op, subqs, w));
1419 : window(no_pos ? size_t(-1) : 0),
1420 uniform_prefixes(
true),
1426 return new Terms(state->
flags & QueryParser::FLAG_NO_POSITIONS);
1430 for (
auto&& t : terms) {
1437 const auto& term_prefixes =
term->field_info->prefixes;
1438 if (terms.empty()) {
1439 prefixes = &term_prefixes;
1440 }
else if (uniform_prefixes && prefixes != &term_prefixes) {
1441 if (*prefixes != term_prefixes) {
1443 uniform_prefixes =
false;
1446 term->need_positions();
1447 terms.push_back(
term);
1451 if (alternative_window > window) window = alternative_window;
1456 return as_opwindow_query(Query::OP_PHRASE, 0);
1463 for (
Term* t : terms) {
1464 if (!
name.empty()) {
1472 for (
auto&& prefix : *prefixes) {
1475 if (!prefix.empty()) {
1486 Query* q = as_opwindow_query(Query::OP_PHRASE, 0);
1506 return as_opwindow_query(Query::OP_NEAR, w - 1);
1516 return as_opwindow_query(Query::OP_PHRASE, w - 1);
1524 if (state->flags & QueryParser::FLAG_WORD_BREAKS) {
1525 for (WordIterator tk(
name); tk != WordIterator(); ++tk) {
1526 const string& t = *tk;
1527 Term * c =
new Term(state, t, field_info, unstemmed, stem,
pos);
1538 Term * c =
new Term(state, t, field_info, unstemmed, stem,
pos);
1549 #define VET_BOOL_ARGS(A, B, OP_TXT) \
1552 state->error = "Syntax: <expression> " OP_TXT " <expression>";\
1553 yy_parse_failed(yypParser);\
1558 #line 1559 "queryparser/queryparser_internal.cc"
1616 # define INTERFACE 1
1619 #define YYCODETYPE unsigned char
1621 #define YYACTIONTYPE unsigned char
1622 #define ParseTOKENTYPE Term *
1632 #ifndef YYSTACKDEPTH
1633 #define YYSTACKDEPTH 100
1635 #define ParseARG_SDECL State * state;
1636 #define ParseARG_PDECL ,State * state
1637 #define ParseARG_FETCH State * state = yypParser->state
1638 #define ParseARG_STORE yypParser->state = state
1642 #define YY_MAX_SHIFT 41
1643 #define YY_MIN_SHIFTREDUCE 83
1644 #define YY_MAX_SHIFTREDUCE 141
1645 #define YY_ERROR_ACTION 142
1646 #define YY_ACCEPT_ACTION 143
1647 #define YY_NO_ACTION 144
1648 #define YY_MIN_REDUCE 145
1649 #define YY_MAX_REDUCE 203
1661 # define yytestcase(X)
1715 #define YY_ACTTAB_COUNT (352)
1717 24, 25, 145, 144, 144, 3, 144, 34, 11, 10,
1718 2, 27, 144, 17, 13, 12, 111, 112, 113, 104,
1719 94, 16, 4, 146, 122, 105, 95, 7, 6, 1,
1720 8, 11, 10, 119, 27, 123, 17, 5, 5, 111,
1721 112, 113, 104, 94, 16, 4, 124, 122, 143, 41,
1722 41, 19, 9, 41, 21, 14, 18, 135, 36, 28,
1723 35, 33, 32, 40, 40, 40, 9, 40, 21, 14,
1724 18, 130, 36, 28, 35, 33, 11, 10, 128, 27,
1725 133, 17, 131, 120, 111, 112, 113, 104, 94, 16,
1726 4, 15, 122, 29, 29, 29, 9, 29, 21, 14,
1727 18, 134, 36, 28, 35, 33, 30, 30, 30, 9,
1728 30, 21, 14, 18, 132, 36, 28, 35, 33, 31,
1729 31, 19, 9, 31, 21, 14, 18, 144, 36, 28,
1730 35, 33, 153, 153, 153, 9, 153, 21, 14, 18,
1731 144, 36, 28, 35, 33, 26, 26, 26, 9, 26,
1732 21, 14, 18, 144, 36, 28, 35, 33, 23, 23,
1733 23, 9, 23, 21, 14, 18, 144, 36, 28, 35,
1734 33, 39, 39, 39, 9, 39, 21, 14, 18, 144,
1735 36, 28, 35, 33, 201, 201, 144, 27, 144, 22,
1736 144, 144, 111, 112, 113, 201, 201, 16, 4, 172,
1737 122, 172, 172, 172, 172, 38, 37, 38, 37, 1,
1738 8, 172, 144, 129, 127, 129, 127, 5, 27, 144,
1739 20, 144, 172, 111, 112, 113, 102, 144, 16, 4,
1740 27, 122, 20, 144, 144, 111, 112, 113, 106, 144,
1741 16, 4, 27, 122, 20, 144, 144, 111, 112, 113,
1742 103, 144, 16, 4, 27, 122, 20, 144, 144, 111,
1743 112, 113, 107, 144, 16, 4, 27, 122, 22, 144,
1744 144, 111, 112, 113, 144, 144, 16, 4, 203, 122,
1745 203, 203, 203, 203, 144, 144, 144, 144, 159, 159,
1746 203, 36, 28, 35, 33, 144, 144, 162, 144, 144,
1747 162, 203, 36, 28, 35, 33, 160, 144, 125, 160,
1748 144, 36, 28, 35, 33, 163, 121, 125, 163, 126,
1749 36, 28, 35, 33, 161, 114, 144, 161, 126, 36,
1750 28, 35, 33, 144, 144, 158, 158, 144, 36, 28,
1751 35, 33, 6, 1, 8, 144, 144, 144, 144, 144,
1755 36, 36, 0, 41, 41, 5, 41, 6, 8, 9,
1756 10, 11, 41, 13, 8, 9, 16, 17, 18, 19,
1757 20, 21, 22, 0, 24, 19, 20, 2, 3, 4,
1758 5, 8, 9, 23, 11, 13, 13, 12, 12, 16,
1759 17, 18, 19, 20, 21, 22, 24, 24, 27, 28,
1760 29, 30, 31, 32, 33, 34, 35, 13, 37, 38,
1761 39, 40, 7, 28, 29, 30, 31, 32, 33, 34,
1762 35, 14, 37, 38, 39, 40, 8, 9, 15, 11,
1763 13, 13, 25, 13, 16, 17, 18, 19, 20, 21,
1764 22, 21, 24, 28, 29, 30, 31, 32, 33, 34,
1765 35, 13, 37, 38, 39, 40, 28, 29, 30, 31,
1766 32, 33, 34, 35, 13, 37, 38, 39, 40, 28,
1767 29, 30, 31, 32, 33, 34, 35, 41, 37, 38,
1768 39, 40, 28, 29, 30, 31, 32, 33, 34, 35,
1769 41, 37, 38, 39, 40, 28, 29, 30, 31, 32,
1770 33, 34, 35, 41, 37, 38, 39, 40, 28, 29,
1771 30, 31, 32, 33, 34, 35, 41, 37, 38, 39,
1772 40, 28, 29, 30, 31, 32, 33, 34, 35, 41,
1773 37, 38, 39, 40, 8, 9, 41, 11, 41, 13,
1774 41, 41, 16, 17, 18, 19, 20, 21, 22, 0,
1775 24, 2, 3, 4, 5, 6, 7, 6, 7, 4,
1776 5, 12, 41, 14, 15, 14, 15, 12, 11, 41,
1777 13, 41, 23, 16, 17, 18, 19, 41, 21, 22,
1778 11, 24, 13, 41, 41, 16, 17, 18, 19, 41,
1779 21, 22, 11, 24, 13, 41, 41, 16, 17, 18,
1780 19, 41, 21, 22, 11, 24, 13, 41, 41, 16,
1781 17, 18, 19, 41, 21, 22, 11, 24, 13, 41,
1782 41, 16, 17, 18, 41, 41, 21, 22, 0, 24,
1783 2, 3, 4, 5, 41, 41, 41, 41, 34, 35,
1784 12, 37, 38, 39, 40, 41, 41, 32, 41, 41,
1785 35, 23, 37, 38, 39, 40, 32, 41, 13, 35,
1786 41, 37, 38, 39, 40, 32, 21, 13, 35, 24,
1787 37, 38, 39, 40, 32, 21, 41, 35, 24, 37,
1788 38, 39, 40, 41, 41, 34, 35, 41, 37, 38,
1789 39, 40, 3, 4, 5, 41, 41, 41, 41, 41,
1790 41, 12, 41, 41, 41, 41, 41, 41, 41, 41,
1791 41, 41, 41, 41, 41, 41, 41, 41, 41,
1793 #define YY_SHIFT_COUNT (41)
1794 #define YY_SHIFT_MIN (0)
1795 #define YY_SHIFT_MAX (339)
1797 23, 0, 68, 68, 68, 68, 68, 68, 68, 176,
1798 207, 219, 231, 243, 255, 22, 22, 199, 278, 25,
1799 201, 6, 201, 339, 295, 304, 205, 70, 57, 26,
1800 26, 10, 44, 55, 67, 1, 63, 88, 101, 26,
1803 #define YY_REDUCE_COUNT (16)
1804 #define YY_REDUCE_MIN (-36)
1805 #define YY_REDUCE_MAX (301)
1807 21, 35, 65, 78, 91, 104, 117, 130, 143, 254,
1808 265, 274, 283, 292, 301, -36, -35,
1811 154, 154, 154, 154, 154, 154, 154, 154, 154, 155,
1812 142, 142, 142, 142, 170, 142, 142, 171, 202, 142,
1813 172, 142, 171, 151, 142, 142, 152, 142, 178, 150,
1814 149, 199, 142, 180, 142, 179, 177, 142, 142, 148,
1877 #ifdef YYTRACKMAXSTACKDEPTH
1880 #ifndef YYNOERRORRECOVERY
1897 #if defined(YYCOVERAGE) || defined(XAPIAN_DEBUG_LOG)
1900 static const char *
const yyTokenName[] = {
1946 static const char *
const yyRuleName[] = {
1949 "expr ::= bool_arg AND bool_arg",
1950 "expr ::= bool_arg NOT bool_arg",
1951 "expr ::= bool_arg AND NOT bool_arg",
1952 "expr ::= bool_arg AND HATE_AFTER_AND bool_arg",
1953 "expr ::= bool_arg OR bool_arg",
1954 "expr ::= bool_arg XOR bool_arg",
1955 "expr ::= bool_arg SYN bool_arg",
1957 "prob_expr ::= prob",
1959 "prob ::= stop_prob RANGE",
1960 "prob ::= stop_term stop_term",
1961 "prob ::= prob stop_term",
1962 "prob ::= LOVE term",
1963 "prob ::= stop_prob LOVE term",
1964 "prob ::= HATE term",
1965 "prob ::= stop_prob HATE term",
1966 "prob ::= HATE BOOLEAN_FILTER",
1967 "prob ::= stop_prob HATE BOOLEAN_FILTER",
1968 "prob ::= BOOLEAN_FILTER",
1969 "prob ::= stop_prob BOOLEAN_FILTER",
1970 "prob ::= LOVE BOOLEAN_FILTER",
1971 "prob ::= stop_prob LOVE BOOLEAN_FILTER",
1972 "stop_prob ::= stop_term",
1973 "stop_term ::= TERM",
1975 "compound_term ::= EDIT_TERM",
1976 "compound_term ::= WILD_TERM",
1977 "compound_term ::= PARTIAL_TERM",
1978 "compound_term ::= QUOTE phrase QUOTE",
1979 "compound_term ::= phrased_term",
1980 "compound_term ::= group",
1981 "compound_term ::= near_expr",
1982 "compound_term ::= adj_expr",
1983 "compound_term ::= BRA expr KET",
1984 "compound_term ::= SYNONYM TERM",
1985 "compound_term ::= SYNONYM QUOTE phrase QUOTE",
1986 "compound_term ::= UNBROKEN_WORDS",
1988 "phrase ::= UNBROKEN_WORDS",
1989 "phrase ::= phrase TERM",
1990 "phrase ::= phrase UNBROKEN_WORDS",
1991 "phrased_term ::= TERM PHR_TERM",
1992 "phrased_term ::= phrased_term PHR_TERM",
1993 "group ::= TERM GROUP_TERM",
1994 "group ::= group GROUP_TERM",
1995 "group ::= group EMPTY_GROUP_OK",
1996 "near_expr ::= TERM NEAR TERM",
1997 "near_expr ::= near_expr NEAR TERM",
1998 "adj_expr ::= TERM ADJ TERM",
1999 "adj_expr ::= adj_expr ADJ TERM",
2000 "expr ::= prob_expr",
2001 "bool_arg ::= expr",
2002 "prob_expr ::= term",
2003 "stop_prob ::= prob",
2004 "stop_term ::= compound_term",
2005 "term ::= compound_term",
2012 static const char *ParseTokenName(
int tokenType){
2013 if( tokenType>=0 && tokenType<(
int)(
sizeof(yyTokenName)/
sizeof(yyTokenName[0])) ){
2014 return yyTokenName[tokenType];
2023 static const char *ParseRuleName(
int ruleNum){
2024 if( ruleNum>=0 && ruleNum<(
int)(
sizeof(yyRuleName)/
sizeof(yyRuleName[0])) ){
2025 return yyRuleName[ruleNum];
2036 #ifndef YYMALLOCARGTYPE
2037 # define YYMALLOCARGTYPE size_t
2044 #ifdef YYTRACKMAXSTACKDEPTH
2049 pParser->yytos = NULL;
2051 pParser->yystksz = 0;
2052 if( yyGrowStack(pParser) ){
2053 pParser->
yystack = &pParser->yystk0;
2054 pParser->yystksz = 1;
2058 #ifndef YYNOERRORRECOVERY
2062 pParser->yytos = pParser->
yystack;
2063 pParser->
yystack[0].stateno = 0;
2064 pParser->
yystack[0].major = 0;
2073 #ifndef Parse_ENGINEALWAYSONSTACK
2142 #line 2217 "queryparser/queryparser.lemony"
2143 delete (yypminor->
yy0);
2144 #line 2145 "queryparser/queryparser_internal.cc"
2154 #line 2295 "queryparser/queryparser.lemony"
2155 delete (yypminor->
yy1);
2156 #line 2157 "queryparser/queryparser_internal.cc"
2162 #line 2415 "queryparser/queryparser.lemony"
2163 delete (yypminor->
yy18);
2164 #line 2165 "queryparser/queryparser_internal.cc"
2172 #line 2612 "queryparser/queryparser.lemony"
2173 delete (yypminor->
yy36);
2174 #line 2175 "queryparser/queryparser_internal.cc"
2179 #line 2653 "queryparser/queryparser.lemony"
2180 delete (yypminor->
yy32);
2181 #line 2182 "queryparser/queryparser_internal.cc"
2200 LOGLINE(QUERYPARSER,
"Popping " << ParseTokenName(yytos->
major));
2213 #ifndef Parse_ENGINEALWAYSONSTACK
2233 #ifdef YYTRACKMAXSTACKDEPTH
2234 int ParseStackPeak(
yyParser *pParser){
2235 return pParser->yyhwm;
2244 #if defined(YYCOVERAGE)
2256 #if defined(YYCOVERAGE)
2257 int ParseCoverage(FILE *out){
2258 int stateno, iLookAhead, i;
2260 for(stateno=0; stateno<
YYNSTATE; stateno++){
2262 for(iLookAhead=0; iLookAhead<
YYNTOKEN; iLookAhead++){
2264 if( yycoverage[stateno][iLookAhead]==0 ) nMissed++;
2266 fprintf(out,
"State %d lookahead %s %s\n", stateno,
2267 yyTokenName[iLookAhead],
2268 yycoverage[stateno][iLookAhead] ?
"ok" :
"missed");
2285 int stateno = pParser->
yystack.back().stateno;
2289 #if defined(YYCOVERAGE)
2290 yycoverage[stateno][iLookAhead] = 1;
2302 if( iLookAhead<
sizeof(yyFallback)/
sizeof(yyFallback[0])
2303 && (iFallback = yyFallback[iLookAhead])!=0 ){
2305 "FALLBACK " << ParseTokenName(iLookAhead) <<
" => " <<
2306 ParseTokenName(iFallback));
2307 Assert( yyFallback[iFallback]==0 );
2308 iLookAhead = iFallback;
2314 int j = i - iLookAhead + YYWILDCARD;
2325 "WILDCARD " << ParseTokenName(iLookAhead) <<
" => " <<
2326 ParseTokenName(YYWILDCARD));
2347 #ifdef YYERRORSYMBOL
2357 #ifdef YYERRORSYMBOL
2374 static void yyStackOverflow(
yyParser *yypParser){
2379 fprintf(yyTraceFILE,
"%sStack Overflow!\n",yyTracePrompt);
2394 #ifdef XAPIAN_DEBUG_LOG
2397 LOGLINE(QUERYPARSER, zTag <<
" '" <<
2398 yyTokenName[yypParser->
yystack.back().major] <<
2399 "', go to state " << yyNewState);
2401 LOGLINE(QUERYPARSER, zTag <<
" '" <<
2402 yyTokenName[yypParser->
yystack.back().major] <<
2407 # define yyTraceShift(X,Y,Z)
2423 #ifdef YYTRACKMAXSTACKDEPTH
2424 if( (
int)(yypParser->
yystack.size()>yypParser->yyhwm ){
2426 Assert( yypParser->yyhwm == (int)(yypParser->yystack.size() );
2435 static const struct {
2514 unsigned int yyruleno,
2524 (void)yyLookaheadToken;
2525 yymsp = &yypParser->
yystack.back();
2527 #ifdef XAPIAN_DEBUG_LOG
2531 LOGLINE(QUERYPARSER,
"Reduce " << yyruleno <<
" [" <<
2532 ParseRuleName(yyruleno) <<
"], go to state " <<
2533 yymsp[yysize].stateno);
2535 LOGLINE(QUERYPARSER,
"Reduce " << yyruleno <<
" [" <<
2536 ParseRuleName(yyruleno) <<
"].");
2549 yymsp = &(yypParser->
yystack.back()) - 1;
2551 #ifdef YYTRACKMAXSTACKDEPTH
2552 if( (
int)(yypParser->yytos - yypParser->
yystack)>yypParser->yyhwm ){
2554 Assert( yypParser->yyhwm == (
int)(yypParser->yytos - yypParser->
yystack));
2558 if( yypParser->yytos>=yypParser->yystackEnd ){
2559 yyStackOverflow(yypParser);
2563 if( yypParser->yytos>=&yypParser->
yystack[yypParser->yystksz-1] ){
2564 if( yyGrowStack(yypParser) ){
2565 yyStackOverflow(yypParser);
2568 yymsp = yypParser->yytos;
2586 #line 2277 "queryparser/queryparser.lemony"
2589 if (yymsp[0].minor.yy1) {
2590 state->query = *yymsp[0].
minor.
yy1;
2593 state->query =
Query();
2596 #line 2597 "queryparser/queryparser_internal.cc"
2599 #line 2287 "queryparser/queryparser.lemony"
2602 state->query =
Query();
2604 #line 2605 "queryparser/queryparser_internal.cc"
2607 #line 2299 "queryparser/queryparser.lemony"
2613 #line 2614 "queryparser/queryparser_internal.cc"
2617 #line 2305 "queryparser/queryparser.lemony"
2619 if (!yymsp[-2].minor.yy1 && (state->flags & QueryParser::FLAG_PURE_NOT)) {
2632 #line 2633 "queryparser/queryparser_internal.cc"
2636 #line 2320 "queryparser/queryparser.lemony"
2642 #line 2643 "queryparser/queryparser_internal.cc"
2647 #line 2326 "queryparser/queryparser.lemony"
2653 #line 2654 "queryparser/queryparser_internal.cc"
2658 #line 2332 "queryparser/queryparser.lemony"
2664 #line 2665 "queryparser/queryparser_internal.cc"
2668 #line 2338 "queryparser/queryparser.lemony"
2674 #line 2675 "queryparser/queryparser_internal.cc"
2678 #line 2344 "queryparser/queryparser.lemony"
2684 #line 2685 "queryparser/queryparser_internal.cc"
2688 #line 2357 "queryparser/queryparser.lemony"
2695 #line 2696 "queryparser/queryparser_internal.cc"
2698 #line 2369 "queryparser/queryparser.lemony"
2703 if (yymsp[0].minor.yy18->love) {
2704 if (yymsp[0].minor.yy18->love->empty()) {
2706 delete yylhsminor.
yy1;
2708 }
else if (yylhsminor.
yy1) {
2717 if (!yymsp[0].minor.yy18->filter.empty()) {
2718 if (yylhsminor.
yy1) {
2722 yylhsminor.
yy1 =
new Query(Query::OP_SCALE_WEIGHT, yymsp[0].minor.yy18->merge_filters(), 0.0);
2727 if (!yylhsminor.
yy1) {
2736 #line 2737 "queryparser/queryparser_internal.cc"
2740 #line 2417 "queryparser/queryparser.lemony"
2747 #line 2748 "queryparser/queryparser_internal.cc"
2750 #line 2424 "queryparser/queryparser.lemony"
2756 #line 2757 "queryparser/queryparser_internal.cc"
2759 #line 2430 "queryparser/queryparser.lemony"
2762 if (yymsp[0].minor.yy1) {
2776 #line 2777 "queryparser/queryparser_internal.cc"
2779 #line 2447 "queryparser/queryparser.lemony"
2782 if (yymsp[0].minor.yy1)
add_to_query(yymsp[-1].minor.yy18->query, state->default_op(), yymsp[0].
minor.
yy1);
2784 #line 2785 "queryparser/queryparser_internal.cc"
2788 #line 2452 "queryparser/queryparser.lemony"
2791 if (state->default_op() == Query::OP_AND) {
2797 #line 2798 "queryparser/queryparser_internal.cc"
2801 #line 2461 "queryparser/queryparser.lemony"
2803 if (state->default_op() == Query::OP_AND) {
2812 #line 2813 "queryparser/queryparser_internal.cc"
2817 #line 2472 "queryparser/queryparser.lemony"
2822 #line 2823 "queryparser/queryparser_internal.cc"
2826 #line 2477 "queryparser/queryparser.lemony"
2830 #line 2831 "queryparser/queryparser_internal.cc"
2835 #line 2481 "queryparser/queryparser.lemony"
2841 #line 2842 "queryparser/queryparser_internal.cc"
2845 #line 2487 "queryparser/queryparser.lemony"
2850 #line 2851 "queryparser/queryparser_internal.cc"
2854 #line 2492 "queryparser/queryparser.lemony"
2860 #line 2861 "queryparser/queryparser_internal.cc"
2864 #line 2498 "queryparser/queryparser.lemony"
2869 #line 2870 "queryparser/queryparser_internal.cc"
2873 #line 2503 "queryparser/queryparser.lemony"
2880 #line 2881 "queryparser/queryparser_internal.cc"
2884 #line 2510 "queryparser/queryparser.lemony"
2889 q |= yymsp[0].
minor.
yy0->get_query();
2892 #line 2893 "queryparser/queryparser_internal.cc"
2896 #line 2525 "queryparser/queryparser.lemony"
2900 #line 2901 "queryparser/queryparser_internal.cc"
2903 #line 2538 "queryparser/queryparser.lemony"
2905 if (state->is_stopword(yymsp[0].
minor.
yy0)) {
2906 yylhsminor.
yy1 = NULL;
2907 state->add_to_stoplist(yymsp[0].minor.yy0);
2909 yylhsminor.
yy1 =
new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2913 #line 2914 "queryparser/queryparser_internal.cc"
2917 #line 2555 "queryparser/queryparser.lemony"
2919 yylhsminor.
yy1 =
new Query(yymsp[0].minor.yy0->get_query_with_auto_synonyms());
2922 #line 2923 "queryparser/queryparser_internal.cc"
2926 #line 2570 "queryparser/queryparser.lemony"
2928 #line 2929 "queryparser/queryparser_internal.cc"
2931 #line 2573 "queryparser/queryparser.lemony"
2933 #line 2934 "queryparser/queryparser_internal.cc"
2936 #line 2576 "queryparser/queryparser.lemony"
2938 #line 2939 "queryparser/queryparser_internal.cc"
2942 #line 2579 "queryparser/queryparser.lemony"
2944 #line 2945 "queryparser/queryparser_internal.cc"
2949 #line 2582 "queryparser/queryparser.lemony"
2951 #line 2952 "queryparser/queryparser_internal.cc"
2954 #line 2585 "queryparser/queryparser.lemony"
2956 #line 2957 "queryparser/queryparser_internal.cc"
2959 #line 2588 "queryparser/queryparser.lemony"
2961 #line 2962 "queryparser/queryparser_internal.cc"
2964 #line 2591 "queryparser/queryparser.lemony"
2966 #line 2967 "queryparser/queryparser_internal.cc"
2970 #line 2594 "queryparser/queryparser.lemony"
2972 #line 2973 "queryparser/queryparser_internal.cc"
2978 #line 2596 "queryparser/queryparser.lemony"
2980 yymsp[-1].
minor.
yy1 =
new Query(yymsp[0].minor.yy0->get_query_with_synonyms());
2983 #line 2984 "queryparser/queryparser_internal.cc"
2988 #line 2602 "queryparser/queryparser.lemony"
2990 #line 2991 "queryparser/queryparser_internal.cc"
2996 #line 2604 "queryparser/queryparser.lemony"
3000 #line 3001 "queryparser/queryparser_internal.cc"
3003 #line 2614 "queryparser/queryparser.lemony"
3008 #line 3009 "queryparser/queryparser_internal.cc"
3012 #line 2619 "queryparser/queryparser.lemony"
3015 yymsp[0].
minor.
yy0->as_positional_unbroken(yylhsminor.
yy36);
3017 #line 3018 "queryparser/queryparser_internal.cc"
3022 #line 2624 "queryparser/queryparser.lemony"
3026 #line 3027 "queryparser/queryparser_internal.cc"
3029 #line 2628 "queryparser/queryparser.lemony"
3031 yymsp[0].
minor.
yy0->as_positional_unbroken(yymsp[-1].minor.yy36);
3033 #line 3034 "queryparser/queryparser_internal.cc"
3036 #line 2639 "queryparser/queryparser.lemony"
3042 #line 3043 "queryparser/queryparser_internal.cc"
3046 #line 2655 "queryparser/queryparser.lemony"
3050 #line 3051 "queryparser/queryparser_internal.cc"
3053 #line 2659 "queryparser/queryparser.lemony"
3057 #line 3058 "queryparser/queryparser_internal.cc"
3060 #line 2663 "queryparser/queryparser.lemony"
3064 #line 3065 "queryparser/queryparser_internal.cc"
3069 #line 2673 "queryparser/queryparser.lemony"
3074 if (yymsp[-1].minor.yy0) {
3079 #line 3080 "queryparser/queryparser_internal.cc"
3084 #line 2683 "queryparser/queryparser.lemony"
3087 if (yymsp[-1].minor.yy0) {
3092 #line 3093 "queryparser/queryparser_internal.cc"
3129 #ifndef YYNOERRORRECOVERY
3134 LOGLINE(QUERYPARSER,
"Fail!");
3139 #line 2221 "queryparser/queryparser.lemony"
3142 if (!state->error) state->error =
"parse error";
3143 #line 3144 "queryparser/queryparser_internal.cc"
3160 #define TOKEN yyminor
3162 #line 2226 "queryparser/queryparser.lemony"
3165 #line 3166 "queryparser/queryparser_internal.cc"
3177 LOGLINE(QUERYPARSER,
"Accept!");
3178 #ifndef YYNOERRORRECOVERY
3217 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
3220 #ifdef YYERRORSYMBOL
3224 #if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
3225 yyendofinput = (yymajor==0);
3229 #ifdef XAPIAN_DEBUG_LOG
3231 int stateno = yypParser->
yystack.back().stateno;
3233 LOGLINE(QUERYPARSER,
"Input '" << ParseTokenName(yymajor) <<
3234 "'," << (yyminor ? yyminor->name :
"<<null>>") <<
3235 "in state " << stateno);
3237 LOGLINE(QUERYPARSER,
"Input '" << ParseTokenName(yymajor) <<
3238 "'," << (yyminor ? yyminor->name :
"<<null>>") <<
3249 yy_shift(yypParser,yyact,yymajor,yyminor);
3250 #ifndef YYNOERRORRECOVERY
3255 yypParser->
yystack.pop_back();
3260 yyminorunion.
yy0 = yyminor;
3261 #ifdef YYERRORSYMBOL
3264 LOGLINE(QUERYPARSER,
"Syntax Error!");
3265 #ifdef YYERRORSYMBOL
3288 yymx = yypParser->
yystack.back().major;
3289 if( yymx==YYERRORSYMBOL || yyerrorhit ){
3290 LOGLINE(QUERYPARSER,
"Discard input token " << ParseTokenName(yymajor));
3294 while( !yypParser->
yystack.empty()
3295 && yymx != YYERRORSYMBOL
3297 yypParser->
yystack.back().stateno,
3302 if( yypParser->
yystack.empty() || yymajor==0 ){
3305 #ifndef YYNOERRORRECOVERY
3309 }
else if( yymx!=YYERRORSYMBOL ){
3310 yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor);
3315 #elif defined(YYNOERRORRECOVERY)
3344 #ifndef YYNOERRORRECOVERY
3352 #ifdef XAPIAN_DEBUG_LOG
3355 LOGLINE(QUERYPARSER,
"Return. Stack=");
3356 for(i=1; i<=(int)yypParser->
yystack.size(); i++)
3364 #line 1088 "queryparser/queryparser.lemony"
3368 QueryParser::Internal::parse_query(string_view qs,
unsigned flags,
3369 string_view default_prefix)
3376 if (flags & FLAG_WORD_BREAKS) {
3378 "building Xapian to use ICU");
3381 bool try_word_break =
3385 bool ranges = !rangeprocs.empty() && (qs.find(
"..") != string::npos);
3390 State state(
this, flags);
3394 int correction_offset = 0;
3395 corrected_query.resize(0);
3398 list<const FieldInfo *> prefix_stack;
3404 const FieldInfo * default_field_info = &def_pfx;
3405 if (default_prefix.empty()) {
3406 auto f = field_map.find(string_view{});
3407 if (f != field_map.end()) default_field_info = &(f->second);
3411 prefix_stack.push_back(default_field_info);
3416 unsigned newprev =
' ';
3419 DEFAULT, IN_QUOTES, IN_PREFIXED_QUOTES, IN_PHRASED_TERM, IN_GROUP,
3420 IN_GROUP2, EXPLICIT_SYNONYM
3422 while (it != end && !state.
error) {
3423 bool last_was_operator =
false;
3424 bool last_was_operator_needing_term =
false;
3425 if (mode == EXPLICIT_SYNONYM) mode = DEFAULT;
3428 if (it == end)
break;
3430 last_was_operator_needing_term =
false;
3431 last_was_operator =
true;
3434 just_had_operator_needing_term:
3435 last_was_operator_needing_term =
true;
3436 last_was_operator =
true;
3438 if (mode == IN_PHRASED_TERM) mode = DEFAULT;
3443 if (it == end)
break;
3447 (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2)) {
3456 if (ch ==
'.' && *
p ==
'.') {
3462 a.resize(a.size() - 1);
3466 if (!a.empty() || (
p != end && *
p >
' ' && *
p !=
')')) {
3470 while (
p != end && *
p >
' ' && *
p !=
')') {
3475 state.
error =
"Unknown range operation";
3476 if (a.find(
':', 1) == string::npos) {
3492 if (ch <=
' ' || ch ==
'(')
break;
3498 unsigned prev = newprev;
3500 unsigned ch = *it++;
3503 if (mode == IN_GROUP || mode == IN_GROUP2)
3510 if (mode == DEFAULT) {
3527 if (flags & QueryParser::FLAG_PHRASE) {
3528 if (ch ==
'"' && it != end && *it ==
'"') {
3535 if (mode == DEFAULT) {
3539 if (mode == IN_PREFIXED_QUOTES)
3540 prefix_stack.pop_back();
3548 if (it == end)
goto done;
3549 if (prev >
' ' && prev !=
'(') {
3560 if (mode == DEFAULT && (flags & FLAG_LOVEHATE)) {
3564 }
else if (last_was_operator) {
3569 Parse(&parser, token, NULL, &state);
3570 goto just_had_operator_needing_term;
3580 if (it == end)
goto done;
3581 if (prev >
' ' && strchr(
"()+-", prev) == NULL) {
3590 if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3591 prefix_stack.push_back(prefix_stack.back());
3597 if (mode == DEFAULT && (flags & FLAG_BOOLEAN)) {
3601 if (prefix_stack.size() > 1) prefix_stack.pop_back();
3608 if (it == end)
goto done;
3609 if (mode == DEFAULT && (flags & FLAG_SYNONYM)) {
3610 if (prev >
' ' && strchr(
"+-(", prev) == NULL) {
3619 mode = EXPLICIT_SYNONYM;
3621 goto just_had_operator_needing_term;
3645 if (flags & FLAG_WILDCARD_MULTI) {
3647 goto leading_wildcard;
3651 if (flags & FLAG_WILDCARD_SINGLE) {
3653 goto leading_wildcard;
3664 size_t term_start_index = it.raw() - qs.data();
3670 if ((mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2 || mode == EXPLICIT_SYNONYM) &&
3671 !field_map.empty()) {
3674 if (
p != end && *
p ==
':' && ++
p != end && *
p >
' ' && *
p !=
')') {
3679 auto f = field_map.find(field);
3680 if (f != field_map.end()) {
3684 field_info = &(f->second);
3688 if (mode == IN_GROUP || mode == IN_GROUP2)
3694 bool fancy = (*it !=
'"');
3699 if (++it == end || *it !=
'"')
3716 while (it != end && *it >
' ' && *it !=
')')
3724 Term * token =
new Term(&state,
name, field_info, field);
3731 mode = IN_PREFIXED_QUOTES;
3736 prefix_stack.push_back(field_info);
3740 if (ch ==
'(' && (flags & FLAG_BOOLEAN)) {
3747 prefix_stack.push_back(field_info);
3775 bool needs_word_break =
false;
3776 size_t first_wildcard = string::npos;
3777 size_t term_char_count;
3779 string term = parse_term(it, end, try_word_break, flags,
3780 needs_word_break, was_acronym, first_wildcard,
3781 term_char_count, edit_distance);
3783 if (first_wildcard == string::npos &&
3785 (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) &&
3786 (flags & FLAG_BOOLEAN) &&
3793 if (flags & FLAG_BOOLEAN_ANY_CASE) {
3794 for (string::iterator i = op.begin(); i != op.end(); ++i) {
3798 if (op.size() == 3) {
3801 goto just_had_operator;
3805 goto just_had_operator;
3809 goto just_had_operator;
3812 if (it != end && *it ==
'/') {
3816 width = (width * 10) + (*
p -
'0');
3821 goto just_had_operator;
3825 goto just_had_operator;
3830 goto just_had_operator;
3832 }
else if (op.size() == 2) {
3834 Parse(&parser,
OR, NULL, &state);
3835 goto just_had_operator;
3837 }
else if (op.size() == 4) {
3839 if (it != end && *it ==
'/') {
3843 width = (width * 10) + (*
p -
'0');
3848 goto just_had_operator;
3852 goto just_had_operator;
3859 if (!field_info) field_info = prefix_stack.back();
3864 string unstemmed_term(
term);
3870 if (stem_term != STEM_NONE) {
3872 stem_term = STEM_NONE;
3873 }
else if (first_wildcard != string::npos ||
3875 stem_term = STEM_NONE;
3876 }
else if (stem_term == STEM_SOME ||
3877 stem_term == STEM_SOME_FULL_POS) {
3880 stem_term = STEM_NONE;
3883 stem_term = STEM_NONE;
3888 if (first_wildcard != string::npos) {
3890 errmsg =
"Too few characters before wildcard";
3896 unstemmed_term, stem_term, term_pos++,
3899 if (first_wildcard != string::npos ||
3901 if (mode == IN_GROUP || mode == IN_GROUP2) {
3904 if (mode == IN_GROUP2)
3915 if (needs_word_break) {
3918 if (mode == IN_GROUP || mode == IN_GROUP2)
3920 if (it == end)
break;
3924 if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
3925 if (it == end && (flags & FLAG_PARTIAL)) {
3927 if (term_char_count >= min_len) {
3928 if (mode == IN_GROUP || mode == IN_GROUP2) {
3931 if (mode == IN_GROUP2)
3945 if ((flags & FLAG_SPELLING_CORRECTION) && !was_acronym) {
3946 const auto& prefixes = field_info->
prefixes;
3947 for (
const string& prefix : prefixes) {
3948 if (!prefix.empty())
3950 const string & suggest = db.get_spelling_suggestion(
term);
3951 if (!suggest.empty()) {
3952 if (corrected_query.empty()) corrected_query = qs;
3953 size_t term_end_index = it.raw() - qs.data();
3954 size_t n = term_end_index - term_start_index;
3956 corrected_query.replace(
pos, n, suggest);
3964 if (mode == IN_PHRASED_TERM) {
3969 if ((mode == IN_GROUP || mode == IN_GROUP2) &&
3984 if (mode == IN_GROUP || mode == IN_GROUP2) {
3988 Parse(&parser, token, term_obj, &state);
3989 if (token ==
TERM && mode != DEFAULT)
3994 if (it == end)
break;
4004 mode = IN_PHRASED_TERM;
4005 term_start_index = it.raw() - qs.data();
4008 }
else if (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2) {
4009 int old_mode = mode;
4011 if (!last_was_operator_needing_term &&
is_whitespace(*it)) {
4020 if (old_mode == IN_GROUP || old_mode == IN_GROUP2) {
4032 if (mode == IN_QUOTES || mode == IN_PREFIXED_QUOTES)
4036 while (prefix_stack.size() > 1) {
4038 prefix_stack.pop_back();
4040 Parse(&parser, 0, NULL, &state);
4043 errmsg = state.
error;
4047 #line 4048 "queryparser/queryparser_internal.cc"
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Iterator returning unigrams and bigrams.
Parser State shared between the lexer and the parser.
unsigned get_min_partial_prefix_len() const
State(QueryParser::Internal *qpi_, unsigned flags_)
Query::op default_op() const
Xapian::termcount get_max_partial_expansion() const
Term * range(const string &a, const string &b)
Database get_database() const
Xapian::termcount get_max_fuzzy_expansion() const
int get_max_partial_type() const
void add_to_stoplist(const Term *term)
void stoplist_resize(size_t s)
int get_max_wildcard_type() const
const Stopper * get_stopper() const
unsigned get_min_wildcard_prefix_len() const
int get_max_fuzzy_type() const
Query::op effective_default_op
QueryParser::Internal * qpi
Xapian::termcount get_max_wildcard_expansion() const
QueryParser::stop_strategy get_stopper_strategy() const
void add_to_unstem(const string &term, const string &unstemmed)
size_t stoplist_size() const
string stem_term(const string &term)
bool is_stopword(const Term *term) const
unsigned int should_stem_mask
bool operator!=(const SynonymIterator &o) const
bool operator==(const SynonymIterator &o) const
const Xapian::Query operator*() const
SynonymIterator(const Xapian::TermIterator &i_, Xapian::termpos pos_=0, const Xapian::Query *first_=NULL)
SynonymIterator & operator++()
const Xapian::Query * first
std::input_iterator_tag iterator_category
Xapian::termcount_diff difference_type
Xapian::Query & reference
A group of terms separated only by whitespace.
Query * as_group(State *state) const
Convert to a Xapian::Query * using default_op.
TermGroup(Term *t1, Term *t2)
static TermGroup * create(Term *t1, Term *t2)
Factory function - ensures heap allocation.
void add_term(Term *term)
Add a Term object to this TermGroup object.
bool empty_ok
Controls how to handle a group where all terms are stopwords.
void set_empty_ok()
Set the empty_ok flag.
Class used to pass information about a token from lexer to parser.
string get_grouping() const
Term(const string &name_, termpos pos_)
QueryParser::stem_strategy stem
const FieldInfo * field_info
Term(const string &name_)
string make_term(const string &prefix) const
Query get_query_with_synonyms() const
void as_positional_unbroken(Terms *terms) const
Handle text without explicit word breaks in a positional context.
Query * as_fuzzy_query(State *state) const
Term(const string &name_, const FieldInfo *field_info_)
Term(const Xapian::Query &q, const string &grouping)
Query * as_partial_query(State *state_) const
Build a query for a term at the very end of the query string when FLAG_PARTIAL is in use.
Query * as_wildcarded_query(State *state) const
Query get_query_with_auto_synonyms() const
Query as_range_query() const
Range query.
Query * as_unbroken_query() const
Build a query for a string of words without explicit word breaks.
Term(State *state_, const string &name_, const FieldInfo *field_info_, const string &unstemmed_, QueryParser::stem_strategy stem_=QueryParser::STEM_NONE, termpos pos_=0, unsigned edit_distance_=NO_EDIT_DISTANCE)
termpos get_termpos() const
Some terms which form a positional sub-query.
Query * as_adj_query() const
Convert to a Xapian::Query * using OP_PHRASE to implement ADJ.
Query * as_opwindow_query(Query::op op, Xapian::termcount w_delta) const
Convert to a query using the given operator and window size.
size_t window
Window size.
static Terms * create(State *state)
Factory function - ensures heap allocation.
bool uniform_prefixes
Keep track of whether the terms added all have the same list of prefixes.
const vector< string > * prefixes
The list of prefixes of the terms added.
Query * as_near_query() const
Convert to a Xapian::Query * using OP_NEAR.
void adjust_window(size_t alternative_window)
Query * as_phrase_query() const
Convert to a Xapian::Query * using adjacent OP_PHRASE.
Query opwindow_subq(Query::op op, const vector< Query > &v, Xapian::termcount w) const
void add_positional_term(Term *term)
Add an unstemmed Term object to this Terms object.
Query * as_synonym_phrase_query(State *state) const
Convert to a Xapian::Query * using adjacent OP_PHRASE.
An indexed database of documents.
Xapian::TermIterator synonym_keys_begin(std::string_view prefix={}) const
An iterator which returns all terms which have synonyms.
Xapian::TermIterator synonyms_end(std::string_view) const noexcept
End iterator corresponding to synonyms_begin(term).
Xapian::TermIterator synonym_keys_end(std::string_view={}) const noexcept
End iterator corresponding to synonym_keys_begin(prefix).
Xapian::TermIterator synonyms_begin(std::string_view term) const
An iterator which returns all the synonyms for a given term.
Indicates an attempt to use a feature which is unavailable.
Base class for field processors.
Xapian::valueno get_slot() const
InvalidOperationError indicates the API was used in an invalid way.
Xapian::termcount max_fuzzy_expansion
Xapian::Internal::opt_intrusive_ptr< const Stopper > stopper
unsigned min_partial_prefix_len
std::list< std::string > stoplist
Xapian::termcount max_wildcard_expansion
std::multimap< std::string, std::string, std::less<> > unstem
unsigned min_wildcard_prefix_len
std::list< RangeProc > rangeprocs
Xapian::termcount max_partial_expansion
Build a Xapian::Query object from a user query string.
stop_strategy
Stopper strategies, for use with set_stopper_strategy().
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Class representing a query.
op get_type() const noexcept
Get the type of the top level of the query.
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
@ LEAF_MATCH_ALL
Value returned by get_type() for MatchAll or equivalent.
@ OP_NEAR
Match only documents where all subqueries match near each other.
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
@ LEAF_TERM
Value returned by get_type() for a term.
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
@ OP_INVALID
Construct an invalid query.
bool empty() const noexcept
Check if this query is Xapian::Query::MatchNothing.
Xapian::Internal::intrusive_ptr< Internal > internal
Xapian::Internal::intrusive_ptr< StemImplementation > internal
bool is_none() const
Return true if this is a no-op stemmer.
Abstract base class for stop-word decision functor.
Class for iterating over a list of terms.
void skip_to(std::string_view term)
Advance the iterator to term `term`.
UnimplementedError indicates an attempt to use an unimplemented feature.
An iterator which returns Unicode character values from a UTF-8 encoded string.
const char * raw() const
Return the raw const char* pointer for the current position.
#define UNSIGNED_OVERFLOW_OK(X)
Hierarchy of classes which Xapian can throw as exceptions.
string str(int value)
Convert int to std::string.
category get_category(int info)
void append_utf8(std::string &s, unsigned ch)
Append the UTF-8 representation of a single Unicode character to a std::string.
unsigned tolower(unsigned ch)
Convert a Unicode character to lowercase.
@ LOWERCASE_LETTER
Letter, lowercase (Ll)
@ MODIFIER_LETTER
Letter, modifier (Lm)
@ OTHER_LETTER
Letter, other (Lo)
@ DECIMAL_DIGIT_NUMBER
Number, decimal digit (Nd)
@ TITLECASE_LETTER
Letter, titlecase (Lt)
@ UPPERCASE_LETTER
Letter, uppercase (Lu)
bool is_wordchar(unsigned ch)
Test if a given Unicode character is a "word character".
bool is_currency(unsigned ch)
Test if a given Unicode character is a currency symbol.
bool is_whitespace(unsigned ch)
Test if a given Unicode character is a whitespace character.
The Xapian namespace contains public interfaces for the Xapian library.
XAPIAN_TERMCOUNT_BASE_TYPE termcount_diff
A signed difference between two counts of terms.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Various assertion macros.
static void yy_pop_parser_stack(yyParser *pParser)
bool is_not_whitespace(unsigned ch)
const unsigned UNICODE_IGNORE
Value representing "ignore this" when returned by check_infix() or check_infix_digit().
#define VET_BOOL_ARGS(A, B, OP_TXT)
static const YYCODETYPE yy_lookahead[]
static unsigned int yy_find_shift_action(yyParser *pParser, YYCODETYPE iLookAhead)
bool is_extended_wildcard(unsigned ch, unsigned flags)
bool should_stem(const string &term, const State &state)
static const YYACTIONTYPE yy_action[]
static void yy_syntax_error(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor)
static const YYACTIONTYPE yy_default[]
static void add_to_query(Query *&q, Query::op op, Query *term)
bool U_isupper(unsigned ch)
#define YY_MIN_SHIFTREDUCE
static const unsigned short int yy_shift_ofst[]
static constexpr unsigned NO_EDIT_DISTANCE
bool is_not_wordchar(unsigned ch)
bool is_digit(unsigned ch)
static void yy_shift(yyParser *yypParser, int yyNewState, int yyMajor, ParseTOKENTYPE yyMinor)
unsigned check_infix_digit(unsigned ch)
bool is_suffix(unsigned ch)
static const short yy_reduce_ofst[]
static const struct @17 yyRuleInfo[]
static void yy_accept(yyParser *)
static int yy_find_reduce_action(int stateno, YYCODETYPE iLookAhead)
bool U_isdigit(unsigned ch)
bool is_stem_preventer(unsigned ch)
bool is_positional(Xapian::Query::op op)
static constexpr unsigned DEFAULT_EDIT_DISTANCE
static void ParseFinalize(yyParser *pParser)
bool is_phrase_generator(unsigned ch)
unsigned check_infix(unsigned ch)
#define YY_MAX_SHIFTREDUCE
#define yyTraceShift(X, Y, Z)
bool U_isalpha(unsigned ch)
bool prefix_needs_colon(const string &prefix, unsigned ch)
static void yy_parse_failed(yyParser *)
bool is_double_quote(unsigned ch)
static void ParseInit(yyParser *pParser)
static void Parse(yyParser *yypParser, int yymajor, ParseTOKENTYPE yyminor ParseARG_PDECL)
static void yy_reduce(yyParser *yypParser, unsigned int yyruleno, int yyLookahead, ParseTOKENTYPE yyLookaheadToken)
static void yy_destructor(yyParser *yypParser, YYCODETYPE yymajor, YYMINORTYPE *yypminor)
The non-lemon-generated parts of the QueryParser class.
static Xapian::Stem stemmer
Convert types to std::string.
Various handy string-related helpers.
bool startswith(std::string_view s, char pfx)
Information about how to handle a field prefix in the query string.
std::vector< std::string > prefixes
Field prefix strings.
filter_type type
The type of this field.
void append_filter(const string &grouping, const Query &qnew)
Query merge_filters() const
void add_filter_range(const string &grouping, const Query &range)
void append_filter_range(const string &grouping, const Query &range)
map< string, Query > filter
void add_filter(const string &grouping, const Query &q)
ParseARG_SDECL vector< yyStackEntry > yystack
yyStackEntry(YYACTIONTYPE stateno_, YYCODETYPE major_, ParseTOKENTYPE minor_)
Unicode and UTF-8 related classes and functions.
bool is_unbroken_script(unsigned p)
size_t get_unbroken(Xapian::Utf8Iterator &it)
bool is_ngram_enabled()
Should we use the n-gram code?
Handle text without explicit word breaks.