xapian-core  2.0.0
queryparser.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005-2026 Olly Betts
5  * Copyright (C) 2010 Adam Sjøgren
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "xapian/error.h"
25 #include <xapian/queryparser.h>
26 #include <xapian/termiterator.h>
27 
28 #include "api/vectortermlist.h"
29 #include "queryparser_internal.h"
30 
31 #include <cstring>
32 
33 using namespace Xapian;
34 
35 using namespace std;
36 
37 // Default implementation in case the user hasn't implemented it.
// Returns a fixed, generic description string.
// NOTE(review): the signature line (listing line 39) is missing from this
// extract; the page's cross-reference index maps queryparser.cc:39 to
// Stopper::get_description() const -- confirm against the real file.
38 string
40 {
41  return "Xapian::Stopper subclass";
42 }
43 
// Build a description of the form "Xapian::SimpleStopper(w1 w2 ...)", listing
// every entry of stop_words in container iteration order, separated by single
// spaces (no separator before the first word).
// NOTE(review): the signature line (listing line 45) is missing from this
// extract; the cross-reference index places a get_description() const
// definition at queryparser.cc:45 -- presumably SimpleStopper's; confirm.
44 string
46 {
47  string desc("Xapian::SimpleStopper(");
48  for (auto i = stop_words.begin(); i != stop_words.end(); ++i) {
49  if (i != stop_words.begin()) desc += ' ';
50  desc += *i;
51  }
52  desc += ')';
53  return desc;
54 }
55 
57 
59 
// Copy constructor: explicitly defaulted, so the compiler generates a
// member-wise copy.
60 QueryParser::QueryParser(const QueryParser &) = default;
61 
// Copy assignment: explicitly defaulted, so the compiler generates a
// member-wise assignment.
// NOTE(review): the return-type line (listing line 62) is missing from this
// extract; the cross-reference index shows
// `QueryParser & operator=(const QueryParser &o)` -- confirm.
63 QueryParser::operator=(const QueryParser &) = default;
64 
66 
69 
71 
73 
// Set the stemmer: stores `stemmer` in internal->stemmer.
// NOTE(review): the signature line (listing line 75) is missing from this
// extract; the cross-reference index maps queryparser.cc:75 to
// QueryParser::set_stemmer(const Xapian::Stem &stemmer) -- confirm.
74 void
76 {
77  internal->stemmer = stemmer;
78 }
79 
// Set the stemming strategy: stores `strategy` in internal->stem_action.
// NOTE(review): the signature line (listing line 81) is missing from this
// extract; the cross-reference index maps queryparser.cc:81 to
// QueryParser::set_stemming_strategy(stem_strategy strategy) -- confirm.
80 void
82 {
83  internal->stem_action = strategy;
84 }
85 
// Set the stopper: stores `stopper` in internal->stopper.
// NOTE(review): the signature line (listing line 87) is missing from this
// extract; the cross-reference index maps queryparser.cc:87 to
// QueryParser::set_stopper(const Stopper *stop=NULL). The body uses the name
// `stopper`, so the definition's parameter name presumably differs from the
// indexed declaration -- confirm.
86 void
88 {
89  internal->stopper = stopper;
90 }
91 
// Set the stopper strategy: stores `strategy` in internal->stop_mode.
// NOTE(review): the signature line (listing line 93) is missing from this
// extract; the cross-reference index maps queryparser.cc:93 to
// QueryParser::set_stopper_strategy(stop_strategy strategy) -- confirm.
92 void
94 {
95  internal->stop_mode = strategy;
96 }
97 
// Set the default operator. Only OP_AND, OP_OR, OP_NEAR, OP_PHRASE,
// OP_ELITE_SET, OP_SYNONYM and OP_MAX pass the switch untouched; any other
// value reaches the `default:` branch, which uses the message assembled below.
// An accepted value is then stored in internal->default_op.
// NOTE(review): two lines are missing from this extract:
//  - listing line 99 (the signature); the cross-reference index maps
//    queryparser.cc:99 to QueryParser::set_default_op(Query::op default_op);
//  - listing line 112 (the statement that consumes the message); presumably
//    it throws an exception so the final assignment is not reached for a
//    rejected operator -- confirm against the real file.
98 void
100 {
101  switch (default_op) {
102  case Query::OP_AND:
103  case Query::OP_OR:
104  case Query::OP_NEAR:
105  case Query::OP_PHRASE:
106  case Query::OP_ELITE_SET:
107  case Query::OP_SYNONYM:
108  case Query::OP_MAX:
109  // These are OK.
110  break;
111  default:
// Adjacent string literals are concatenated by the compiler into one message.
113  "QueryParser::set_default_op() only accepts "
114  "OP_AND"
115  ", "
116  "OP_OR"
117  ", "
118  "OP_NEAR"
119  ", "
120  "OP_PHRASE"
121  ", "
122  "OP_ELITE_SET"
123  ", "
124  "OP_SYNONYM"
125  " or "
126  "OP_MAX");
127  }
128  internal->default_op = default_op;
129 }
130 
// Get the current default operator: returns internal->default_op.
// NOTE(review): the signature line (listing line 132) is missing from this
// extract; the cross-reference index maps queryparser.cc:132 to
// QueryParser::get_default_op() const -- confirm.
131 Query::op
133 {
134  return internal->default_op;
135 }
136 
// Specify the database being searched: stores `db` in internal->db.
// NOTE(review): listing line 138 is missing from this extract and presumably
// held both the signature and the opening brace; the cross-reference index
// maps queryparser.cc:138 to QueryParser::set_database(const Database &db)
// -- confirm.
137 void
139  internal->db = db;
140 }
141 
// Set the expansion limit and limit type for wildcard, partial and/or fuzzy
// terms. `flags` selects which of the three settings are updated: for each of
// FLAG_WILDCARD, FLAG_PARTIAL and FLAG_FUZZY that is set, the matching
// max_*_expansion / max_*_type pair on `internal` receives `max_expansion`
// and `max_type`. Settings whose flag is not set are left unchanged.
// NOTE(review): listing line 143 (function name and first parameter) is
// missing from this extract; the cross-reference index maps
// queryparser.cc:143 to QueryParser::set_max_expansion(Xapian::termcount
// max_expansion, int max_type, unsigned flags) -- confirm.
142 void
144  int max_type,
145  unsigned flags)
146 {
147  if (flags & FLAG_WILDCARD) {
148  internal->max_wildcard_expansion = max_expansion;
149  internal->max_wildcard_type = max_type;
150  }
151  if (flags & FLAG_PARTIAL) {
152  internal->max_partial_expansion = max_expansion;
153  internal->max_partial_type = max_type;
154  }
155  if (flags & FLAG_FUZZY) {
156  internal->max_fuzzy_expansion = max_expansion;
157  internal->max_fuzzy_type = max_type;
158  }
159 }
160 
// Specify the minimum length of the fixed initial portion of a pattern.
// `flags` selects which settings are updated: FLAG_WILDCARD stores
// `min_prefix_len` in internal->min_wildcard_prefix_len, FLAG_PARTIAL stores
// it in internal->min_partial_prefix_len. A setting whose flag is not set is
// left unchanged; other bits in `flags` are ignored here.
161 void
162 QueryParser::set_min_wildcard_prefix(unsigned min_prefix_len,
163  unsigned flags)
164 {
165  if (flags & FLAG_WILDCARD) {
166  internal->min_wildcard_prefix_len = min_prefix_len;
167  }
168  if (flags & FLAG_PARTIAL) {
169  internal->min_partial_prefix_len = min_prefix_len;
170  }
171 }
172 
// Parse `query_string` into a Query.
//
// Visible steps:
//  1. Unless FLAG_ACCUMULATE is set, clear the stoplist and unstem data kept
//     on `internal` from earlier calls.
//  2. Reset internal->errmsg.
//  3. Return a default-constructed Query for an empty query string.
//  4. Delegate to internal->parse_query(). If that leaves errmsg equal to
//     "parse error", reduce `flags` to a subset and parse once more.
//  5. Return the result.
//
// NOTE(review): listing lines 189-191 (the rest of the flag mask) and 195
// (a statement just before the return) are missing from this extract.
// Presumably the mask keeps only flags that do not affect query syntax, and
// line 195 reports any remaining errmsg to the caller -- confirm against the
// real file.
173 Query
174 QueryParser::parse_query(string_view query_string, unsigned flags,
175  string_view default_prefix)
176 {
177  if (!(flags & FLAG_ACCUMULATE)) {
178  internal->stoplist.clear();
179  internal->unstem.clear();
180  }
181  internal->errmsg = NULL;
182 
183  if (query_string.empty()) return Query();
184 
185  Query result = internal->parse_query(query_string, flags, default_prefix);
// Retry only for the generic "parse error" message, with a reduced flag set.
186  if (internal->errmsg && strcmp(internal->errmsg, "parse error") == 0) {
187  flags &=
188  FLAG_NGRAMS |
192  result = internal->parse_query(query_string, flags, default_prefix);
193  }
194 
196  return result;
197 }
198 
// Add a free-text field term prefix. Thin wrapper: forwards `field` and
// `prefix` unchanged to internal->add_prefix().
199 void
200 QueryParser::add_prefix(string_view field, string_view prefix)
201 {
202  internal->add_prefix(field, prefix);
203 }
204 
// Forwards `field` and `proc` unchanged to internal->add_prefix().
// NOTE(review): the signature line (listing line 206) is missing from this
// extract and this overload is not in the cross-reference index; presumably
// it is the add_prefix() overload taking a Xapian::FieldProcessor* -- confirm.
205 void
207 {
208  internal->add_prefix(field, proc);
209 }
210 
// Add a boolean term prefix. Thin wrapper: forwards `field`, `prefix` and the
// `grouping` pointer unchanged to internal->add_boolean_prefix().
211 void
212 QueryParser::add_boolean_prefix(string_view field, string_view prefix,
213  const string* grouping)
214 {
215  internal->add_boolean_prefix(field, prefix, grouping);
216 }
217 
// Forwards `field`, `proc` and the `grouping` pointer unchanged to
// internal->add_boolean_prefix().
// NOTE(review): listing line 219 (function name and first parameter) is
// missing from this extract; presumably this is the add_boolean_prefix()
// overload that takes a FieldProcessor instead of a prefix string -- confirm.
218 void
220  Xapian::FieldProcessor * proc,
221  const string* grouping)
222 {
223  internal->add_boolean_prefix(field, proc, grouping);
224 }
225 
// Begin iterator over internal->stoplist: wraps a new VectorTermList built
// from the list's [begin, end) range in a TermIterator.
// NOTE(review): the return-type and signature lines (listing lines 226-227)
// are missing from this extract; the cross-reference index maps
// queryparser.cc:227 to `TermIterator QueryParser::stoplist_begin() const`
// -- confirm.
228 {
229  const list<string> & sl = internal->stoplist;
230  return TermIterator(new VectorTermList(sl.begin(), sl.end()));
231 }
232 
// Begin iterator over the values stored in internal->unstem under key `term`.
// NOTE(review): the return-type line (listing line 233) is missing from this
// extract; the cross-reference index shows the return type as TermIterator
// and describes the values as the unstemmed forms of a stemmed query term
// -- confirm.
234 QueryParser::unstem_begin(string_view term) const
235 {
// std::less<> is a transparent comparator, which lets equal_range() below
// accept the string_view key directly.
236  using unstem_type = multimap<string, string, std::less<>>;
// Iterator wrapper whose operator* yields the mapped value (->second) instead
// of the key/value pair, so the range reads as a sequence of strings.
237  struct range_adaptor : public unstem_type::iterator {
238  range_adaptor(unstem_type::iterator i) : unstem_type::iterator(i) {}
239  const string & operator*() const { return (*this)->second; }
240  };
241  auto range = internal->unstem.equal_range(term);
242  return TermIterator(new VectorTermList(range_adaptor(range.first),
243  range_adaptor(range.second)));
244 }
245 
// Register a range processor: appends RangeProc(range_proc, grouping) to
// internal->rangeprocs.
// NOTE(review): listing line 247 (function name and first parameter) is
// missing from this extract; the cross-reference index maps
// queryparser.cc:247 to QueryParser::add_rangeprocessor(
// Xapian::RangeProcessor *range_proc, const std::string *grouping) -- confirm.
246 void
248  const std::string* grouping)
249 {
250  internal->rangeprocs.push_back(RangeProc(range_proc, grouping));
251 }
252 
// Get the spelling-corrected query string: returns internal->corrected_query
// by value.
// NOTE(review): the signature line (listing line 254) is missing from this
// extract; the cross-reference index maps queryparser.cc:254 to
// QueryParser::get_corrected_query_string() const -- confirm.
253 string
255 {
256  return internal->corrected_query;
257 }
258 
// Return a string describing this object. Currently a fixed placeholder that
// reflects none of the parser's configuration (see the FIXME below).
// NOTE(review): the signature line (listing line 260) is missing from this
// extract; the cross-reference index maps queryparser.cc:260 to
// QueryParser::get_description() const -- confirm.
259 string
261 {
262  // FIXME : describe better!
263  return "Xapian::QueryParser()";
264 }
This class stores a list of terms.
An indexed database of documents.
Definition: database.h:75
Base class for field processors.
Definition: queryparser.h:468
virtual ~FieldProcessor()
Destructor.
Definition: queryparser.cc:58
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
Indicates a query string can't be parsed.
Definition: error.h:875
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:516
void set_database(const Database &db)
Specify the database being searched.
Definition: queryparser.cc:138
void add_rangeprocessor(Xapian::RangeProcessor *range_proc, const std::string *grouping=NULL)
Register a RangeProcessor.
Definition: queryparser.cc:247
void set_stemmer(const Xapian::Stem &stemmer)
Set the stemmer.
Definition: queryparser.cc:75
QueryParser()
Default constructor.
Definition: queryparser.cc:70
void set_min_wildcard_prefix(unsigned min_prefix_len, unsigned flags=FLAG_WILDCARD|FLAG_PARTIAL)
Specify minimum length for fixed initial portion in wildcard patterns.
Definition: queryparser.cc:162
~QueryParser()
Destructor.
Definition: queryparser.cc:72
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
Definition: queryparser.cc:81
void add_boolean_prefix(std::string_view field, std::string_view prefix, const std::string *grouping=NULL)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
Definition: queryparser.cc:212
std::string get_description() const
Return a string describing this object.
Definition: queryparser.cc:260
Xapian::Internal::intrusive_ptr_nonnull< Internal > internal
Definition: queryparser.h:519
Query::op get_default_op() const
Get the current default operator.
Definition: queryparser.cc:132
void set_max_expansion(Xapian::termcount max_expansion, int max_type=Xapian::Query::WILDCARD_LIMIT_ERROR, unsigned flags=FLAG_WILDCARD|FLAG_PARTIAL|FLAG_FUZZY)
Specify the maximum expansion of a wildcard and/or partial and/or fuzzy term.
Definition: queryparser.cc:143
void set_stopper_strategy(stop_strategy strategy)
Set the stopper strategy.
Definition: queryparser.cc:93
void add_prefix(std::string_view field, std::string_view prefix)
Add a free-text field term prefix.
Definition: queryparser.cc:200
TermIterator unstem_begin(std::string_view term) const
Begin iterator over unstemmed forms of the given stemmed query term.
Definition: queryparser.cc:234
QueryParser & operator=(const QueryParser &o)
Assignment.
Query parse_query(std::string_view query_string, unsigned flags=FLAG_DEFAULT, std::string_view default_prefix={})
Parse a query.
Definition: queryparser.cc:174
void set_default_op(Query::op default_op)
Set the default operator.
Definition: queryparser.cc:99
void set_stopper(const Stopper *stop=NULL)
Set the stopper.
Definition: queryparser.cc:87
std::string get_corrected_query_string() const
Get the spelling-corrected query string.
Definition: queryparser.cc:254
stop_strategy
Stopper strategies, for use with set_stopper_strategy().
Definition: queryparser.h:796
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Definition: queryparser.h:788
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
Definition: queryparser.h:635
@ FLAG_ACCUMULATE
Accumulate unstem and stoplist results.
Definition: queryparser.h:726
@ FLAG_NO_PROPER_NOUN_HEURISTIC
Turn off special handling of capitalised words.
Definition: queryparser.h:775
@ FLAG_FUZZY
Support fuzzy matching.
Definition: queryparser.h:711
@ FLAG_WORD_BREAKS
Find word breaks for text in scripts without explicit word breaks.
Definition: queryparser.h:658
@ FLAG_WILDCARD
Support wildcards.
Definition: queryparser.h:549
@ FLAG_NO_POSITIONS
Produce a query which doesn't use positional information.
Definition: queryparser.h:737
@ FLAG_PARTIAL
Enable partial matching.
Definition: queryparser.h:577
TermIterator stoplist_begin() const
Begin iterator over terms omitted from the query as stopwords.
Definition: queryparser.cc:227
Class representing a query.
Definition: query.h:45
op
Query operators.
Definition: query.h:78
@ OP_MAX
Pick the maximum weight of any subquery.
Definition: query.h:249
@ OP_NEAR
Match only documents where all subqueries match near each other.
Definition: query.h:140
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Definition: query.h:152
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
Base class for range processors.
Definition: queryparser.h:140
virtual ~RangeProcessor()
Destructor.
Definition: queryparser.cc:56
virtual std::string get_description() const
Return a string describing this object.
Definition: queryparser.cc:45
Class representing a stemming algorithm.
Definition: stem.h:74
Abstract base class for stop-word decision functor.
Definition: queryparser.h:50
virtual std::string get_description() const
Return a string describing this object.
Definition: queryparser.cc:39
Class for iterating over a list of terms.
Definition: termiterator.h:41
string term
Hierarchy of classes which Xapian can throw as exceptions.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
const Query operator*(double factor, const Query &q)
Scale a Xapian::Query object using OP_SCALE_WEIGHT.
Definition: query.h:827
parsing a user query string to build a Xapian::Query object
The non-lemon-generated parts of the QueryParser class.
static Xapian::Stem stemmer
Definition: stemtest.cc:42
Class for iterating over a list of terms.
A vector-like container of terms which can be iterated.