xapian-core  2.0.0
queryparser.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005-2026 Olly Betts
5  * Copyright (C) 2010 Adam Sjøgren
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_QUERYPARSER_H
23 #define XAPIAN_INCLUDED_QUERYPARSER_H
24 
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/queryparser.h> directly; include <xapian.h> instead.
27 #endif
28 
29 #include <xapian/attributes.h>
30 #include <xapian/intrusive_ptr.h>
31 #include <xapian/query.h>
32 #include <xapian/termiterator.h>
33 #include <xapian/visibility.h>
34 
35 #include <string>
36 #include <string_view>
37 #include <unordered_set>
38 
39 namespace Xapian {
40 
41 class Database;
42 class Stem;
43 
52  void operator=(const Stopper &) = delete;
53 
55  Stopper(const Stopper &) = delete;
56 
57  public:
59  Stopper() { }
60 
65  virtual bool operator()(const std::string & term) const = 0;
66 
68  virtual ~Stopper() { }
69 
71  virtual std::string get_description() const;
72 
81  opt_intrusive_base::release();
82  return this;
83  }
84 
92  const Stopper * release() const {
93  opt_intrusive_base::release();
94  return this;
95  }
96 };
97 
100  std::unordered_set<std::string> stop_words;
101 
102  public:
105 
119  template<class Iterator>
120  SimpleStopper(Iterator begin, Iterator end) : stop_words(begin, end) { }
121 
123  void add(const std::string & word) { stop_words.insert(word); }
124 
125  virtual bool operator()(const std::string & term) const {
126  return stop_words.find(term) != stop_words.end();
127  }
128 
129  virtual std::string get_description() const;
130 };
131 
132 enum {
136 };
137 
142  void operator=(const RangeProcessor &);
143 
146 
147  protected:
153 
155  std::string str;
156 
168  unsigned flags;
169 
170  public:
172  RangeProcessor() : slot(Xapian::BAD_VALUENO), flags(0) { }
173 
193  std::string_view str_ = {},
194  unsigned flags_ = 0)
195  : slot(slot_), str(str_), flags(flags_) { }
196 
198  virtual ~RangeProcessor();
199 
205  Xapian::Query check_range(const std::string& b, const std::string& e);
206 
222  virtual Xapian::Query
223  operator()(const std::string &begin, const std::string &end);
224 
233  opt_intrusive_base::release();
234  return this;
235  }
236 
244  const RangeProcessor * release() const {
245  opt_intrusive_base::release();
246  return this;
247  }
248 };
249 
256 
257  public:
272  unsigned flags_ = 0,
273  int epoch_year_ = 1970)
274  : RangeProcessor(slot_, {}, flags_),
275  epoch_year(epoch_year_) { }
276 
312  DateRangeProcessor(Xapian::valueno slot_, std::string_view str_,
313  unsigned flags_ = 0, int epoch_year_ = 1970)
314  : RangeProcessor(slot_, str_, flags_),
315  epoch_year(epoch_year_) { }
316 
351  Xapian::Query operator()(const std::string& begin, const std::string& end);
352 };
353 
362  public:
402  std::string_view str_ = {},
403  unsigned flags_ = 0)
404  : RangeProcessor(slot_, str_, flags_) { }
405 
418  Xapian::Query operator()(const std::string& begin, const std::string& end);
419 };
420 
431  public:
447  std::string_view str_ = {})
448  : RangeProcessor(slot_, str_) { }
449 
462  Xapian::Query operator()(const std::string& begin, const std::string& end);
463 };
464 
470  void operator=(const FieldProcessor &);
471 
474 
475  public:
478 
480  virtual ~FieldProcessor();
481 
488  virtual Xapian::Query operator()(const std::string &str) = 0;
489 
498  opt_intrusive_base::release();
499  return this;
500  }
501 
509  const FieldProcessor * release() const {
510  opt_intrusive_base::release();
511  return this;
512  }
513 };
514 
517  public:
519  class Internal;
522 
/** Bit-flag values selecting query parser features.
 *
 *  Each basic flag occupies its own bit, so values can be combined with
 *  bitwise OR (FLAG_WILDCARD_GLOB and FLAG_DEFAULT below are built that
 *  way) and passed wherever an `unsigned flags` argument is taken, e.g.
 *  parse_query(), whose default is FLAG_DEFAULT.
 *
 *  All values are written in hexadecimal so the bit position of each flag
 *  is easy to read off.
 */
typedef enum {
    FLAG_BOOLEAN = 0x1,
    FLAG_PHRASE = 0x2,
    FLAG_LOVEHATE = 0x4,
    FLAG_BOOLEAN_ANY_CASE = 0x8,
    FLAG_WILDCARD = 0x10,
    FLAG_PURE_NOT = 0x20,
    FLAG_PARTIAL = 0x40,
    FLAG_SPELLING_CORRECTION = 0x80,
    FLAG_SYNONYM = 0x100,
    FLAG_AUTO_SYNONYMS = 0x200,
    FLAG_AUTO_MULTIWORD_SYNONYMS = 0x400,
    FLAG_NGRAMS = 0x800,

    /// Alias: has exactly the same value as FLAG_NGRAMS.
    FLAG_CJK_NGRAM = FLAG_NGRAMS,

    FLAG_WORD_BREAKS = 0x1000,
    FLAG_WILDCARD_MULTI = 0x2000,
    FLAG_WILDCARD_SINGLE = 0x4000,

    /// Combination of FLAG_WILDCARD_MULTI and FLAG_WILDCARD_SINGLE.
    FLAG_WILDCARD_GLOB = FLAG_WILDCARD_MULTI | FLAG_WILDCARD_SINGLE,

    FLAG_FUZZY = 0x8000,
    FLAG_ACCUMULATE = 0x10000,
    FLAG_NO_POSITIONS = 0x20000,
    FLAG_NO_PROPER_NOUN_HEURISTIC = 0x40000,

    /// Default flag set: FLAG_PHRASE, FLAG_BOOLEAN and FLAG_LOVEHATE.
    FLAG_DEFAULT = FLAG_PHRASE | FLAG_BOOLEAN | FLAG_LOVEHATE
} feature_flag;
786 
/** Stemming strategies, for use with set_stemming_strategy().
 *
 *  The numeric values are spelled out explicitly; they are the same
 *  values the enumerators receive by default from their order.
 */
typedef enum {
    STEM_NONE = 0,
    STEM_SOME = 1,
    STEM_ALL = 2,
    STEM_ALL_Z = 3,
    STEM_SOME_FULL_POS = 4
} stem_strategy;
791 
/** Stopper strategies, for use with set_stopper_strategy().
 *
 *  The numeric values are spelled out explicitly; they are the same
 *  values the enumerators receive by default from their order.
 */
typedef enum {
    STOP_NONE = 0,
    STOP_ALL = 1,
    STOP_STEMMED = 2
} stop_strategy;
797 
800 
803 
806 
809 
811  QueryParser();
812 
814  ~QueryParser();
815 
828  void set_stemmer(const Xapian::Stem & stemmer);
829 
853  void set_stemming_strategy(stem_strategy strategy);
854 
860  void set_stopper(const Stopper *stop = NULL);
861 
885  void set_stopper_strategy(stop_strategy strategy);
886 
901  void set_default_op(Query::op default_op);
902 
904  Query::op get_default_op() const;
905 
912  void set_database(const Database &db);
913 
941  void set_max_expansion(Xapian::termcount max_expansion,
943  unsigned flags = FLAG_WILDCARD |
944  FLAG_PARTIAL |
945  FLAG_FUZZY);
946 
970  void set_min_wildcard_prefix(unsigned min_prefix_len,
971  unsigned flags = FLAG_WILDCARD|FLAG_PARTIAL);
972 
996  Query parse_query(std::string_view query_string,
997  unsigned flags = FLAG_DEFAULT,
998  std::string_view default_prefix = {});
999 
1044  void add_prefix(std::string_view field, std::string_view prefix);
1045 
1048  void add_prefix(std::string_view field, Xapian::FieldProcessor* proc);
1049 
1109  void add_boolean_prefix(std::string_view field, std::string_view prefix,
1110  const std::string* grouping = NULL);
1111 
1134  void add_boolean_prefix(std::string_view field, std::string_view prefix,
1135  bool exclusive) {
1136  if (exclusive) {
1137  add_boolean_prefix(field, prefix);
1138  } else {
1139  std::string empty_grouping;
1140  add_boolean_prefix(field, prefix, &empty_grouping);
1141  }
1142  }
1143 
1146  void add_boolean_prefix(std::string_view field,
1147  Xapian::FieldProcessor* proc,
1148  const std::string* grouping = NULL);
1149 
1155  void add_boolean_prefix(std::string_view field,
1156  Xapian::FieldProcessor* proc,
1157  bool exclusive) {
1158  if (exclusive) {
1159  add_boolean_prefix(field, proc);
1160  } else {
1161  std::string empty_grouping;
1162  add_boolean_prefix(field, proc, &empty_grouping);
1163  }
1164  }
1165 
1167  TermIterator stoplist_begin() const;
1168 
1170  TermIterator stoplist_end() const noexcept {
1171  return TermIterator();
1172  }
1173 
1175  TermIterator unstem_begin(std::string_view term) const;
1176 
1178  TermIterator unstem_end(std::string_view) const noexcept {
1179  return TermIterator();
1180  }
1181 
1183  void add_rangeprocessor(Xapian::RangeProcessor * range_proc,
1184  const std::string* grouping = NULL);
1185 
1193  std::string get_corrected_query_string() const;
1194 
1196  std::string get_description() const;
1197 };
1198 
1201 size_t sortable_serialise_(double value, char* buf) noexcept;
1202 
1229 inline std::string sortable_serialise(double value) {
1230  char buf[9];
1231  return std::string(buf, sortable_serialise_(value, buf));
1232 }
1233 
1249 double sortable_unserialise(std::string_view serialised) noexcept;
1250 
1251 }
1252 
1253 #endif // XAPIAN_INCLUDED_QUERYPARSER_H
Compiler attribute macros.
An indexed database of documents.
Definition: database.h:75
Handle a date range.
Definition: queryparser.h:254
DateRangeProcessor(Xapian::valueno slot_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:271
DateRangeProcessor(Xapian::valueno slot_, std::string_view str_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:312
Base class for field processors.
Definition: queryparser.h:468
FieldProcessor * release()
Start reference counting this object.
Definition: queryparser.h:497
const FieldProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:509
FieldProcessor(const FieldProcessor &)
Don't allow copying.
void operator=(const FieldProcessor &)
Don't allow assignment.
FieldProcessor()
Default constructor.
Definition: queryparser.h:477
virtual Xapian::Query operator()(const std::string &str)=0
Convert a field-prefixed string to a Query object.
Base class for objects managed by opt_intrusive_ptr.
Handle a number range.
Definition: queryparser.h:361
NumberRangeProcessor(Xapian::valueno slot_, std::string_view str_={}, unsigned flags_=0)
Constructor.
Definition: queryparser.h:401
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:516
void add_boolean_prefix(std::string_view field, std::string_view prefix, bool exclusive)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
Definition: queryparser.h:1134
TermIterator unstem_end(std::string_view) const noexcept
End iterator over unstemmed forms of the given stemmed query term.
Definition: queryparser.h:1178
QueryParser & operator=(QueryParser &&o)
Move assignment operator.
QueryParser(QueryParser &&o)
Move constructor.
void add_boolean_prefix(std::string_view field, Xapian::FieldProcessor *proc, bool exclusive)
Register a FieldProcessor for a boolean prefix.
Definition: queryparser.h:1155
TermIterator stoplist_end() const noexcept
End iterator over terms omitted from the query as stopwords.
Definition: queryparser.h:1170
QueryParser & operator=(const QueryParser &o)
Assignment.
stop_strategy
Stopper strategies, for use with set_stopper_strategy().
Definition: queryparser.h:796
QueryParser(const QueryParser &o)
Copy constructor.
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Definition: queryparser.h:788
Class representing a query.
Definition: query.h:45
op
Query operators.
Definition: query.h:78
@ WILDCARD_LIMIT_ERROR
Throw an error if OP_WILDCARD exceeds its expansion limit.
Definition: query.h:305
Base class for range processors.
Definition: queryparser.h:140
unsigned flags
Flags.
Definition: queryparser.h:168
RangeProcessor * release()
Start reference counting this object.
Definition: queryparser.h:232
RangeProcessor(const RangeProcessor &)
Don't allow copying.
RangeProcessor(Xapian::valueno slot_, std::string_view str_={}, unsigned flags_=0)
Constructor.
Definition: queryparser.h:192
void operator=(const RangeProcessor &)
Don't allow assignment.
const RangeProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:244
std::string str
The prefix (or suffix with RP_SUFFIX) string to look for.
Definition: queryparser.h:155
Xapian::valueno slot
The value slot to process.
Definition: queryparser.h:152
RangeProcessor()
Default constructor.
Definition: queryparser.h:172
Simple implementation of Stopper class - this will suit most users.
Definition: queryparser.h:99
void add(const std::string &word)
Add a single stop word.
Definition: queryparser.h:123
SimpleStopper()
Default constructor.
Definition: queryparser.h:104
SimpleStopper(Iterator begin, Iterator end)
Initialise from a pair of iterators.
Definition: queryparser.h:120
std::unordered_set< std::string > stop_words
Definition: queryparser.h:100
virtual bool operator()(const std::string &term) const
Is term a stop-word?
Definition: queryparser.h:125
Class representing a stemming algorithm.
Definition: stem.h:74
Abstract base class for stop-word decision functor.
Definition: queryparser.h:50
virtual bool operator()(const std::string &term) const =0
Is term a stop-word?
const Stopper * release() const
Start reference counting this object.
Definition: queryparser.h:92
void operator=(const Stopper &)=delete
Don't allow assignment.
Stopper(const Stopper &)=delete
Don't allow copying.
Stopper()
Default constructor.
Definition: queryparser.h:59
Stopper * release()
Start reference counting this object.
Definition: queryparser.h:80
virtual ~Stopper()
Class has virtual methods, so provide a virtual destructor.
Definition: queryparser.h:68
Class for iterating over a list of terms.
Definition: termiterator.h:41
Handle a byte unit range.
Definition: queryparser.h:430
UnitRangeProcessor(Xapian::valueno slot_, std::string_view str_={})
Constructor.
Definition: queryparser.h:446
string term
string str(int value)
Convert int to std::string.
Definition: str.cc:91
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:100
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1229
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
size_t sortable_serialise_(double value, char *buf) noexcept
@ RP_DATE_PREFER_MDY
Definition: queryparser.h:135
@ RP_REPEATED
Definition: queryparser.h:134
@ RP_SUFFIX
Definition: queryparser.h:133
double sortable_unserialise(std::string_view serialised) noexcept
Convert a string encoded using sortable_serialise back to a floating point number.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
Xapian::Query API class.
static Xapian::Stem stemmer
Definition: stemtest.cc:42
Class for iterating over a list of terms.
Define XAPIAN_VISIBILITY_* macros.
#define XAPIAN_VISIBILITY_DEFAULT
Definition: visibility.h:28