xapian-core  1.4.26
queryparser.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005-2023 Olly Betts
5  * Copyright (C) 2010 Adam Sjøgren
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #ifndef XAPIAN_INCLUDED_QUERYPARSER_H
24 #define XAPIAN_INCLUDED_QUERYPARSER_H
25 
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/queryparser.h> directly; include <xapian.h> instead.
28 #endif
29 
30 #include <xapian/attributes.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/query.h>
34 #include <xapian/termiterator.h>
35 #include <xapian/visibility.h>
36 
37 #include <set>
38 #include <string>
39 
40 namespace Xapian {
41 
42 class Database;
43 class Stem;
44 
53  void operator=(const Stopper &);
54 
56  Stopper(const Stopper &);
57 
58  public:
/** Default constructor. */
60  Stopper() { }
61 
66  virtual bool operator()(const std::string & term) const = 0;
67 
/** Class has virtual methods, so provide a virtual destructor. */
69  virtual ~Stopper() { }
70 
72  virtual std::string get_description() const;
73 
82  opt_intrusive_base::release();
83  return this;
84  }
85 
/** Start reference counting this object (const overload).
 *
 *  Forwards to opt_intrusive_base::release() and then hands back this
 *  same object.
 *
 *  @return this
 */
93  const Stopper * release() const {
94  opt_intrusive_base::release();
95  return this;
96  }
97 };
98 
101  std::set<std::string> stop_words;
102 
103  public:
106 
/** Initialise from a pair of iterators.
 *
 *  The range [begin, end) is passed straight to the constructor of the
 *  stop_words set.
 *
 *  @param begin  Iterator to the first stop word.
 *  @param end    Iterator marking the end of the stop words.
 */
120  template<class Iterator>
121  SimpleStopper(Iterator begin, Iterator end) : stop_words(begin, end) { }
122 
/** Add a single stop word.
 *
 *  @param word  The word to insert into stop_words.
 */
124  void add(const std::string & word) { stop_words.insert(word); }
125 
126  virtual bool operator()(const std::string & term) const {
127  return stop_words.find(term) != stop_words.end();
128  }
129 
130  virtual std::string get_description() const;
131 };
132 
133 enum {
137 };
138 
143  void operator=(const RangeProcessor &);
144 
147 
148  protected:
154 
156  std::string str;
157 
169  unsigned flags;
170 
171  public:
/** Default constructor.
 *
 *  Sets slot to Xapian::BAD_VALUENO and flags to 0; str is not listed in
 *  the initialiser list, so it is default-constructed.
 */
173  RangeProcessor() : slot(Xapian::BAD_VALUENO), flags(0) { }
174 
192  const std::string& str_ = std::string(),
193  unsigned flags_ = 0)
194  : slot(slot_), str(str_), flags(flags_) { }
195 
197  virtual ~RangeProcessor();
198 
204  Xapian::Query check_range(const std::string& b, const std::string& e);
205 
221  virtual Xapian::Query
222  operator()(const std::string &begin, const std::string &end);
223 
232  opt_intrusive_base::release();
233  return this;
234  }
235 
/** Start reference counting this object (const overload).
 *
 *  Forwards to opt_intrusive_base::release() and then hands back this
 *  same object.
 *
 *  @return this
 */
243  const RangeProcessor * release() const {
244  opt_intrusive_base::release();
245  return this;
246  }
247 };
248 
255 
256  public:
271  unsigned flags_ = 0,
272  int epoch_year_ = 1970)
273  : RangeProcessor(slot_, std::string(), flags_),
274  epoch_year(epoch_year_) { }
275 
/** Constructor.
 *
 *  slot_, str_ and flags_ are forwarded unchanged to the RangeProcessor
 *  base-class constructor; epoch_year_ is stored in epoch_year.
 *
 *  @param slot_        Value slot, passed to RangeProcessor.
 *  @param str_         String, passed to RangeProcessor.
 *  @param flags_       Flags, passed to RangeProcessor (default: 0).
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 */
317  DateRangeProcessor(Xapian::valueno slot_, const std::string &str_,
318  unsigned flags_ = 0, int epoch_year_ = 1970)
319  : RangeProcessor(slot_, str_, flags_),
320  epoch_year(epoch_year_) { }
321 
333  Xapian::Query operator()(const std::string& begin, const std::string& end);
334 };
335 
344  public:
384  const std::string &str_ = std::string(),
385  unsigned flags_ = 0)
386  : RangeProcessor(slot_, str_, flags_) { }
387 
400  Xapian::Query operator()(const std::string& begin, const std::string& end);
401 };
402 
407  void operator=(const ValueRangeProcessor &);
408 
411 
412  public:
415 
417  virtual ~ValueRangeProcessor();
418 
433  virtual Xapian::valueno operator()(std::string &begin, std::string &end) = 0;
434 
443  opt_intrusive_base::release();
444  return this;
445  }
446 
/** Start reference counting this object (const overload).
 *
 *  Forwards to opt_intrusive_base::release() and then hands back this
 *  same object.
 *
 *  @return this
 */
454  const ValueRangeProcessor * release() const {
455  opt_intrusive_base::release();
456  return this;
457  }
458 };
459 
467  protected:
470 
472  bool prefix;
473 
475  std::string str;
476 
477  public:
483  : valno(slot_), str() { }
484 
/** Constructor.
 *
 *  Initialises all three data members: valno, prefix and str.
 *
 *  @param slot_    The value slot to process (stored in valno).
 *  @param str_     The prefix (or suffix if prefix_ is false) string to
 *                  look for (stored in str).
 *  @param prefix_  Whether to look for str_ as a prefix or suffix
 *                  (default: true; stored in prefix).
 */
493  StringValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
494  bool prefix_ = true)
495  : valno(slot_), prefix(prefix_), str(str_) { }
496 
511  Xapian::valueno operator()(std::string &begin, std::string &end);
512 };
513 
523 
524  public:
/** Constructor.
 *
 *  Only slot_ is forwarded to the StringValueRangeProcessor base class
 *  (via its single-argument constructor); the other two arguments are
 *  stored in this class's own members.
 *
 *  @param slot_        Value slot, passed to StringValueRangeProcessor.
 *  @param prefer_mdy_  Stored in prefer_mdy (default: false).
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 */
535  DateValueRangeProcessor(Xapian::valueno slot_, bool prefer_mdy_ = false,
536  int epoch_year_ = 1970)
537  : StringValueRangeProcessor(slot_),
538  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
539 
/** Constructor.
 *
 *  slot_, str_ and prefix_ are forwarded to the StringValueRangeProcessor
 *  base class; prefer_mdy_ and epoch_year_ are stored in this class's own
 *  members.
 *
 *  @param slot_        Value slot, passed to StringValueRangeProcessor.
 *  @param str_         String, passed to StringValueRangeProcessor.
 *  @param prefix_      Passed to StringValueRangeProcessor (default: true).
 *  @param prefer_mdy_  Stored in prefer_mdy (default: false).
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 */
577  DateValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
578  bool prefix_ = true,
579  bool prefer_mdy_ = false, int epoch_year_ = 1970)
580  : StringValueRangeProcessor(slot_, str_, prefix_),
581  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
582 
583 #ifndef SWIG
584 
/** Constructor.
 *
 *  Same initialisation as the overload taking const std::string &, but
 *  str_ is accepted as a const char *.  This overload sits inside an
 *  #ifndef SWIG section, so it is compiled out when SWIG is defined.
 *
 *  NOTE(review): presumably provided so that a string literal argument
 *  is not converted to bool and matched against the
 *  (slot_, bool prefer_mdy_, int epoch_year_) overload - confirm.
 *
 *  @param slot_        Value slot, passed to StringValueRangeProcessor.
 *  @param str_         String, passed to StringValueRangeProcessor.
 *  @param prefix_      Passed to StringValueRangeProcessor (default: true).
 *  @param prefer_mdy_  Stored in prefer_mdy (default: false).
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 */
628  DateValueRangeProcessor(Xapian::valueno slot_, const char * str_,
629  bool prefix_ = true,
630  bool prefer_mdy_ = false, int epoch_year_ = 1970)
631  : StringValueRangeProcessor(slot_, str_, prefix_),
632  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
633 #endif
634 
650  Xapian::valueno operator()(std::string &begin, std::string &end);
651 };
652 
663  public:
669  : StringValueRangeProcessor(slot_) { }
670 
/** Constructor.
 *
 *  All three arguments are forwarded unchanged to the
 *  StringValueRangeProcessor base-class constructor.
 *
 *  @param slot_    Value slot, passed to StringValueRangeProcessor.
 *  @param str_     String, passed to StringValueRangeProcessor.
 *  @param prefix_  Passed to StringValueRangeProcessor (default: true).
 */
703  NumberValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
704  bool prefix_ = true)
705  : StringValueRangeProcessor(slot_, str_, prefix_) { }
706 
724  Xapian::valueno operator()(std::string &begin, std::string &end);
725 };
726 
732  void operator=(const FieldProcessor &);
733 
736 
737  public:
740 
742  virtual ~FieldProcessor();
743 
750  virtual Xapian::Query operator()(const std::string &str) = 0;
751 
760  opt_intrusive_base::release();
761  return this;
762  }
763 
/** Start reference counting this object (const overload).
 *
 *  Forwards to opt_intrusive_base::release() and then hands back this
 *  same object.
 *
 *  @return this
 */
771  const FieldProcessor * release() const {
772  opt_intrusive_base::release();
773  return this;
774  }
775 };
776 
779  public:
781  class Internal;
784 
/** Enum of feature flags.
 *
 *  Apart from the FLAG_CJK_NGRAM alias and the FLAG_DEFAULT combination,
 *  every enumerator is a distinct single bit, so values can be combined
 *  with bitwise OR (FLAG_DEFAULT is built that way).
 */
typedef enum {
    FLAG_BOOLEAN = 1 << 0,
    FLAG_PHRASE = 1 << 1,
    FLAG_LOVEHATE = 1 << 2,
    FLAG_BOOLEAN_ANY_CASE = 1 << 3,
    FLAG_WILDCARD = 1 << 4,
    FLAG_PURE_NOT = 1 << 5,
    FLAG_PARTIAL = 1 << 6,
    FLAG_SPELLING_CORRECTION = 1 << 7,
    FLAG_SYNONYM = 1 << 8,
    FLAG_AUTO_SYNONYMS = 1 << 9,
    FLAG_AUTO_MULTIWORD_SYNONYMS = 1 << 10,
    FLAG_NGRAMS = 1 << 11,

    /// Alias: has the same value as FLAG_NGRAMS.
    FLAG_CJK_NGRAM = FLAG_NGRAMS,

    // Bits 12 to 15 are not assigned by this enum.
    FLAG_ACCUMULATE = 1 << 16,
    FLAG_NO_POSITIONS = 1 << 17,

    /// Combination of FLAG_BOOLEAN, FLAG_PHRASE and FLAG_LOVEHATE.
    FLAG_DEFAULT = FLAG_BOOLEAN | FLAG_PHRASE | FLAG_LOVEHATE
} feature_flag;
941 
/** Stemming strategies, for use with set_stemming_strategy(). */
typedef enum {
    STEM_NONE = 0,
    STEM_SOME = 1,
    STEM_ALL = 2,
    STEM_ALL_Z = 3,
    STEM_SOME_FULL_POS = 4
} stem_strategy;
946 
948  QueryParser(const QueryParser & o);
949 
951  QueryParser & operator=(const QueryParser & o);
952 
953 #ifdef XAPIAN_MOVE_SEMANTICS
954  QueryParser(QueryParser && o);
956 
958  QueryParser & operator=(QueryParser && o);
959 #endif
960 
962  QueryParser();
963 
965  ~QueryParser();
966 
979  void set_stemmer(const Xapian::Stem & stemmer);
980 
1004  void set_stemming_strategy(stem_strategy strategy);
1005 
1011  void set_stopper(const Stopper *stop = NULL);
1012 
1027  void set_default_op(Query::op default_op);
1028 
1030  Query::op get_default_op() const;
1031 
1038  void set_database(const Database &db);
1039 
1066  void set_max_expansion(Xapian::termcount max_expansion,
1067  int max_type = Xapian::Query::WILDCARD_LIMIT_ERROR,
1068  unsigned flags = FLAG_WILDCARD|FLAG_PARTIAL);
1069 
1082  XAPIAN_DEPRECATED(void set_max_wildcard_expansion(Xapian::termcount));
1083 
1107  Query parse_query(const std::string &query_string,
1108  unsigned flags = FLAG_DEFAULT,
1109  const std::string &default_prefix = std::string());
1110 
1155  void add_prefix(const std::string& field, const std::string& prefix);
1156 
1159  void add_prefix(const std::string& field, Xapian::FieldProcessor * proc);
1160 
1220  void add_boolean_prefix(const std::string &field, const std::string &prefix,
1221  const std::string* grouping = NULL);
1222 
1245  void add_boolean_prefix(const std::string &field, const std::string &prefix,
1246  bool exclusive) {
1247  if (exclusive) {
1248  add_boolean_prefix(field, prefix);
1249  } else {
1250  std::string empty_grouping;
1251  add_boolean_prefix(field, prefix, &empty_grouping);
1252  }
1253  }
1254 
1257  void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1258  const std::string* grouping = NULL);
1259 
1265  void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1266  bool exclusive) {
1267  if (exclusive) {
1268  add_boolean_prefix(field, proc);
1269  } else {
1270  std::string empty_grouping;
1271  add_boolean_prefix(field, proc, &empty_grouping);
1272  }
1273  }
1274 
1276  TermIterator stoplist_begin() const;
1277 
/** End iterator over terms omitted from the query as stopwords.
 *
 *  The declarator is wrapped in XAPIAN_NOTHROW() (presumably marking the
 *  function as non-throwing - see that macro's definition).
 *
 *  @return A default-constructed TermIterator.
 */
1279  TermIterator XAPIAN_NOTHROW(stoplist_end() const) {
1280  return TermIterator();
1281  }
1282 
1284  TermIterator unstem_begin(const std::string &term) const;
1285 
/** End iterator over unstemmed forms of the given stemmed query term.
 *
 *  The string parameter is unnamed and not used: the result is the same
 *  whatever term is passed.  The declarator is wrapped in
 *  XAPIAN_NOTHROW() (presumably marking the function as non-throwing -
 *  see that macro's definition).
 *
 *  @return A default-constructed TermIterator.
 */
1287  TermIterator XAPIAN_NOTHROW(unstem_end(const std::string &) const) {
1288  return TermIterator();
1289  }
1290 
1292  void add_rangeprocessor(Xapian::RangeProcessor * range_proc,
1293  const std::string* grouping = NULL);
1294 
1301 #ifdef __GNUC__
1302 // Avoid deprecation warnings if compiling without optimisation.
1303 # pragma GCC diagnostic push
1304 # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1305 #endif
1306  class ShimRangeProcessor : public RangeProcessor {
1309 
1310  public:
1311  ShimRangeProcessor(Xapian::ValueRangeProcessor * vrp_)
1312  : RangeProcessor(Xapian::BAD_VALUENO), vrp(vrp_) { }
1313 
1315  operator()(const std::string &begin, const std::string &end)
1316  {
1317  std::string b = begin, e = end;
1318  slot = (*vrp)(b, e);
1319  if (slot == Xapian::BAD_VALUENO)
1321  return RangeProcessor::operator()(b, e);
1322  }
1323  };
1324 
1325  add_rangeprocessor((new ShimRangeProcessor(vrproc))->release());
1326 #ifdef __GNUC__
1327 # pragma GCC diagnostic pop
1328 #endif
1329  }
1330 
1338  std::string get_corrected_query_string() const;
1339 
1341  std::string get_description() const;
1342 };
1343 
1344 inline void
1346 {
1347  set_max_expansion(max_expansion,
1349  FLAG_WILDCARD);
1350 }
1351 
1354 size_t XAPIAN_NOTHROW(sortable_serialise_(double value, char * buf));
1355 
1382 inline std::string sortable_serialise(double value) {
1383  char buf[9];
1384  return std::string(buf, sortable_serialise_(value, buf));
1385 }
1386 
1402 double XAPIAN_NOTHROW(sortable_unserialise(const std::string & serialised));
1403 
1404 }
1405 
1406 #endif // XAPIAN_INCLUDED_QUERYPARSER_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Handle a date range.
Definition: queryparser.h:520
Stopper()
Default constructor.
Definition: queryparser.h:60
void set_max_wildcard_expansion(Xapian::termcount)
Specify the maximum expansion of a wildcard.
Definition: queryparser.h:1345
RangeProcessor()
Default constructor.
Definition: queryparser.h:173
DateRangeProcessor(Xapian::valueno slot_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:270
Simple implementation of Stopper class - this will suit most users.
Definition: queryparser.h:100
This class is used to access a database, or a group of databases.
Definition: database.h:68
bool prefix
Whether to look for str as a prefix or suffix.
Definition: queryparser.h:472
Class representing a stemming algorithm.
Definition: stem.h:62
void add_valuerangeprocessor(Xapian::ValueRangeProcessor *vrproc)
Register a ValueRangeProcessor.
Definition: queryparser.h:1300
Xapian::valueno valno
The value slot to process.
Definition: queryparser.h:469
op
Query operators.
Definition: query.h:78
FieldProcessor * release()
Start reference counting this object.
Definition: queryparser.h:759
Compiler attribute macros.
Handle a number range.
Definition: queryparser.h:343
RangeProcessor(Xapian::valueno slot_, const std::string &str_=std::string(), unsigned flags_=0)
Constructor.
Definition: queryparser.h:191
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
Define XAPIAN_DEPRECATED() and related macros.
ValueRangeProcessor()
Default constructor.
Definition: queryparser.h:414
SimpleStopper()
Default constructor.
Definition: queryparser.h:105
SimpleStopper(Iterator begin, Iterator end)
Initialise from a pair of iterators.
Definition: queryparser.h:121
STL namespace.
virtual Xapian::Query operator()(const std::string &begin, const std::string &end)
Check for a valid range of this type.
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1382
ValueRangeProcessor * release()
Start reference counting this object.
Definition: queryparser.h:442
void add(const std::string &word)
Add a single stop word.
Definition: queryparser.h:124
const Stopper * release() const
Start reference counting this object.
Definition: queryparser.h:93
static Xapian::Stem stemmer
Definition: stemtest.cc:41
Base class for value range processors.
Definition: queryparser.h:404
#define XAPIAN_DEPRECATED(X)
virtual bool operator()(const std::string &term) const
Is term a stop-word?
Definition: queryparser.h:126
#define XAPIAN_VISIBILITY_DEFAULT
Definition: visibility.h:28
Xapian::Query API class.
Stopper * release()
Start reference counting this object.
Definition: queryparser.h:81
TermIterator unstem_end(const std::string &) const
End iterator over unstemmed forms of the given stemmed query term.
Definition: queryparser.h:1287
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::valueno slot
The value slot to process.
Definition: queryparser.h:153
Handle a date range.
Definition: queryparser.h:253
Base class for field processors.
Definition: queryparser.h:729
size_t sortable_serialise_(double value, char *buf)
Define XAPIAN_VISIBILITY_* macros.
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Definition: queryparser.h:943
#define XAPIAN_DEPRECATED_CLASS_EX
Definition: deprecated.h:39
void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc, bool exclusive)
Register a FieldProcessor for a boolean prefix.
Definition: queryparser.h:1265
TermIterator stoplist_end() const
End iterator over terms omitted from the query as stopwords.
Definition: queryparser.h:1279
DateValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:577
Throw an error if OP_WILDCARD exceeds its expansion limit.
Definition: query.h:291
const ValueRangeProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:454
void add_boolean_prefix(const std::string &field, const std::string &prefix, bool exclusive)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
Definition: queryparser.h:1245
std::string str
The prefix (or suffix if prefix==false) string to look for.
Definition: queryparser.h:475
Construct an invalid query.
Definition: query.h:263
NumberValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true)
Constructor.
Definition: queryparser.h:703
NumberRangeProcessor(Xapian::valueno slot_, const std::string &str_=std::string(), unsigned flags_=0)
Constructor.
Definition: queryparser.h:383
Base class for objects managed by opt_intrusive_ptr.
unsigned flags
Flags.
Definition: queryparser.h:169
Handle a string range.
Definition: queryparser.h:466
Base class for range processors.
Definition: queryparser.h:140
std::string str
The prefix (or suffix with RP_SUFFIX) string to look for.
Definition: queryparser.h:156
double sortable_unserialise(const std::string &serialised)
Convert a string encoded using sortable_serialise back to a floating point number.
StringValueRangeProcessor(Xapian::valueno slot_)
Constructor.
Definition: queryparser.h:482
NumberValueRangeProcessor(Xapian::valueno slot_)
Constructor.
Definition: queryparser.h:668
virtual ~Stopper()
Class has virtual methods, so provide a virtual destructor.
Definition: queryparser.h:69
const RangeProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:243
Handle a number range.
Definition: queryparser.h:662
const FieldProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:771
RangeProcessor * release()
Start reference counting this object.
Definition: queryparser.h:231
StringValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true)
Constructor.
Definition: queryparser.h:493
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
FieldProcessor()
Default constructor.
Definition: queryparser.h:739
feature_flag
Enum of feature flags.
Definition: queryparser.h:786
Abstract base class for stop-word decision functor.
Definition: queryparser.h:50
Class representing a query.
Definition: query.h:46
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:125
DateRangeProcessor(Xapian::valueno slot_, const std::string &str_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:317
A smart pointer that optionally uses intrusive reference counting.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Class for iterating over a list of terms.
DateValueRangeProcessor(Xapian::valueno slot_, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:535
std::set< std::string > stop_words
Definition: queryparser.h:101
DateValueRangeProcessor(Xapian::valueno slot_, const char *str_, bool prefix_=true, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:628