xapian-core  1.4.31
queryparser.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005-2023 Olly Betts
5  * Copyright (C) 2010 Adam Sjøgren
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #ifndef XAPIAN_INCLUDED_QUERYPARSER_H
24 #define XAPIAN_INCLUDED_QUERYPARSER_H
25 
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/queryparser.h> directly; include <xapian.h> instead.
28 #endif
29 
30 #include <xapian/attributes.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/query.h>
34 #include <xapian/termiterator.h>
35 #include <xapian/visibility.h>
36 
37 #include <set>
38 #include <string>
39 
40 namespace Xapian {
41 
42 class Database;
43 class Stem;
44 
53  void operator=(const Stopper &);
54 
56  Stopper(const Stopper &);
57 
58  public:
60  Stopper() { }
61 
66  virtual bool operator()(const std::string & term) const = 0;
67 
69  virtual ~Stopper() { }
70 
72  virtual std::string get_description() const;
73 
82  opt_intrusive_base::release();
83  return this;
84  }
85 
/** Start reference counting this object (const overload).
 *
 *  Calls opt_intrusive_base::release() and then returns this, so the call
 *  can be chained onto the expression that produced the pointer.
 *
 *  NOTE(review): the ownership rules that follow from release() are defined
 *  by opt_intrusive_base, which is not visible here - confirm there.
 */
93  const Stopper * release() const {
94  opt_intrusive_base::release();
95  return this;
96  }
97 };
98 
101  std::set<std::string> stop_words;
102 
103  public:
106 
/** Initialise from a pair of iterators.
 *
 *  The elements of the range [begin, end) are used to construct stop_words,
 *  which is a std::set<std::string>, so repeated words are stored once.
 *
 *  @param begin  Start of the range of stop words.
 *  @param end    End of the range of stop words.
 */
120  template<class Iterator>
121  SimpleStopper(Iterator begin, Iterator end) : stop_words(begin, end) { }
122 
/** Add a single stop word.
 *
 *  @param word  The word to insert into stop_words (no effect if it is
 *               already present, since stop_words is a std::set).
 */
124  void add(const std::string & word) { stop_words.insert(word); }
125 
126  virtual bool operator()(const std::string & term) const {
127  return stop_words.find(term) != stop_words.end();
128  }
129 
130  virtual std::string get_description() const;
131 };
132 
133 enum {
137 };
138 
143  void operator=(const RangeProcessor &);
144 
147 
148  protected:
154 
156  std::string str;
157 
169  unsigned flags;
170 
171  public:
173  RangeProcessor() : slot(Xapian::BAD_VALUENO), flags(0) { }
174 
194  const std::string& str_ = std::string(),
195  unsigned flags_ = 0)
196  : slot(slot_), str(str_), flags(flags_) { }
197 
199  virtual ~RangeProcessor();
200 
206  Xapian::Query check_range(const std::string& b, const std::string& e);
207 
223  virtual Xapian::Query
224  operator()(const std::string &begin, const std::string &end);
225 
234  opt_intrusive_base::release();
235  return this;
236  }
237 
/** Start reference counting this object (const overload).
 *
 *  Calls opt_intrusive_base::release() and then returns this, so the call
 *  can be chained onto the expression that produced the pointer.
 *
 *  NOTE(review): the ownership rules that follow from release() are defined
 *  by opt_intrusive_base, which is not visible here - confirm there.
 */
245  const RangeProcessor * release() const {
246  opt_intrusive_base::release();
247  return this;
248  }
249 };
250 
257 
258  public:
273  unsigned flags_ = 0,
274  int epoch_year_ = 1970)
275  : RangeProcessor(slot_, std::string(), flags_),
276  epoch_year(epoch_year_) { }
277 
/** Constructor.
 *
 *  @param slot_        The value slot to process (forwarded to
 *                      RangeProcessor).
 *  @param str_         The prefix (or suffix with RP_SUFFIX) string to look
 *                      for (forwarded to RangeProcessor).
 *  @param flags_       Flags, forwarded to RangeProcessor (default: 0).
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 *                      NOTE(review): how epoch_year is used when parsing
 *                      dates is not visible in this header - confirm in
 *                      the implementation.
 */
313  DateRangeProcessor(Xapian::valueno slot_, const std::string &str_,
314  unsigned flags_ = 0, int epoch_year_ = 1970)
315  : RangeProcessor(slot_, str_, flags_),
316  epoch_year(epoch_year_) { }
317 
352  Xapian::Query operator()(const std::string& begin, const std::string& end);
353 };
354 
363  public:
403  const std::string &str_ = std::string(),
404  unsigned flags_ = 0)
405  : RangeProcessor(slot_, str_, flags_) { }
406 
419  Xapian::Query operator()(const std::string& begin, const std::string& end);
420 };
421 
427 
430 
431  public:
434 
436  virtual ~ValueRangeProcessor();
437 
452  virtual Xapian::valueno operator()(std::string &begin, std::string &end) = 0;
453 
462  opt_intrusive_base::release();
463  return this;
464  }
465 
/** Start reference counting this object (const overload).
 *
 *  Calls opt_intrusive_base::release() and then returns this, so the call
 *  can be chained onto the expression that produced the pointer.
 *
 *  NOTE(review): the ownership rules that follow from release() are defined
 *  by opt_intrusive_base, which is not visible here - confirm there.
 */
473  const ValueRangeProcessor * release() const {
474  opt_intrusive_base::release();
475  return this;
476  }
477 };
478 
486  protected:
489 
491  bool prefix;
492 
494  std::string str;
495 
496  public:
502  : valno(slot_), str() { }
503 
/** Constructor.
 *
 *  @param slot_    The value slot to process (stored in valno).
 *  @param str_     The prefix (or suffix if prefix_ is false) string to
 *                  look for.
 *  @param prefix_  Whether to look for str_ as a prefix or suffix
 *                  (default: true).
 */
512  StringValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
513  bool prefix_ = true)
514  : valno(slot_), prefix(prefix_), str(str_) { }
515 
530  Xapian::valueno operator()(std::string &begin, std::string &end);
531 };
532 
542 
543  public:
/** Constructor.
 *
 *  @param slot_        The value slot to process (forwarded to
 *                      StringValueRangeProcessor).
 *  @param prefer_mdy_  Stored in prefer_mdy (default: false).
 *                      NOTE(review): presumably selects month/day/year
 *                      ordering for ambiguous dates (cf.
 *                      RP_DATE_PREFER_MDY) - confirm in the implementation.
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 */
554  DateValueRangeProcessor(Xapian::valueno slot_, bool prefer_mdy_ = false,
555  int epoch_year_ = 1970)
556  : StringValueRangeProcessor(slot_),
557  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
558 
/** Constructor.
 *
 *  @param slot_        The value slot to process.
 *  @param str_         The prefix (or suffix if prefix_ is false) string to
 *                      look for.
 *  @param prefix_      Whether to look for str_ as a prefix or suffix
 *                      (default: true).
 *  @param prefer_mdy_  Stored in prefer_mdy (default: false).
 *                      NOTE(review): presumably selects month/day/year
 *                      ordering for ambiguous dates - confirm in the
 *                      implementation.
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 *
 *  slot_, str_ and prefix_ are forwarded to StringValueRangeProcessor.
 */
596  DateValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
597  bool prefix_ = true,
598  bool prefer_mdy_ = false, int epoch_year_ = 1970)
599  : StringValueRangeProcessor(slot_, str_, prefix_),
600  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
601 
602 #ifndef SWIG
/** Constructor taking the prefix/suffix string as a C string.
 *
 *  Takes the same parameters as the std::string overload; str_ is converted
 *  to std::string when it is forwarded to StringValueRangeProcessor.
 *
 *  NOTE(review): presumably provided so that a string literal argument
 *  selects this overload rather than converting to the bool prefer_mdy_
 *  parameter of the (slot_, prefer_mdy_, epoch_year_) constructor -
 *  confirm against the upstream documentation.
 *
 *  @param slot_        The value slot to process.
 *  @param str_         The prefix (or suffix if prefix_ is false) string to
 *                      look for.
 *  @param prefix_      Whether to look for str_ as a prefix or suffix
 *                      (default: true).
 *  @param prefer_mdy_  Stored in prefer_mdy (default: false).
 *  @param epoch_year_  Stored in epoch_year (default: 1970).
 */
647  DateValueRangeProcessor(Xapian::valueno slot_, const char * str_,
648  bool prefix_ = true,
649  bool prefer_mdy_ = false, int epoch_year_ = 1970)
650  : StringValueRangeProcessor(slot_, str_, prefix_),
651  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
652 #endif
653 
669  Xapian::valueno operator()(std::string &begin, std::string &end);
670 };
671 
682  public:
688  : StringValueRangeProcessor(slot_) { }
689 
/** Constructor.
 *
 *  All arguments are forwarded unchanged to StringValueRangeProcessor.
 *
 *  @param slot_    The value slot to process.
 *  @param str_     The prefix (or suffix if prefix_ is false) string to
 *                  look for.
 *  @param prefix_  Whether to look for str_ as a prefix or suffix
 *                  (default: true).
 */
722  NumberValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
723  bool prefix_ = true)
724  : StringValueRangeProcessor(slot_, str_, prefix_) { }
725 
743  Xapian::valueno operator()(std::string &begin, std::string &end);
744 };
745 
751  void operator=(const FieldProcessor &);
752 
755 
756  public:
759 
761  virtual ~FieldProcessor();
762 
769  virtual Xapian::Query operator()(const std::string &str) = 0;
770 
779  opt_intrusive_base::release();
780  return this;
781  }
782 
/** Start reference counting this object (const overload).
 *
 *  Calls opt_intrusive_base::release() and then returns this, so the call
 *  can be chained onto the expression that produced the pointer.
 *
 *  NOTE(review): the ownership rules that follow from release() are defined
 *  by opt_intrusive_base, which is not visible here - confirm there.
 */
790  const FieldProcessor * release() const {
791  opt_intrusive_base::release();
792  return this;
793  }
794 };
795 
798  public:
800  class Internal;
803 
/** Bit flags selecting query parser features.
 *
 *  Apart from FLAG_CJK_NGRAM and FLAG_DEFAULT, every enumerator is a
 *  distinct single bit, so values can be combined with bitwise OR and
 *  passed as the `unsigned flags` argument of parse_query() and
 *  set_max_expansion().
 */
805  typedef enum {
807  FLAG_BOOLEAN = 1,
809  FLAG_PHRASE = 2,
811  FLAG_LOVEHATE = 4,
813  FLAG_BOOLEAN_ANY_CASE = 8,
/// Support wildcards.
828  FLAG_WILDCARD = 16,
835  FLAG_PURE_NOT = 32,
856  FLAG_PARTIAL = 64,
857 
871  FLAG_SPELLING_CORRECTION = 128,
872 
877  FLAG_SYNONYM = 256,
878 
883  FLAG_AUTO_SYNONYMS = 512,
884 
890  FLAG_AUTO_MULTIWORD_SYNONYMS = 1024,
891 
914  FLAG_NGRAMS = 2048,
915 
/// Alias with the same value as FLAG_NGRAMS.
923  FLAG_CJK_NGRAM = FLAG_NGRAMS,
924 
// NOTE(review): the bits between 2048 and 65536 are not assigned in this
// enum; presumably reserved - confirm before allocating a new flag there.
938  FLAG_ACCUMULATE = 65536,
939 
949  FLAG_NO_POSITIONS = 0x20000,
950 
/// The combination used by parse_query() when no flags argument is given.
958  FLAG_DEFAULT = FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE
959  } feature_flag;
960 
/** Stemming strategies, as accepted by set_stemming_strategy().
 *
 *  NOTE(review): the meaning of each strategy is not documented in the
 *  visible part of this header - see the set_stemming_strategy()
 *  documentation before relying on a particular value.
 */
962  typedef enum {
963  STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z, STEM_SOME_FULL_POS
964  } stem_strategy;
965 
967  QueryParser(const QueryParser & o);
968 
970  QueryParser & operator=(const QueryParser & o);
971 
972 #ifdef XAPIAN_MOVE_SEMANTICS
974  QueryParser(QueryParser && o);
975 
977  QueryParser & operator=(QueryParser && o);
978 #endif
979 
981  QueryParser();
982 
984  ~QueryParser();
985 
998  void set_stemmer(const Xapian::Stem & stemmer);
999 
1023  void set_stemming_strategy(stem_strategy strategy);
1024 
1030  void set_stopper(const Stopper *stop = NULL);
1031 
1046  void set_default_op(Query::op default_op);
1047 
1049  Query::op get_default_op() const;
1050 
1057  void set_database(const Database &db);
1058 
1085  void set_max_expansion(Xapian::termcount max_expansion,
1086  int max_type = Xapian::Query::WILDCARD_LIMIT_ERROR,
1087  unsigned flags = FLAG_WILDCARD|FLAG_PARTIAL);
1088 
1101  XAPIAN_DEPRECATED(void set_max_wildcard_expansion(Xapian::termcount));
1102 
1126  Query parse_query(const std::string &query_string,
1127  unsigned flags = FLAG_DEFAULT,
1128  const std::string &default_prefix = std::string());
1129 
1174  void add_prefix(const std::string& field, const std::string& prefix);
1175 
1178  void add_prefix(const std::string& field, Xapian::FieldProcessor * proc);
1179 
1239  void add_boolean_prefix(const std::string &field, const std::string &prefix,
1240  const std::string* grouping = NULL);
1241 
1264  void add_boolean_prefix(const std::string &field, const std::string &prefix,
1265  bool exclusive) {
1266  if (exclusive) {
1267  add_boolean_prefix(field, prefix);
1268  } else {
1269  std::string empty_grouping;
1270  add_boolean_prefix(field, prefix, &empty_grouping);
1271  }
1272  }
1273 
1276  void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1277  const std::string* grouping = NULL);
1278 
1284  void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1285  bool exclusive) {
1286  if (exclusive) {
1287  add_boolean_prefix(field, proc);
1288  } else {
1289  std::string empty_grouping;
1290  add_boolean_prefix(field, proc, &empty_grouping);
1291  }
1292  }
1293 
1295  TermIterator stoplist_begin() const;
1296 
/** End iterator over terms omitted from the query as stopwords.
 *
 *  Always returns a default-constructed TermIterator; pairs with
 *  stoplist_begin().
 */
1298  TermIterator XAPIAN_NOTHROW(stoplist_end() const) {
1299  return TermIterator();
1300  }
1301 
1303  TermIterator unstem_begin(const std::string &term) const;
1304 
/** End iterator over unstemmed forms of the given stemmed query term.
 *
 *  The term argument is unnamed and unused: a default-constructed
 *  TermIterator is returned whatever term is passed. Pairs with
 *  unstem_begin().
 */
1306  TermIterator XAPIAN_NOTHROW(unstem_end(const std::string &) const) {
1307  return TermIterator();
1308  }
1309 
1311  void add_rangeprocessor(Xapian::RangeProcessor * range_proc,
1312  const std::string* grouping = NULL);
1313 
1320 #ifdef __GNUC__
1321 // Avoid deprecation warnings if compiling without optimisation.
1322 # pragma GCC diagnostic push
1323 # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1324 #endif
1326  class ShimRangeProcessor : public RangeProcessor {
1328 
1329  public:
1330  ShimRangeProcessor(Xapian::ValueRangeProcessor * vrp_)
1331  : RangeProcessor(Xapian::BAD_VALUENO), vrp(vrp_) { }
1332 
1334  operator()(const std::string &begin, const std::string &end)
1335  {
1336  std::string b = begin, e = end;
1337  slot = (*vrp)(b, e);
1338  if (slot == Xapian::BAD_VALUENO)
1340  return RangeProcessor::operator()(b, e);
1341  }
1342  };
1343 
1344  add_rangeprocessor((new ShimRangeProcessor(vrproc))->release());
1345 #ifdef __GNUC__
1346 # pragma GCC diagnostic pop
1347 #endif
1348  }
1349 
1357  std::string get_corrected_query_string() const;
1358 
1360  std::string get_description() const;
1361 };
1362 
1363 inline void
1365 {
1366  set_max_expansion(max_expansion,
1368  FLAG_WILDCARD);
1369 }
1370 
1373 size_t XAPIAN_NOTHROW(sortable_serialise_(double value, char * buf));
1374 
1401 inline std::string sortable_serialise(double value) {
1402  char buf[9];
1403  return std::string(buf, sortable_serialise_(value, buf));
1404 }
1405 
1421 double XAPIAN_NOTHROW(sortable_unserialise(const std::string & serialised));
1422 
1423 }
1424 
1425 #endif // XAPIAN_INCLUDED_QUERYPARSER_H
Compiler attribute macros.
This class is used to access a database, or a group of databases.
Definition: database.h:68
Handle a date range.
Definition: queryparser.h:255
DateRangeProcessor(Xapian::valueno slot_, const std::string &str_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:313
DateRangeProcessor(Xapian::valueno slot_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:272
Handle a date range.
Definition: queryparser.h:539
DateValueRangeProcessor(Xapian::valueno slot_, const char *str_, bool prefix_=true, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:647
DateValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:596
DateValueRangeProcessor(Xapian::valueno slot_, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:554
Base class for field processors.
Definition: queryparser.h:749
FieldProcessor * release()
Start reference counting this object.
Definition: queryparser.h:778
const FieldProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:790
FieldProcessor(const FieldProcessor &)
Don't allow copying.
void operator=(const FieldProcessor &)
Don't allow assignment.
FieldProcessor()
Default constructor.
Definition: queryparser.h:758
virtual Xapian::Query operator()(const std::string &str)=0
Convert a field-prefixed string to a Query object.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:82
Base class for objects managed by opt_intrusive_ptr.
A smart pointer that optionally uses intrusive reference counting.
Handle a number range.
Definition: queryparser.h:362
NumberRangeProcessor(Xapian::valueno slot_, const std::string &str_=std::string(), unsigned flags_=0)
Constructor.
Definition: queryparser.h:402
Handle a number range.
Definition: queryparser.h:681
NumberValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true)
Constructor.
Definition: queryparser.h:722
NumberValueRangeProcessor(Xapian::valueno slot_)
Constructor.
Definition: queryparser.h:687
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:797
void set_max_wildcard_expansion(Xapian::termcount)
Specify the maximum expansion of a wildcard.
Definition: queryparser.h:1364
void set_max_expansion(Xapian::termcount max_expansion, int max_type=Xapian::Query::WILDCARD_LIMIT_ERROR, unsigned flags=FLAG_WILDCARD|FLAG_PARTIAL)
Specify the maximum expansion of a wildcard and/or partial term.
Definition: queryparser.cc:147
TermIterator stoplist_end() const
End iterator over terms omitted from the query as stopwords.
Definition: queryparser.h:1298
void add_boolean_prefix(const std::string &field, const std::string &prefix, bool exclusive)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in t...
Definition: queryparser.h:1264
void add_valuerangeprocessor(Xapian::ValueRangeProcessor *vrproc)
Register a ValueRangeProcessor.
Definition: queryparser.h:1319
@ FLAG_WILDCARD
Support wildcards.
Definition: queryparser.h:828
void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc, bool exclusive)
Register a FieldProcessor for a boolean prefix.
Definition: queryparser.h:1284
TermIterator unstem_end(const std::string &) const
End iterator over unstemmed forms of the given stemmed query term.
Definition: queryparser.h:1306
Class representing a query.
Definition: query.h:56
@ WILDCARD_LIMIT_ERROR
Throw an error if OP_WILDCARD exceeds its expansion limit.
Definition: query.h:301
op
Query operators.
Definition: query.h:88
@ OP_INVALID
Construct an invalid query.
Definition: query.h:273
Base class for range processors.
Definition: queryparser.h:141
unsigned flags
Flags.
Definition: queryparser.h:169
RangeProcessor * release()
Start reference counting this object.
Definition: queryparser.h:233
virtual Xapian::Query operator()(const std::string &begin, const std::string &end)
Check for a valid range of this type.
RangeProcessor(const RangeProcessor &)
Don't allow copying.
void operator=(const RangeProcessor &)
Don't allow assignment.
const RangeProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:245
std::string str
The prefix (or suffix with RP_SUFFIX) string to look for.
Definition: queryparser.h:156
RangeProcessor(Xapian::valueno slot_, const std::string &str_=std::string(), unsigned flags_=0)
Constructor.
Definition: queryparser.h:193
Xapian::valueno slot
The value slot to process.
Definition: queryparser.h:153
RangeProcessor()
Default constructor.
Definition: queryparser.h:173
Simple implementation of Stopper class - this will suit most users.
Definition: queryparser.h:100
std::set< std::string > stop_words
Definition: queryparser.h:101
void add(const std::string &word)
Add a single stop word.
Definition: queryparser.h:124
SimpleStopper()
Default constructor.
Definition: queryparser.h:105
SimpleStopper(Iterator begin, Iterator end)
Initialise from a pair of iterators.
Definition: queryparser.h:121
virtual bool operator()(const std::string &term) const
Is term a stop-word?
Definition: queryparser.h:126
Class representing a stemming algorithm.
Definition: stem.h:62
Abstract base class for stop-word decision functor.
Definition: queryparser.h:51
Stopper(const Stopper &)
Don't allow copying.
virtual bool operator()(const std::string &term) const =0
Is term a stop-word?
const Stopper * release() const
Start reference counting this object.
Definition: queryparser.h:93
Stopper()
Default constructor.
Definition: queryparser.h:60
Stopper * release()
Start reference counting this object.
Definition: queryparser.h:81
virtual ~Stopper()
Class has virtual methods, so provide a virtual destructor.
Definition: queryparser.h:69
void operator=(const Stopper &)
Don't allow assignment.
Handle a string range.
Definition: queryparser.h:485
StringValueRangeProcessor(Xapian::valueno slot_)
Constructor.
Definition: queryparser.h:501
Xapian::valueno valno
The value slot to process.
Definition: queryparser.h:488
bool prefix
Whether to look for str as a prefix or suffix.
Definition: queryparser.h:491
std::string str
The prefix (or suffix if prefix==false) string to look for.
Definition: queryparser.h:494
StringValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true)
Constructor.
Definition: queryparser.h:512
Class for iterating over a list of terms.
Definition: termiterator.h:41
Base class for value range processors.
Definition: queryparser.h:424
ValueRangeProcessor(const ValueRangeProcessor &)
Don't allow copying.
const ValueRangeProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:473
ValueRangeProcessor()
Default constructor.
Definition: queryparser.h:433
void operator=(const ValueRangeProcessor &)
Don't allow assignment.
virtual Xapian::valueno operator()(std::string &begin, std::string &end)=0
Check for a valid range of this type.
ValueRangeProcessor * release()
Start reference counting this object.
Definition: queryparser.h:461
Define XAPIAN_DEPRECATED() and related macros.
#define XAPIAN_DEPRECATED_CLASS_EX
Definition: deprecated.h:39
string str(int value)
Convert int to std::string.
Definition: str.cc:90
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:125
double sortable_unserialise(const std::string &serialised)
Convert a string encoded using sortable_serialise back to a floating point number.
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1401
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
@ RP_DATE_PREFER_MDY
Definition: queryparser.h:136
@ RP_REPEATED
Definition: queryparser.h:135
@ RP_SUFFIX
Definition: queryparser.h:134
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
size_t sortable_serialise_(double value, char *buf)
#define XAPIAN_DEPRECATED(X)
Xapian::Query API class.
static Xapian::Stem stemmer
Definition: stemtest.cc:42
Class for iterating over a list of terms.
Define XAPIAN_VISIBILITY_* macros.
#define XAPIAN_VISIBILITY_DEFAULT
Definition: visibility.h:28