xapian-core  1.4.21
queryparser.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2021 Olly Betts
5  * Copyright (C) 2010 Adam Sjøgren
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #ifndef XAPIAN_INCLUDED_QUERYPARSER_H
24 #define XAPIAN_INCLUDED_QUERYPARSER_H
25 
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error Never use <xapian/queryparser.h> directly; include <xapian.h> instead.
28 #endif
29 
30 #include <xapian/attributes.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/query.h>
34 #include <xapian/termiterator.h>
35 #include <xapian/visibility.h>
36 
37 #include <set>
38 #include <string>
39 
40 namespace Xapian {
41 
42 class Database;
43 class Stem;
44 
53  void operator=(const Stopper &);
54 
56  Stopper(const Stopper &);
57 
58  public:
// Default constructor; the base class has no state of its own to set up here.
60  Stopper() { }
61 
// Stop-word decision hook: pure virtual, so every concrete stopper must
// implement it. Called with a candidate term; a true result is taken to mean
// "this term is a stop-word" (that is how the SimpleStopper override behaves).
66  virtual bool operator()(const std::string & term) const = 0;
67 
// Class has virtual methods, so provide a virtual destructor.
69  virtual ~Stopper() { }
70 
// Defined out of line and overridable by subclasses.
// NOTE(review): presumably returns a human-readable description of the
// stopper - confirm against the implementation.
72  virtual std::string get_description() const;
73 
82  opt_intrusive_base::release();
83  return this;
84  }
85 
// Start reference counting this object (const overload).
// Calls opt_intrusive_base::release() and then returns this same pointer, so
// the call can be written inline in an expression such as
// `(new T(...))->release()`.
93  const Stopper * release() const {
94  opt_intrusive_base::release();
95  return this;
96  }
97 };
98 
101  std::set<std::string> stop_words;
102 
103  public:
106 
// Initialise from a pair of iterators.
// The range [begin, end) is handed straight to the std::set constructor, so
// its elements must be usable to construct std::string and duplicates in the
// range collapse to a single stop word.
120  template<class Iterator>
121  SimpleStopper(Iterator begin, Iterator end) : stop_words(begin, end) { }
122 
// Add a single stop word. stop_words is a std::set, so adding a word that is
// already present leaves the set unchanged.
124  void add(const std::string & word) { stop_words.insert(word); }
125 
// Is term a stop-word?
// Returns true exactly when term is an element of stop_words; the comparison
// is std::set's exact string match, so no case folding or stemming happens
// here.
126  virtual bool operator()(const std::string & term) const {
127  return stop_words.find(term) != stop_words.end();
128  }
129 
130  virtual std::string get_description() const;
131 };
132 
133 enum {
137 };
138 
143  void operator=(const RangeProcessor &);
144 
147 
148  protected:
154 
156  std::string str;
157 
169  unsigned flags;
170 
171  public:
// Default constructor.
// slot starts as Xapian::BAD_VALUENO (the reserved "no valueno" value) and
// flags as 0; str is not listed, so it is default-constructed (empty).
173  RangeProcessor() : slot(Xapian::BAD_VALUENO), flags(0) { }
174 
192  const std::string& str_ = std::string(),
193  unsigned flags_ = 0)
194  : slot(slot_), str(str_), flags(flags_) { }
195 
197  virtual ~RangeProcessor();
198 
204  Xapian::Query check_range(const std::string& b, const std::string& e);
205 
221  virtual Xapian::Query
222  operator()(const std::string &begin, const std::string &end);
223 
232  opt_intrusive_base::release();
233  return this;
234  }
235 
// Start reference counting this object (const overload).
// Calls opt_intrusive_base::release() and then returns this same pointer, so
// the result can be passed directly to a registration call such as
// add_rangeprocessor().
243  const RangeProcessor * release() const {
244  opt_intrusive_base::release();
245  return this;
246  }
247 };
248 
255 
256  public:
271  unsigned flags_ = 0,
272  int epoch_year_ = 1970)
273  : RangeProcessor(slot_, std::string(), flags_),
274  epoch_year(epoch_year_) { }
275 
// Constructor taking an explicit marker string.
//   slot_       - value slot to process (forwarded to RangeProcessor).
//   str_        - prefix/suffix string to look for (forwarded to
//                 RangeProcessor).
//   flags_      - RangeProcessor flags; defaults to 0.
//   epoch_year_ - stored in epoch_year; defaults to 1970.
//                 NOTE(review): presumably the base year used to expand
//                 two-digit years - confirm against operator().
317  DateRangeProcessor(Xapian::valueno slot_, const std::string &str_,
318  unsigned flags_ = 0, int epoch_year_ = 1970)
319  : RangeProcessor(slot_, str_, flags_),
320  epoch_year(epoch_year_) { }
321 
333  Xapian::Query operator()(const std::string& begin, const std::string& end);
334 };
335 
344  public:
384  const std::string &str_ = std::string(),
385  unsigned flags_ = 0)
386  : RangeProcessor(slot_, str_, flags_) { }
387 
400  Xapian::Query operator()(const std::string& begin, const std::string& end);
401 };
402 
407  void operator=(const ValueRangeProcessor &);
408 
411 
412  public:
415 
417  virtual ~ValueRangeProcessor();
418 
433  virtual Xapian::valueno operator()(std::string &begin, std::string &end) = 0;
434 
443  opt_intrusive_base::release();
444  return this;
445  }
446 
// Start reference counting this object (const overload).
// Calls opt_intrusive_base::release() and then returns this same pointer so
// the call can be used inline in an expression.
454  const ValueRangeProcessor * release() const {
455  opt_intrusive_base::release();
456  return this;
457  }
458 };
459 
467  protected:
470 
472  bool prefix;
473 
475  std::string str;
476 
477  public:
483  : valno(slot_), str() { }
484 
// Constructor taking a marker string.
//   slot_   - the value slot to process (stored in valno).
//   str_    - the prefix (or suffix if prefix_ == false) string to look for.
//   prefix_ - whether to look for str_ as a prefix (true, the default) or as
//             a suffix (false).
493  StringValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
494  bool prefix_ = true)
495  : valno(slot_), prefix(prefix_), str(str_) { }
496 
511  Xapian::valueno operator()(std::string &begin, std::string &end);
512 };
513 
523 
524  public:
// Constructor without a marker string.
//   slot_       - the value slot to process (forwarded to the
//                 StringValueRangeProcessor(slot_) constructor).
//   prefer_mdy_ - stored in prefer_mdy; defaults to false.
//                 NOTE(review): presumably selects month/day/year
//                 interpretation of ambiguous dates - confirm.
//   epoch_year_ - stored in epoch_year; defaults to 1970.
// Because the second parameter is a bool, a string literal passed in that
// position would convert to bool; the const char* overload of this
// constructor exists alongside this one and takes such calls instead.
535  DateValueRangeProcessor(Xapian::valueno slot_, bool prefer_mdy_ = false,
536  int epoch_year_ = 1970)
537  : StringValueRangeProcessor(slot_),
538  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
539 
// Constructor taking a marker string as std::string.
//   slot_       - the value slot to process.
//   str_        - the prefix (or suffix if prefix_ == false) string to look
//                 for; forwarded to StringValueRangeProcessor.
//   prefix_     - true (default) to treat str_ as a prefix, false as a suffix.
//   prefer_mdy_ - stored in prefer_mdy; defaults to false.
//   epoch_year_ - stored in epoch_year; defaults to 1970.
577  DateValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
578  bool prefix_ = true,
579  bool prefer_mdy_ = false, int epoch_year_ = 1970)
580  : StringValueRangeProcessor(slot_, str_, prefix_),
581  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
582 
583 #ifndef SWIG
584 
// Constructor taking a marker string as a C string (hidden from SWIG).
// Same parameters and behaviour as the std::string overload: str_ is
// converted to std::string when forwarded to StringValueRangeProcessor.
// Why it exists: with only the std::string overload, a call such as
// DateValueRangeProcessor(slot, "date:") would pick the
// (slot_, bool prefer_mdy_, int epoch_year_) constructor, because
// pointer-to-bool is a standard conversion and outranks the user-defined
// conversion from const char* to std::string. This exact-match overload
// prevents that silent misbinding.
628  DateValueRangeProcessor(Xapian::valueno slot_, const char * str_,
629  bool prefix_ = true,
630  bool prefer_mdy_ = false, int epoch_year_ = 1970)
631  : StringValueRangeProcessor(slot_, str_, prefix_),
632  prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
633 #endif
634 
650  Xapian::valueno operator()(std::string &begin, std::string &end);
651 };
652 
663  public:
669  : StringValueRangeProcessor(slot_) { }
670 
// Constructor taking a marker string.
// Adds no state of its own: all three arguments are forwarded unchanged to
// StringValueRangeProcessor.
//   slot_   - the value slot to process.
//   str_    - the prefix (or suffix if prefix_ == false) string to look for.
//   prefix_ - true (default) to treat str_ as a prefix, false as a suffix.
703  NumberValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
704  bool prefix_ = true)
705  : StringValueRangeProcessor(slot_, str_, prefix_) { }
706 
724  Xapian::valueno operator()(std::string &begin, std::string &end);
725 };
726 
732  void operator=(const FieldProcessor &);
733 
736 
737  public:
740 
742  virtual ~FieldProcessor();
743 
750  virtual Xapian::Query operator()(const std::string &str) = 0;
751 
760  opt_intrusive_base::release();
761  return this;
762  }
763 
// Start reference counting this object (const overload).
// Calls opt_intrusive_base::release() and then returns this same pointer so
// the call can be used inline in an expression.
771  const FieldProcessor * release() const {
772  opt_intrusive_base::release();
773  return this;
774  }
775 };
776 
779  public:
781  class Internal;
784 
// Enum of feature flags.
// Every enumerator except FLAG_DEFAULT is a single distinct bit, so values
// can be combined with bitwise OR and passed as the `unsigned flags` argument
// of parse_query() (whose default is FLAG_DEFAULT) or set_max_expansion().
// Bits 4096 through 32768 are not assigned in this enum.
786  typedef enum {
788  FLAG_BOOLEAN = 1,
790  FLAG_PHRASE = 2,
792  FLAG_LOVEHATE = 4,
794  FLAG_BOOLEAN_ANY_CASE = 8,
809  FLAG_WILDCARD = 16,
816  FLAG_PURE_NOT = 32,
837  FLAG_PARTIAL = 64,
838 
852  FLAG_SPELLING_CORRECTION = 128,
853 
858  FLAG_SYNONYM = 256,
859 
864  FLAG_AUTO_SYNONYMS = 512,
865 
871  FLAG_AUTO_MULTIWORD_SYNONYMS = 1024,
872 
886  FLAG_CJK_NGRAM = 2048,
887 
// 65536 == 0x10000; written in decimal here, unlike the next flag.
901  FLAG_ACCUMULATE = 65536,
902 
912  FLAG_NO_POSITIONS = 0x20000,
913 
// The default feature set: phrase, boolean and love/hate flags OR-ed
// together (numeric value 7).
921  FLAG_DEFAULT = FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE
922  } feature_flag;
923 
// Stemming strategies, for use with set_stemming_strategy().
// No explicit values are given, so the enumerators are numbered 0..4 in the
// order written; inserting or reordering entries would change those values.
925  typedef enum {
926  STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z, STEM_SOME_FULL_POS
927  } stem_strategy;
928 
930  QueryParser(const QueryParser & o);
931 
933  QueryParser & operator=(const QueryParser & o);
934 
935 #ifdef XAPIAN_MOVE_SEMANTICS
936  QueryParser(QueryParser && o);
938 
940  QueryParser & operator=(QueryParser && o);
941 #endif
942 
944  QueryParser();
945 
947  ~QueryParser();
948 
961  void set_stemmer(const Xapian::Stem & stemmer);
962 
986  void set_stemming_strategy(stem_strategy strategy);
987 
993  void set_stopper(const Stopper *stop = NULL);
994 
1009  void set_default_op(Query::op default_op);
1010 
1012  Query::op get_default_op() const;
1013 
1020  void set_database(const Database &db);
1021 
1048  void set_max_expansion(Xapian::termcount max_expansion,
1049  int max_type = Xapian::Query::WILDCARD_LIMIT_ERROR,
1050  unsigned flags = FLAG_WILDCARD|FLAG_PARTIAL);
1051 
1064  XAPIAN_DEPRECATED(void set_max_wildcard_expansion(Xapian::termcount));
1065 
1089  Query parse_query(const std::string &query_string,
1090  unsigned flags = FLAG_DEFAULT,
1091  const std::string &default_prefix = std::string());
1092 
1132  void add_prefix(const std::string& field, const std::string& prefix);
1133 
1136  void add_prefix(const std::string& field, Xapian::FieldProcessor * proc);
1137 
1191  void add_boolean_prefix(const std::string &field, const std::string &prefix,
1192  const std::string* grouping = NULL);
1193 
// Add a boolean term prefix, choosing the grouping via a bool.
// Convenience wrapper over the (field, prefix, const std::string* grouping)
// overload:
//   exclusive == true  - forwards with grouping left at its default (NULL).
//   exclusive == false - forwards with a pointer to an empty string as the
//                        grouping.
// The two-argument call below cannot recurse into this overload because the
// bool parameter here has no default value.
1210  void add_boolean_prefix(const std::string &field, const std::string &prefix,
1211  bool exclusive) {
1212  if (exclusive) {
1213  add_boolean_prefix(field, prefix);
1214  } else {
// Local string: the pointer passed on is only valid for the duration of the
// call. NOTE(review): assumes the callee copies the grouping rather than
// retaining the pointer - confirm.
1215  std::string empty_grouping;
1216  add_boolean_prefix(field, prefix, &empty_grouping);
1217  }
1218  }
1219 
1222  void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1223  const std::string* grouping = NULL);
1224 
// Register a FieldProcessor for a boolean prefix, choosing the grouping via
// a bool.
// Convenience wrapper over the (field, proc, const std::string* grouping)
// overload:
//   exclusive == true  - forwards with grouping left at its default (NULL).
//   exclusive == false - forwards with a pointer to an empty string as the
//                        grouping.
1230  void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1231  bool exclusive) {
1232  if (exclusive) {
1233  add_boolean_prefix(field, proc);
1234  } else {
// Local string: the pointer passed on is only valid for the duration of the
// call. NOTE(review): assumes the callee copies the grouping rather than
// retaining the pointer - confirm.
1235  std::string empty_grouping;
1236  add_boolean_prefix(field, proc, &empty_grouping);
1237  }
1238  }
1239 
1241  TermIterator stoplist_begin() const;
1242 
// End iterator over terms omitted from the query as stopwords.
// Always a default-constructed TermIterator; pairs with stoplist_begin().
// Declared through the XAPIAN_NOTHROW() macro.
1244  TermIterator XAPIAN_NOTHROW(stoplist_end() const) {
1245  return TermIterator();
1246  }
1247 
1249  TermIterator unstem_begin(const std::string &term) const;
1250 
// End iterator over unstemmed forms of the given stemmed query term.
// The term argument is unnamed and ignored: the result is always a
// default-constructed TermIterator. The parameter mirrors the signature of
// unstem_begin(term). Declared through the XAPIAN_NOTHROW() macro.
1252  TermIterator XAPIAN_NOTHROW(unstem_end(const std::string &) const) {
1253  return TermIterator();
1254  }
1255 
1257  void add_rangeprocessor(Xapian::RangeProcessor * range_proc,
1258  const std::string* grouping = NULL);
1259 
1266 #ifdef __GNUC__
1267 // Avoid deprecation warnings if compiling without optimisation.
1268 # pragma GCC diagnostic push
1269 # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1270 #endif
1271  class ShimRangeProcessor : public RangeProcessor {
1274 
1275  public:
1276  ShimRangeProcessor(Xapian::ValueRangeProcessor * vrp_)
1277  : RangeProcessor(Xapian::BAD_VALUENO), vrp(vrp_) { }
1278 
1280  operator()(const std::string &begin, const std::string &end)
1281  {
1282  std::string b = begin, e = end;
1283  slot = (*vrp)(b, e);
1284  if (slot == Xapian::BAD_VALUENO)
1286  return RangeProcessor::operator()(b, e);
1287  }
1288  };
1289 
1290  add_rangeprocessor((new ShimRangeProcessor(vrproc))->release());
1291 #ifdef __GNUC__
1292 # pragma GCC diagnostic pop
1293 #endif
1294  }
1295 
1303  std::string get_corrected_query_string() const;
1304 
1306  std::string get_description() const;
1307 };
1308 
1309 inline void
1311 {
1312  set_max_expansion(max_expansion,
1314  FLAG_WILDCARD);
1315 }
1316 
1319 size_t XAPIAN_NOTHROW(sortable_serialise_(double value, char * buf));
1320 
// Convert a floating point number to a string, preserving sort order.
// Thin inline wrapper: sortable_serialise_() fills a caller-supplied buffer
// and its return value is used as the number of bytes to copy into the
// result, so the returned string may contain embedded zero bytes.
1347 inline std::string sortable_serialise(double value) {
// NOTE(review): 9 bytes is presumably the maximum encoded length that
// sortable_serialise_() can write - confirm before changing this size.
1348  char buf[9];
1349  return std::string(buf, sortable_serialise_(value, buf));
1350 }
1351 
1367 double XAPIAN_NOTHROW(sortable_unserialise(const std::string & serialised));
1368 
1369 }
1370 
1371 #endif // XAPIAN_INCLUDED_QUERYPARSER_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Handle a date range.
Definition: queryparser.h:520
Stopper()
Default constructor.
Definition: queryparser.h:60
void set_max_wildcard_expansion(Xapian::termcount)
Specify the maximum expansion of a wildcard.
Definition: queryparser.h:1310
RangeProcessor()
Default constructor.
Definition: queryparser.h:173
DateRangeProcessor(Xapian::valueno slot_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:270
Simple implementation of Stopper class - this will suit most users.
Definition: queryparser.h:100
This class is used to access a database, or a group of databases.
Definition: database.h:68
bool prefix
Whether to look for str as a prefix or suffix.
Definition: queryparser.h:472
Class representing a stemming algorithm.
Definition: stem.h:62
void add_valuerangeprocessor(Xapian::ValueRangeProcessor *vrproc)
Register a ValueRangeProcessor.
Definition: queryparser.h:1265
Xapian::valueno valno
The value slot to process.
Definition: queryparser.h:469
op
Query operators.
Definition: query.h:78
FieldProcessor * release()
Start reference counting this object.
Definition: queryparser.h:759
Compiler attribute macros.
Handle a number range.
Definition: queryparser.h:343
RangeProcessor(Xapian::valueno slot_, const std::string &str_=std::string(), unsigned flags_=0)
Constructor.
Definition: queryparser.h:191
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
Define XAPIAN_DEPRECATED() and related macros.
ValueRangeProcessor()
Default constructor.
Definition: queryparser.h:414
SimpleStopper()
Default constructor.
Definition: queryparser.h:105
SimpleStopper(Iterator begin, Iterator end)
Initialise from a pair of iterators.
Definition: queryparser.h:121
STL namespace.
virtual Xapian::Query operator()(const std::string &begin, const std::string &end)
Check for a valid range of this type.
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1347
ValueRangeProcessor * release()
Start reference counting this object.
Definition: queryparser.h:442
void add(const std::string &word)
Add a single stop word.
Definition: queryparser.h:124
const Stopper * release() const
Start reference counting this object.
Definition: queryparser.h:93
static Xapian::Stem stemmer
Definition: stemtest.cc:41
Base class for value range processors.
Definition: queryparser.h:404
#define XAPIAN_DEPRECATED(X)
virtual bool operator()(const std::string &term) const
Is term a stop-word?
Definition: queryparser.h:126
#define XAPIAN_VISIBILITY_DEFAULT
Definition: visibility.h:28
Xapian::Query API class.
Stopper * release()
Start reference counting this object.
Definition: queryparser.h:81
TermIterator unstem_end(const std::string &) const
End iterator over unstemmed forms of the given stemmed query term.
Definition: queryparser.h:1252
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::valueno slot
The value slot to process.
Definition: queryparser.h:153
Handle a date range.
Definition: queryparser.h:253
Base class for field processors.
Definition: queryparser.h:729
size_t sortable_serialise_(double value, char *buf)
Define XAPIAN_VISIBILITY_* macros.
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
Definition: queryparser.h:925
#define XAPIAN_DEPRECATED_CLASS_EX
Definition: deprecated.h:39
void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc, bool exclusive)
Register a FieldProcessor for a boolean prefix.
Definition: queryparser.h:1230
TermIterator stoplist_end() const
End iterator over terms omitted from the query as stopwords.
Definition: queryparser.h:1244
DateValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:577
Throw an error if OP_WILDCARD exceeds its expansion limit.
Definition: query.h:291
const ValueRangeProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:454
void add_boolean_prefix(const std::string &field, const std::string &prefix, bool exclusive)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
Definition: queryparser.h:1210
std::string str
The prefix (or suffix if prefix==false) string to look for.
Definition: queryparser.h:475
Construct an invalid query.
Definition: query.h:263
NumberValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true)
Constructor.
Definition: queryparser.h:703
NumberRangeProcessor(Xapian::valueno slot_, const std::string &str_=std::string(), unsigned flags_=0)
Constructor.
Definition: queryparser.h:383
Base class for objects managed by opt_intrusive_ptr.
unsigned flags
Flags.
Definition: queryparser.h:169
Handle a string range.
Definition: queryparser.h:466
Base class for range processors.
Definition: queryparser.h:140
std::string str
The prefix (or suffix with RP_SUFFIX) string to look for.
Definition: queryparser.h:156
double sortable_unserialise(const std::string &serialised)
Convert a string encoded using sortable_serialise back to a floating point number.
StringValueRangeProcessor(Xapian::valueno slot_)
Constructor.
Definition: queryparser.h:482
NumberValueRangeProcessor(Xapian::valueno slot_)
Constructor.
Definition: queryparser.h:668
virtual ~Stopper()
Class has virtual methods, so provide a virtual destructor.
Definition: queryparser.h:69
const RangeProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:243
Handle a number range.
Definition: queryparser.h:662
const FieldProcessor * release() const
Start reference counting this object.
Definition: queryparser.h:771
RangeProcessor * release()
Start reference counting this object.
Definition: queryparser.h:231
StringValueRangeProcessor(Xapian::valueno slot_, const std::string &str_, bool prefix_=true)
Constructor.
Definition: queryparser.h:493
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
FieldProcessor()
Default constructor.
Definition: queryparser.h:739
feature_flag
Enum of feature flags.
Definition: queryparser.h:786
Abstract base class for stop-word decision functor.
Definition: queryparser.h:50
Class representing a query.
Definition: query.h:46
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:125
DateRangeProcessor(Xapian::valueno slot_, const std::string &str_, unsigned flags_=0, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:317
A smart pointer that optionally uses intrusive reference counting.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Class for iterating over a list of terms.
DateValueRangeProcessor(Xapian::valueno slot_, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:535
std::set< std::string > stop_words
Definition: queryparser.h:101
DateValueRangeProcessor(Xapian::valueno slot_, const char *str_, bool prefix_=true, bool prefer_mdy_=false, int epoch_year_=1970)
Constructor.
Definition: queryparser.h:628