xapian-core  2.0.0
queryinternal.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2011-2026 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_QUERYINTERNAL_H
22 #define XAPIAN_INCLUDED_QUERYINTERNAL_H
23 
24 #include "api/editdistance.h"
25 #include "queryvector.h"
26 #include "stringutils.h"
27 #include "xapian/intrusive_ptr.h"
28 #include "xapian/query.h"
29 
32 
33 namespace Xapian {
34 namespace Internal {
35 
36 class PostList;
37 struct PostListAndEstimate;
38 class QueryOptimiser;
39 
40 class QueryTerm : public Query::Internal {
41  std::string term;
42 
44 
46 
47  public:
48  // Construct a "MatchAll" QueryTerm.
49  QueryTerm() : term(), wqf(1), pos(0) { }
50 
51  QueryTerm(std::string_view term_,
52  Xapian::termcount wqf_,
53  Xapian::termpos pos_)
54  : term(term_), wqf(wqf_), pos(pos_) { }
55 
57 
58  const std::string & get_term() const { return term; }
59 
60  termcount get_wqf() const { return wqf; }
61 
62  termpos get_pos() const { return pos; }
63 
65  double factor,
66  TermFreqs* termfreqs) const;
67 
69  QueryOptimiser* qopt,
70  double factor,
71  TermFreqs* termfreqs) const;
72 
74  return wqf;
75  }
76 
77  void serialise(std::string & result) const;
78 
79  std::string get_description() const;
80 
81  void gather_terms(void * void_terms) const;
82 };
83 
86 
87  public:
88  explicit QueryPostingSource(PostingSource * source_);
89 
91  double factor,
92  TermFreqs* termfreqs) const;
93 
94  void serialise(std::string & result) const;
95 
97 
98  std::string get_description() const;
99 };
100 
101 class QueryScaleWeight : public Query::Internal {
102  double scale_factor;
103 
105 
106  public:
107  QueryScaleWeight(double factor, const Query & subquery_);
108 
110  double factor,
111  TermFreqs* termfreqs) const;
112 
114  QueryOptimiser* qopt,
115  double factor,
116  TermFreqs* termfreqs) const;
117 
119  return subquery.internal->get_length();
120  }
121 
122  void serialise(std::string & result) const;
123 
125  size_t get_num_subqueries() const noexcept XAPIAN_PURE_FUNCTION;
126  const Query get_subquery(size_t n) const;
127 
128  std::string get_description() const;
129 
130  void gather_terms(void * void_terms) const;
131 };
132 
133 class QueryValueBase : public Query::Internal {
134  protected:
136 
137  public:
139  : slot(slot_) { }
140 
141  Xapian::valueno get_slot() const { return slot; }
142 };
143 
145  std::string begin, end;
146 
147  public:
149  std::string_view begin_,
150  std::string_view end_)
151  : QueryValueBase(slot_), begin(begin_), end(end_) { }
152 
154  double factor,
155  TermFreqs* termfreqs) const;
156 
157  void serialise(std::string & result) const;
158 
160 
161  std::string get_description() const;
162 };
163 
164 class QueryValueLE : public QueryValueBase {
165  std::string limit;
166 
167  public:
168  QueryValueLE(Xapian::valueno slot_, std::string_view limit_)
169  : QueryValueBase(slot_), limit(limit_) { }
170 
172  double factor,
173  TermFreqs* termfreqs) const;
174 
175  void serialise(std::string & result) const;
176 
178 
179  std::string get_description() const;
180 };
181 
182 class QueryValueGE : public QueryValueBase {
183  std::string limit;
184 
185  public:
186  QueryValueGE(Xapian::valueno slot_, std::string_view limit_)
187  : QueryValueBase(slot_), limit(limit_) { }
188 
190  double factor,
191  TermFreqs* termfreqs) const;
192 
193  void serialise(std::string & result) const;
194 
196 
197  std::string get_description() const;
198 };
199 
200 class QueryBranch : public Query::Internal {
201  virtual Xapian::Query::op get_op() const = 0;
202 
203  protected:
205 
206  explicit QueryBranch(size_t n_subqueries) : subqueries(n_subqueries) { }
207 
208  void serialise_(std::string & result, Xapian::termcount parameter = 0) const;
209 
210  void do_bool_or_like(OrContext& ctx,
211  QueryOptimiser* qopt,
212  TermFreqs* termfreqs,
213  size_t first = 0) const;
214 
222  void do_or_like(OrContext& ctx, QueryOptimiser* qopt, double factor,
223  TermFreqs* termfreqs,
224  Xapian::termcount elite_set_size = 0, size_t first = 0,
225  bool keep_zero_weight = true) const;
226 
227  PostListAndEstimate do_synonym(QueryOptimiser* qopt,
228  double factor,
229  TermFreqs* termfreqs) const;
230 
231  PostListAndEstimate do_max(QueryOptimiser* qopt,
232  double factor,
233  TermFreqs* termfreqs) const;
234 
235  const std::string get_description_helper(const char * op,
236  Xapian::termcount window = 0) const;
237 
238  public:
239  termcount get_length() const noexcept XAPIAN_PURE_FUNCTION;
240 
241  void serialise(std::string & result) const;
242 
243  void gather_terms(void * void_terms) const;
244 
245  virtual void add_subquery(const Xapian::Query & subquery) = 0;
246 
247  Xapian::Query::op get_type() const noexcept XAPIAN_PURE_FUNCTION;
248  size_t get_num_subqueries() const noexcept XAPIAN_PURE_FUNCTION;
249  const Query get_subquery(size_t n) const;
250 
251  virtual Query::Internal * done() = 0;
252 };
253 
254 class QueryAndLike : public QueryBranch {
255  protected:
256  explicit QueryAndLike(size_t num_subqueries_)
257  : QueryBranch(num_subqueries_) { }
258 
259  public:
260  void add_subquery(const Xapian::Query & subquery);
261 
262  Query::Internal * done();
263 
265  double factor,
266  TermFreqs* termfreqs) const;
267 
269  QueryOptimiser* qopt,
270  double factor,
271  TermFreqs* termfreqs) const;
272 };
273 
/// Intermediate base class shared by the OR-like branch query classes.
///
/// QueryOr, QueryXor, QueryEliteSet, QuerySynonym and QueryMax all derive
/// from this class.  It declares add_subquery() and done(), both of which
/// QueryBranch declares as pure virtual; their definitions are out of line.
274 class QueryOrLike : public QueryBranch {
275  protected:
 /// Protected, so only derived classes can construct this type.
 /// num_subqueries_ is passed straight through to the QueryBranch
 /// constructor.
276  explicit QueryOrLike(size_t num_subqueries_)
277  : QueryBranch(num_subqueries_) { }
278 
279  public:
 /// Overrides QueryBranch::add_subquery(); defined out of line.
280  void add_subquery(const Xapian::Query & subquery);
281 
 /// Overrides QueryBranch::done(); defined out of line.
 /// NOTE(review): presumably returns the object to use once all the
 /// subqueries have been added - confirm against the definition.
282  Query::Internal * done();
283 };
284 
/// Concrete branch query class built on QueryAndLike.
///
/// NOTE(review): presumably the representation of Query::OP_AND - the
/// get_op() definition which would confirm this is out of line.
285 class QueryAnd : public QueryAndLike {
 /// Implements the pure virtual QueryBranch::get_op().  Private here, as
 /// it is in QueryBranch.
286  Xapian::Query::op get_op() const;
287 
288  public:
 /// n_subqueries is passed straight through to the QueryAndLike
 /// constructor.
289  explicit QueryAnd(size_t n_subqueries) : QueryAndLike(n_subqueries) { }
290 
 /// Return a description of this query as a string; defined out of line.
291  std::string get_description() const;
292 };
293 
294 class QueryOr : public QueryOrLike {
295  Xapian::Query::op get_op() const;
296 
297  public:
298  explicit QueryOr(size_t n_subqueries) : QueryOrLike(n_subqueries) { }
299 
301  double factor,
302  TermFreqs* termfreqs) const;
303 
305  double factor,
306  TermFreqs* termfreqs,
307  bool keep_zero_weight) const;
308 
310  QueryOptimiser* qopt,
311  TermFreqs* termfreqs) const;
312 
313  std::string get_description() const;
314 };
315 
316 class QueryAndNot : public QueryBranch {
317  Xapian::Query::op get_op() const;
318 
319  public:
320  explicit QueryAndNot(size_t n_subqueries) : QueryBranch(n_subqueries) { }
321 
323  double factor,
324  TermFreqs* termfreqs) const;
325 
327  QueryOptimiser* qopt,
328  double factor,
329  TermFreqs* termfreqs) const;
330 
331  void add_subquery(const Xapian::Query & subquery);
332 
333  Query::Internal * done();
334 
335  std::string get_description() const;
336 };
337 
338 class QueryXor : public QueryOrLike {
339  Xapian::Query::op get_op() const;
340 
341  public:
342  explicit QueryXor(size_t n_subqueries) : QueryOrLike(n_subqueries) { }
343 
345  double factor,
346  TermFreqs* termfreqs) const;
347 
348  void postlist_sub_xor(XorContext& ctx,
349  QueryOptimiser* qopt,
350  double factor,
351  TermFreqs* termfreqs) const;
352 
353  std::string get_description() const;
354 };
355 
356 class QueryAndMaybe : public QueryBranch {
357  Xapian::Query::op get_op() const;
358 
359  public:
360  explicit QueryAndMaybe(size_t n_subqueries) : QueryBranch(n_subqueries) { }
361 
363  double factor,
364  TermFreqs* termfreqs) const;
365 
367  QueryOptimiser* qopt,
368  double factor,
369  TermFreqs* termfreqs) const;
370 
371  void add_subquery(const Xapian::Query & subquery);
372 
373  Query::Internal * done();
374 
375  std::string get_description() const;
376 };
377 
378 class QueryFilter : public QueryAndLike {
379  Xapian::Query::op get_op() const;
380 
381  public:
382  explicit QueryFilter(size_t n_subqueries) : QueryAndLike(n_subqueries) { }
383 
385  double factor,
386  TermFreqs* termfreqs) const;
387 
389  QueryOptimiser* qopt,
390  double factor,
391  TermFreqs* termfreqs) const;
392 
393  std::string get_description() const;
394 };
395 
396 class QueryWindowed : public QueryAndLike {
397  protected:
399 
400  QueryWindowed(size_t n_subqueries, Xapian::termcount window_)
401  : QueryAndLike(n_subqueries), window(window_) { }
402 
403  bool postlist_windowed(Xapian::Query::op op, AndContext& ctx,
404  QueryOptimiser* qopt, double factor,
405  TermFreqs* termfreqs) const;
406 
407  public:
408  size_t get_window() const { return window; }
409 
410  Query::Internal * done();
411 };
412 
413 class QueryNear : public QueryWindowed {
414  Xapian::Query::op get_op() const;
415 
416  public:
417  QueryNear(size_t n_subqueries, Xapian::termcount window_)
418  : QueryWindowed(n_subqueries, window_) { }
419 
420  void serialise(std::string & result) const;
421 
423  QueryOptimiser* qopt,
424  double factor,
425  TermFreqs* termfreqs) const;
426 
427  std::string get_description() const;
428 };
429 
430 class QueryPhrase : public QueryWindowed {
431  Xapian::Query::op get_op() const;
432 
433  public:
434  QueryPhrase(size_t n_subqueries, Xapian::termcount window_)
435  : QueryWindowed(n_subqueries, window_) { }
436 
437  void serialise(std::string & result) const;
438 
440  QueryOptimiser* qopt,
441  double factor,
442  TermFreqs* termfreqs) const;
443 
444  std::string get_description() const;
445 };
446 
447 class QueryEliteSet : public QueryOrLike {
448  Xapian::Query::op get_op() const;
449 
451 
452  public:
453  QueryEliteSet(size_t n_subqueries, Xapian::termcount set_size_)
454  : QueryOrLike(n_subqueries),
455  set_size(set_size_ ? set_size_ : DEFAULT_ELITE_SET_SIZE) { }
456 
457  void serialise(std::string & result) const;
458 
460  double factor,
461  TermFreqs* termfreqs) const;
462 
464  QueryOptimiser* qopt,
465  double factor,
466  TermFreqs* termfreqs,
467  bool keep_zero_weight) const;
468 
469  std::string get_description() const;
470 };
471 
472 class QuerySynonym : public QueryOrLike {
473  Xapian::Query::op get_op() const;
474 
475  public:
476  explicit QuerySynonym(size_t n_subqueries) : QueryOrLike(n_subqueries) { }
477 
479  double factor,
480  TermFreqs* termfreqs) const;
481 
482  Query::Internal * done();
483 
484  std::string get_description() const;
485 };
486 
487 class QueryMax : public QueryOrLike {
488  Xapian::Query::op get_op() const;
489 
490  public:
491  explicit QueryMax(size_t n_subqueries) : QueryOrLike(n_subqueries) { }
492 
494  double factor,
495  TermFreqs* termfreqs) const;
496 
497  std::string get_description() const;
498 };
499 
501  std::string pattern;
502 
504 
505  int flags;
506 
508 
513  size_t head = 0, tail = 0, min_len = 0, max_len = 0;
514 
515  // If candidate.size() >= min_len && candidate.size() < min_check_len then
516  // we don't need to actually do the pattern check since the length and
517  // head/tail checks are enough.
518  //
519  // This optimises cases `*` or `*?` or `?*` (which covers a lot of common
520  // cases) and also a pattern where there's no `*` and a single `?` where
521  // we only need to check the pattern when candidate.size() > min_len.
522  //
523  // Note that we can't handle cases like `*??` here since `?` matches a
524  // single UTF-8 character, which can be more than one byte.
525  size_t min_check_len = size_t(-1);
526 
527  std::string prefix, suffix;
528 
529  bool test_wildcard_(const std::string& candidate, size_t o, size_t p,
530  size_t i) const;
531 
532  public:
533  QueryWildcard(std::string_view pattern_,
534  Xapian::termcount max_expansion_,
535  int flags_,
536  Query::op combiner_);
537 
539  bool test_prefix_known(const std::string& candidate) const;
540 
542  bool test(const std::string& candidate) const {
543  return startswith(candidate, prefix) && test_prefix_known(candidate);
544  }
545 
547 
548  std::string get_pattern() const { return pattern; }
549 
550  Xapian::termcount get_max_expansion() const { return max_expansion; }
551 
552  int get_just_flags() const {
553  return flags &~ Xapian::Query::WILDCARD_LIMIT_MASK_;
554  }
555 
556  int get_max_type() const {
558  }
559 
561  double factor,
562  TermFreqs* termfreqs) const;
563 
564  termcount get_length() const noexcept XAPIAN_PURE_FUNCTION;
565 
566  void serialise(std::string & result) const;
567 
574  QueryWildcard* change_combiner(Xapian::Query::op new_op) {
575  if (_refs == 1) {
576  combiner = new_op;
577  return this;
578  }
579  return new QueryWildcard(pattern,
580  max_expansion,
581  flags,
582  new_op);
583  }
584 
586  std::string get_fixed_prefix() const { return prefix; }
587 
588  std::string get_description() const;
589 };
590 
592  std::string pattern;
593 
595 
596  int flags;
597 
599 
601 
602  unsigned edit_distance;
603 
605 
606  public:
607  QueryEditDistance(std::string_view pattern_,
608  Xapian::termcount max_expansion_,
609  int flags_,
610  Query::op combiner_,
611  unsigned edit_distance_ = 2,
612  size_t fixed_prefix_len_ = 0)
613  : pattern(pattern_),
614  max_expansion(max_expansion_),
615  flags(flags_),
616  combiner(combiner_),
617  edcalc(pattern),
618  edit_distance(edit_distance_),
619  fixed_prefix_len(fixed_prefix_len_) { }
620 
625  int test(const std::string& candidate) const;
626 
628 
629  std::string get_pattern() const { return pattern; }
630 
631  size_t get_fixed_prefix_len() const { return fixed_prefix_len; }
632 
633  Xapian::termcount get_max_expansion() const { return max_expansion; }
634 
635  int get_just_flags() const {
636  return flags &~ Xapian::Query::WILDCARD_LIMIT_MASK_;
637  }
638 
639  int get_max_type() const {
641  }
642 
643  unsigned get_threshold() const {
644  return edit_distance;
645  }
646 
648  double factor,
649  TermFreqs* termfreqs) const;
650 
651  termcount get_length() const noexcept XAPIAN_PURE_FUNCTION;
652 
653  void serialise(std::string & result) const;
654 
661  QueryEditDistance* change_combiner(Xapian::Query::op new_op) {
662  if (_refs == 1) {
663  combiner = new_op;
664  return this;
665  }
666  return new QueryEditDistance(pattern,
667  max_expansion,
668  flags,
669  new_op,
670  edit_distance,
671  fixed_prefix_len);
672  }
673 
674  std::string get_description() const;
675 };
676 
678  public:
680 
682 
684  double factor,
685  TermFreqs* termfreqs) const;
686 
687  void serialise(std::string & result) const;
688 
689  std::string get_description() const;
690 };
691 
692 }
693 
694 }
695 
696 #endif // XAPIAN_INCLUDED_QUERYINTERNAL_H
#define XAPIAN_PURE_FUNCTION
Like XAPIAN_CONST_FUNCTION, but such a function can also examine global memory, perhaps via pointer or reference parameters.
Definition: attributes.h:59
Calculate edit distances to a target string.
Definition: editdistance.h:43
QueryAndLike(size_t num_subqueries_)
QueryAndMaybe(size_t n_subqueries)
QueryAndNot(size_t n_subqueries)
QueryAnd(size_t n_subqueries)
virtual Xapian::Query::op get_op() const =0
QueryBranch(size_t n_subqueries)
Xapian::termcount get_max_expansion() const
QueryEditDistance(std::string_view pattern_, Xapian::termcount max_expansion_, int flags_, Query::op combiner_, unsigned edit_distance_=2, size_t fixed_prefix_len_=0)
QueryEliteSet(size_t n_subqueries, Xapian::termcount set_size_)
QueryFilter(size_t n_subqueries)
QueryMax(size_t n_subqueries)
QueryNear(size_t n_subqueries, Xapian::termcount window_)
QueryOrLike(size_t num_subqueries_)
QueryOr(size_t n_subqueries)
QueryPhrase(size_t n_subqueries, Xapian::termcount window_)
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Internal::opt_intrusive_ptr< PostingSource > source
Definition: queryinternal.h:85
Xapian::Query::op get_type() const noexcept
QueryPostingSource(PostingSource *source_)
void serialise(std::string &result) const
termcount get_length() const noexcept
QuerySynonym(size_t n_subqueries)
void serialise(std::string &result) const
std::string get_description() const
void gather_terms(void *void_terms) const
QueryTerm(std::string_view term_, Xapian::termcount wqf_, Xapian::termpos pos_)
Definition: queryinternal.h:51
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
const std::string & get_term() const
Definition: queryinternal.h:58
termcount get_length() const noexcept
Definition: queryinternal.h:73
termcount get_wqf() const
Definition: queryinternal.h:60
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_type() const noexcept
QueryValueBase(Xapian::valueno slot_)
Xapian::valueno get_slot() const
QueryValueGE(Xapian::valueno slot_, std::string_view limit_)
QueryValueLE(Xapian::valueno slot_, std::string_view limit_)
QueryValueRange(Xapian::valueno slot_, std::string_view begin_, std::string_view end_)
std::string get_fixed_prefix() const
Return the fixed prefix from the wildcard pattern.
bool test(const std::string &candidate) const
Perform full wildcard test on candidate.
Xapian::termcount get_max_expansion() const
QueryWindowed(size_t n_subqueries, Xapian::termcount window_)
QueryXor(size_t n_subqueries)
unsigned _refs
Reference count.
Definition: intrusive_ptr.h:74
A smart pointer that optionally uses intrusive reference counting.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
virtual void postlist_sub_bool_or_like(Xapian::Internal::OrContext &ctx, Xapian::Internal::QueryOptimiser *qopt, Xapian::Internal::TermFreqs *termfreqs) const
virtual termcount get_length() const noexcept
virtual const Query get_subquery(size_t n) const
virtual bool postlist_sub_and_like(Xapian::Internal::AndContext &ctx, Xapian::Internal::QueryOptimiser *qopt, double factor, Xapian::Internal::TermFreqs *termfreqs) const
virtual void postlist_sub_or_like(Xapian::Internal::OrContext &ctx, Xapian::Internal::QueryOptimiser *qopt, double factor, Xapian::Internal::TermFreqs *termfreqs, bool keep_zero_weight=true) const
virtual void gather_terms(void *void_terms) const
virtual void postlist_sub_xor(Xapian::Internal::XorContext &ctx, Xapian::Internal::QueryOptimiser *qopt, double factor, Xapian::Internal::TermFreqs *termfreqs) const
virtual size_t get_num_subqueries() const noexcept
Internal() noexcept
Definition: query.h:919
Class representing a query.
Definition: query.h:45
op
Query operators.
Definition: query.h:78
@ WILDCARD_LIMIT_MASK_
Definition: query.h:324
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:48
PositionList * p
Edit distance calculation algorithm.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
Xapian::Query API class.
const Xapian::termcount DEFAULT_ELITE_SET_SIZE
Default set_size for OP_ELITE_SET.
Definition: queryinternal.h:31
Append only vector of Query objects.
Various handy string-related helpers.
bool startswith(std::string_view s, char pfx)
Definition: stringutils.h:56
The frequencies for a term.