xapian-core  2.0.0
queryinternal.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007-2026 Olly Betts
5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "queryinternal.h"
25 
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
29 #include "xapian/unicode.h"
30 
31 #include "api/editdistance.h"
32 #include "backends/postlist.h"
33 #include "heap.h"
35 #include "matcher/andnotpostlist.h"
36 #include "matcher/andpostlist.h"
37 #include "matcher/boolorpostlist.h"
40 #include "matcher/maxpostlist.h"
41 #include "matcher/nearpostlist.h"
42 #include "matcher/orpospostlist.h"
43 #include "matcher/orpostlist.h"
44 #include "matcher/phrasepostlist.h"
45 #include "matcher/queryoptimiser.h"
48 #include "matcher/xorpostlist.h"
49 #include "pack.h"
50 #include "serialise-double.h"
51 #include "stringutils.h"
52 #include "termlist.h"
53 
54 #include "debuglog.h"
55 #include "omassert.h"
56 #include "str.h"
57 #include "stringutils.h"
59 
60 #include <algorithm>
61 #include <limits>
62 #include <list>
63 #include <memory>
64 #include <string>
65 #include <string_view>
66 #include <unordered_set>
67 #include <vector>
68 
69 using namespace std;
70 
71 static constexpr unsigned MAX_UTF_8_CHARACTER_LENGTH = 4;
72 
76 
77 namespace Xapian {
78 
79 namespace Internal {
80 
89 struct CmpMaxOrTerms {
91  bool operator()(PostList* a, PostList* b) {
92 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
93  defined(__mc68000__) || defined(__mc68010__) || \
94  defined(__mc68020__) || defined(__mc68030__)
95  // On some architectures, most common of which is x86, floating point
96  // values are calculated and stored in registers with excess precision.
97  // If the two recalc_maxweight() calls below return identical values in a
98  // register, the excess precision may be dropped for one of them but
99  // not the other (e.g. because the compiler saves the first calculated
100  // weight to memory while calculating the second, then reloads it to
101  // compare). This leads to both a > b and b > a being true, which
102  // violates the antisymmetry property of the strict weak ordering
103  // required by nth_element(). This can have serious consequences (e.g.
104  // segfaults).
105  //
106  // Note that m68k only has excess precision in earlier models - 68040
107  // and later are OK:
108  // https://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
109  //
110  // To avoid this, we store each result in a volatile double prior to
111  // comparing them. This means that the result of this test should
112  // match that on other architectures with the same double format (which
113  // is desirable), and actually has less overhead than rounding both
114  // results to float (which is another approach which works).
115  volatile double a_max_wt = a->recalc_maxweight();
116  volatile double b_max_wt = b->recalc_maxweight();
117  return a_max_wt > b_max_wt;
118 #else
119  return a->recalc_maxweight() > b->recalc_maxweight();
120 #endif
121  }
122 };
123 
127  bool operator()(const PostList* a,
128  const PostList* b) const {
129  return a->get_termfreq() > b->get_termfreq();
130  }
131 };
132 
133 class Context {
134  protected:
136 
137  vector<PostList*> pls;
138 
140 
141  vector<TermFreqs> termfreqs_list;
142 
145 
147  Xapian::docid last = 0;
148 
149  public:
150  Context(QueryOptimiser* qopt_, size_t reserve)
151  : qopt(qopt_), estimates(reserve) {
152  pls.reserve(reserve);
153  }
154 
156  shrink(0);
157  }
158 
159  Xapian::docid get_first() const { return first; }
160 
161  Xapian::docid get_last() const { return last; }
162 
163  void add_termfreqs(TermFreqs* termfreqs) {
164  if (termfreqs) termfreqs_list.emplace_back(*termfreqs);
165  }
166 
167  void add_postlist(PostList* pl, EstimateOp* estimate,
168  TermFreqs* termfreqs) {
169  add_termfreqs(termfreqs);
170  if (pl) {
171  pls.emplace_back(pl);
172  // estimate can be NULL e.g. under the RHS of OP_AND_MAYBE.
173  if (estimate) estimates.push_back(estimate);
174  // Take the union of the docid ranges, which is suitable for
175  // OrContext and XorContext. AndContext() implements its own
176  // version of add_postlist() which takes the intersection.
177  Xapian::docid f = 1;
179  pl->get_docid_range(f, l);
180  first = std::min(first, f);
181  last = std::max(last, l);
182  } else {
183  Assert(!estimate);
184  }
185  }
186 
188  add_postlist(p.pl, p.est.release(), termfreqs);
189  }
190 
191  bool empty() const {
192  return pls.empty();
193  }
194 
196  return Xapian::termcount(pls.size());
197  }
198 
199  void shrink(size_t new_size) {
200  AssertRel(new_size, <=, pls.size());
201  if (new_size >= pls.size())
202  return;
203 
204  for (auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
205  qopt->destroy_postlist(*i);
206  }
207  pls.resize(new_size);
208  estimates.erase(estimates.begin() + new_size, estimates.end());
209  }
210 
212  void expand_wildcard(const QueryWildcard* query,
213  double factor,
214  TermFreqs* termfreqs);
215 
217  void expand_edit_distance(const QueryEditDistance* query,
218  double factor,
219  TermFreqs* termfreqs);
220 };
221 
222 inline void
223 Context::expand_wildcard(const QueryWildcard* query,
224  double factor,
225  TermFreqs* termfreqs)
226 {
227  unique_ptr<TermList> t(qopt->db.open_allterms(query->get_fixed_prefix()));
228  bool skip_ucase = query->get_fixed_prefix().empty();
229  auto max_type = query->get_max_type();
230  Xapian::termcount expansions_left = query->get_max_expansion();
231  // If there's no expansion limit, set expansions_left to the maximum
232  // value it can hold.
233  if (expansions_left == 0)
234  expansions_left = numeric_limits<decltype(expansions_left)>::max();
235  while (true) {
236  TermList* ret = t->next();
237 done_skip_to:
238  if (ret) {
239  // Pruning shouldn't be possible, as this is iterating allterms for
240  // a single shard.
241  Assert(ret == t.get());
242  // End of entries.
243  break;
244  }
245 
246  const string & term = t->get_termname();
247  if (skip_ucase && term[0] >= 'A') {
248  // If there's a leading wildcard then skip terms that start
249  // with A-Z, as we don't want the expansion to include prefixed
250  // terms.
251  //
252  // This assumes things about the structure of terms which the
253  // Query class otherwise doesn't need to care about, but it
254  // seems hard to avoid here.
255  skip_ucase = false;
256  if (term[0] <= 'Z') {
257  static_assert('Z' + 1 == '[', "'Z' + 1 == '['");
258  ret = t->skip_to("[");
259  goto done_skip_to;
260  }
261  }
262 
263  if (!query->test_prefix_known(term)) continue;
264 
266  if (expansions_left == 0) {
267  if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
268  break;
269  string msg("Wildcard ");
270  msg += query->get_pattern();
271  if (query->get_just_flags() == 0)
272  msg += '*';
273  msg += " expands to more than ";
274  msg += str(query->get_max_expansion());
275  msg += " terms";
276  throw Xapian::WildcardError(msg);
277  }
278  --expansions_left;
279  }
280 
281  add_postlist(qopt->open_lazy_post_list(term, 1, factor), NULL);
282  // Generate a single EstimateOp to avoid overhead for wildcards
283  // which expand to a lot of terms? (FIXME)
284  }
285 
287  auto set_size = query->get_max_expansion();
288  if (size() > set_size) {
289  auto begin = pls.begin();
290  nth_element(begin, begin + set_size - 1, pls.end(),
292  shrink(set_size);
293  }
294  }
295 
296  // Now register the postlists we're actually using for stats.
297  for (auto pl : pls) {
298  // FIXME: LocalSubMatch::open_lazy_post_list() above returns a
299  // PostList* that actually points to a LeafPostList. It would be
300  // better to find a way to be more type-safe here and avoid need to
301  // cast back.
302  qopt->register_lazy_postlist_for_stats(static_cast<LeafPostList*>(pl),
303  termfreqs);
304  add_termfreqs(termfreqs);
305  }
306 }
307 
308 inline void
309 Context::expand_edit_distance(const QueryEditDistance* query,
310  double factor,
311  TermFreqs* termfreqs)
312 {
313  string pfx(query->get_pattern(), 0, query->get_fixed_prefix_len());
314  unique_ptr<TermList> t(qopt->db.open_allterms(pfx));
315  bool skip_ucase = pfx.empty();
316  auto max_type = query->get_max_type();
317  Xapian::termcount expansions_left = query->get_max_expansion();
318  // If there's no expansion limit, set expansions_left to the maximum
319  // value it can hold.
320  if (expansions_left == 0)
321  expansions_left = numeric_limits<decltype(expansions_left)>::max();
322  while (true) {
323  TermList* res = t->next();
324 done_skip_to:
325  if (res) {
326  // Pruning shouldn't be possible, as this is iterating allterms for
327  // a single shard.
328  Assert(res == t.get());
329  // Out of entries.
330  break;
331  }
332 
333  const string& term = t->get_termname();
334  if (!startswith(term, pfx))
335  break;
336  if (skip_ucase && term[0] >= 'A') {
337  // Skip terms that start with A-Z, as we don't want the expansion
338  // to include prefixed terms.
339  //
340  // This assumes things about the structure of terms which the
341  // Query class otherwise doesn't need to care about, but it
342  // seems hard to avoid here.
343  skip_ucase = false;
344  if (term[0] <= 'Z') {
345  static_assert('Z' + 1 == '[', "'Z' + 1 == '['");
346  res = t->skip_to("[");
347  goto done_skip_to;
348  }
349  }
350 
351  if (!query->test(term)) continue;
352 
354  if (expansions_left == 0) {
355  if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
356  break;
357  string msg("Edit distance ");
358  msg += query->get_pattern();
359  msg += '~';
360  msg += str(query->get_threshold());
361  msg += " expands to more than ";
362  msg += str(query->get_max_expansion());
363  msg += " terms";
364  throw Xapian::WildcardError(msg);
365  }
366  --expansions_left;
367  }
368 
369  add_postlist(qopt->open_lazy_post_list(term, 1, factor), NULL);
370  }
371 
373  auto set_size = query->get_max_expansion();
374  if (size() > set_size) {
375  auto begin = pls.begin();
376  nth_element(begin, begin + set_size - 1, pls.end(),
378  shrink(set_size);
379  }
380  }
381 
382  // Now register the postlists we're actually using for stats.
383  for (auto pl : pls) {
384  // FIXME: Be more typesafe?
385  qopt->register_lazy_postlist_for_stats(static_cast<LeafPostList*>(pl),
386  termfreqs);
387  add_termfreqs(termfreqs);
388  }
389 }
390 
391 class OrContext : public Context {
392  public:
393  OrContext(QueryOptimiser* qopt_, size_t reserve)
394  : Context(qopt_, reserve) { }
395 
396  void estimate_termfreqs(TermFreqs* termfreqs);
397 
399  void select_elite_set(size_t set_size, size_t out_of);
400 
401  PostListAndEstimate postlist(TermFreqs* termfreqs, bool bool_or = false);
402 
403  PostListAndEstimate postlist_max();
404 };
405 
406 void
407 OrContext::estimate_termfreqs(TermFreqs* termfreqs)
408 {
409  Assert(termfreqs);
410 
411  Assert(!termfreqs_list.empty());
412 
413  // We calculate the estimate assuming independence. The simplest
414  // way to calculate this seems to be a series of (n - 1) pairwise
415  // calculations, which gives the same answer regardless of the order.
416  const TermFreqs& freqs = termfreqs_list[0];
417  auto& stats = *qopt->get_stats();
418 
419  // Our caller should have ensured this.
420  Assert(stats.collection_size);
421  double scale = 1.0 / stats.collection_size;
422  double P_est = freqs.termfreq * scale;
423  double rtf_scale = 0.0;
424  if (stats.rset_size != 0) {
425  rtf_scale = 1.0 / stats.rset_size;
426  }
427  double Pr_est = freqs.reltermfreq * rtf_scale;
428  // If total_length is 0, cf must always be 0 so cf_scale is irrelevant.
429  double cf_scale = 0.0;
430  if (usual(stats.total_length != 0)) {
431  cf_scale = 1.0 / stats.total_length;
432  }
433  double Pc_est = freqs.collfreq * cf_scale;
434 
435  for (size_t i = 1; i < termfreqs_list.size(); ++i) {
436  const TermFreqs& f = termfreqs_list[i];
437  double P_i = f.termfreq * scale;
438  P_est += P_i - P_est * P_i;
439  double Pc_i = f.collfreq * cf_scale;
440  Pc_est += Pc_i - Pc_est * Pc_i;
441  // If the rset is empty, Pr_est should be 0 already, so leave
442  // it alone.
443  if (stats.rset_size != 0) {
444  double Pr_i = f.reltermfreq * rtf_scale;
445  Pr_est += Pr_i - Pr_est * Pr_i;
446  }
447  }
448 
449  *termfreqs =
450  TermFreqs(Xapian::doccount(P_est * stats.collection_size + 0.5),
451  Xapian::doccount(Pr_est * stats.rset_size + 0.5),
452  Xapian::termcount(Pc_est * stats.total_length + 0.5));
453 }
454 
455 void
456 OrContext::select_elite_set(size_t set_size, size_t out_of)
457 {
458  auto begin = pls.begin() + pls.size() - out_of;
459  nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
460  shrink(pls.size() - out_of + set_size);
461 }
462 
464 OrContext::postlist(TermFreqs* termfreqs, bool bool_or)
465 {
466  if (!termfreqs_list.empty()) estimate_termfreqs(termfreqs);
467 
468  switch (pls.size()) {
469  case 0:
470  return {};
471  case 1: {
472  PostList* pl = pls[0];
473  pls.clear();
474  // If no_estimates was set then estimates will be empty.
475  return {pl, estimates.empty() ? nullptr : estimates.release_at(0)};
476  }
477  }
478 
479  unique_ptr<EstimateOp> est;
480  if (!qopt->get_no_estimates()) {
481  est.reset(new EstimateOp(EstimateOp::OR, first, last,
482  std::move(estimates)));
483  }
484 
485  if (bool_or) {
486  auto pl = new BoolOrPostList(pls.begin(), pls.end(), qopt->db_size);
487  // Empty pls so our destructor doesn't delete them all!
488  pls.clear();
489  return {pl, std::move(est)};
490  }
491 
492  // Make postlists into a heap so that the postlist with the greatest term
493  // frequency is at the top of the heap.
494  Heap::make(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
495 
496  // Now build a tree of binary OrPostList objects.
497  //
498  // The algorithm used to build the tree is like that used to build an
499  // optimal Huffman coding tree. If we called next() repeatedly, this
500  // arrangement would minimise the number of method calls. Generally we
501  // don't actually do that, but this arrangement is still likely to be a
502  // good one, and it does minimise the work in the worst case.
503  while (true) {
504  // We build the tree such that at each branch:
505  //
506  // l.get_termfreq() >= r.get_termfreq()
507  //
508  // We do this so that the OrPostList class can be optimised assuming
509  // that this is the case.
510  PostList* r = pls.front();
511  Heap::pop(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
512  pls.pop_back();
513  auto pl = new OrPostList(pls.front(), r, qopt->matcher);
514 
515  if (pls.size() == 1) {
516  pls.clear();
517  return {pl, std::move(est)};
518  }
519 
520  pls[0] = pl;
521  Heap::replace(pls.begin(), pls.end(),
523  }
524 }
525 
527 OrContext::postlist_max()
528 {
529  switch (pls.size()) {
530  case 0:
531  return {};
532  case 1: {
533  PostList* pl = pls[0];
534  pls.clear();
535  return {pl, estimates.release_at(0)};
536  }
537  }
538 
539  // Sort the postlists so that the postlist with the greatest term frequency
540  // is first.
541  sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
542 
543  PostList* pl = new MaxPostList(pls.begin(), pls.end(),
544  qopt->matcher, qopt->db_size);
545  unique_ptr<EstimateOp> est;
546  if (!qopt->get_no_estimates()) {
547  // Same as OR for number of matches.
548  est.reset(new EstimateOp(EstimateOp::OR, first, last,
549  std::move(estimates)));
550  }
551  pls.clear();
552  return {pl, std::move(est)};
553 }
554 
555 class XorContext : public Context {
556  public:
557  XorContext(QueryOptimiser* qopt_, size_t reserve)
558  : Context(qopt_, reserve) { }
559 
560  PostListAndEstimate postlist(TermFreqs* termfreqs);
561 };
562 
564 XorContext::postlist(TermFreqs* termfreqs)
565 {
566  if (pls.empty())
567  return {};
568 
569  if (termfreqs) {
570  Assert(!termfreqs_list.empty());
571 
572  // We calculate the estimate assuming independence. The simplest
573  // way to calculate this seems to be a series of (n - 1) pairwise
574  // calculations, which gives the same answer regardless of the order.
575  auto& stats = *qopt->get_stats();
576  const TermFreqs& freqs = termfreqs_list[0];
577 
578  // Our caller should have ensured this.
579  Assert(stats.collection_size);
580  double scale = 1.0 / stats.collection_size;
581  double P_est = freqs.termfreq * scale;
582  double rtf_scale = 0.0;
583  if (stats.rset_size != 0) {
584  rtf_scale = 1.0 / stats.rset_size;
585  }
586  double Pr_est = freqs.reltermfreq * rtf_scale;
587  // If total_length is 0, cf must always be 0 so cf_scale is irrelevant.
588  double cf_scale = 0.0;
589  if (usual(stats.total_length != 0)) {
590  cf_scale = 1.0 / stats.total_length;
591  }
592  double Pc_est = freqs.collfreq * cf_scale;
593 
594  for (size_t i = 1; i < termfreqs_list.size(); ++i) {
595  const TermFreqs& f = termfreqs_list[i];
596  double P_i = f.termfreq * scale;
597  P_est += P_i - 2.0 * P_est * P_i;
598  double Pc_i = f.collfreq * cf_scale;
599  Pc_est += Pc_i - 2.0 * Pc_est * Pc_i;
600  // If the rset is empty, Pr_est should be 0 already, so leave
601  // it alone.
602  if (stats.rset_size != 0) {
603  double Pr_i = f.reltermfreq * rtf_scale;
604  Pr_est += Pr_i - 2.0 * Pr_est * Pr_i;
605  }
606  }
607 
608  *termfreqs =
609  TermFreqs(Xapian::doccount(P_est * stats.collection_size + 0.5),
610  Xapian::doccount(Pr_est * stats.rset_size + 0.5),
611  Xapian::termcount(Pc_est * stats.total_length + 0.5));
612  }
613 
614  unique_ptr<EstimateOp> est;
615  if (!qopt->get_no_estimates()) {
616  est.reset(new EstimateOp(EstimateOp::XOR, first, last,
617  std::move(estimates)));
618  }
619  auto pl = new XorPostList(pls.begin(), pls.end(), qopt->matcher,
620  qopt->db_size);
621  // Empty pls so our destructor doesn't delete them all!
622  pls.clear();
623  return {pl, std::move(est)};
624 }
625 
626 class PosFilter {
628 
630  size_t begin, end;
631 
633 
634  public:
635  PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
636  Xapian::termcount window_)
637  : op_(op__), begin(begin_), end(end_), window(window_) { }
638 
640  EstimateOp* est,
641  const vector<PostList*>& pls,
642  PostListTree* pltree,
643  TermFreqs* termfreqs) const
644  try {
645  auto terms_begin = pls.begin() + begin;
646  auto terms_end = pls.begin() + end;
647 
648  if (op_ == Xapian::Query::OP_NEAR) {
649  if (termfreqs) *termfreqs /= 2;
650  if (est) {
651  est = new EstimateOp(EstimateOp::NEAR, est);
652  }
653  pl = new NearPostList(pl, est,
654  window, terms_begin, terms_end, pltree);
655  } else if (window != end - begin) {
657  if (termfreqs) *termfreqs /= 3;
658  if (est) {
659  est = new EstimateOp(EstimateOp::PHRASE, est);
660  }
661  pl = new PhrasePostList(pl, est,
662  window, terms_begin, terms_end, pltree);
663  } else {
665  if (termfreqs) *termfreqs /= 4;
666  if (est) {
667  est = new EstimateOp(EstimateOp::EXACT_PHRASE, est);
668  }
669  pl = new ExactPhrasePostList(pl, est,
670  terms_begin, terms_end, pltree);
671  }
672  return {pl, est};
673  } catch (...) {
674  delete pl;
675  delete est;
676  throw;
677  }
678 };
679 
680 class AndContext : public Context {
681  list<PosFilter> pos_filters;
682 
683  unique_ptr<OrContext> not_ctx;
684 
685  unique_ptr<OrContext> maybe_ctx;
686 
692  bool match_all = false;
693 
694  public:
695  AndContext(QueryOptimiser* qopt_, size_t reserve)
696  : Context(qopt_, reserve) {
697  first = 1;
698  last = Xapian::docid(-1);
699  }
700 
701  bool add_postlist(PostList* pl, unique_ptr<EstimateOp>&& estimate,
702  TermFreqs* termfreqs) {
703  add_termfreqs(termfreqs);
704  if (pl) {
705  if (pls.empty() && termfreqs_list.size() > 1) {
706  qopt->destroy_postlist(pl);
707  return true;
708  }
709  pls.emplace_back(pl);
710  estimates.push_back(estimate.release());
711  Xapian::docid pl_first = first, pl_last = last;
712  pl->get_docid_range(pl_first, pl_last);
713  first = std::max(first, pl_first);
714  last = std::min(last, pl_last);
715  if (first <= last) {
716  return true;
717  }
718  }
719  shrink(0);
720  match_all = false;
721  return termfreqs != NULL;
722  }
723 
725  return add_postlist(p.pl, std::move(p.est), termfreqs);
726  }
727 
728  void set_match_all() { match_all = true; }
729 
730  void add_pos_filter(Query::op op_,
731  size_t n_subqs,
732  Xapian::termcount window);
733 
734  OrContext& get_not_ctx(size_t reserve) {
735  if (!not_ctx) {
736  not_ctx.reset(new OrContext(qopt, reserve));
737  }
738  return *not_ctx;
739  }
740 
741  OrContext& get_maybe_ctx(size_t reserve) {
742  if (!maybe_ctx) {
743  maybe_ctx.reset(new OrContext(qopt, reserve));
744  }
745  return *maybe_ctx;
746  }
747 
748  PostListAndEstimate postlist(TermFreqs* termfreqs);
749 };
750 
751 void
752 AndContext::add_pos_filter(Query::op op_,
753  size_t n_subqs,
754  Xapian::termcount window)
755 {
756  Assert(n_subqs > 1);
757  size_t end = pls.size();
758  size_t begin = end - n_subqs;
759  pos_filters.push_back(PosFilter(op_, begin, end, window));
760 }
761 
762 template<typename T, typename U>
763 inline static T
764 estimate_and_not(T l, T r, U n)
765 {
766  // We calculate the estimates assuming independence. With this assumption,
767  // the estimate is the product of the estimates for the sub-postlists
768  // (with the right side this is inverted by subtracting from the total
769  // size), divided by the total size.
770  return static_cast<T>((l * double(n - r)) / n + 0.5);
771 }
772 
774 AndContext::postlist(TermFreqs* termfreqs)
775 {
776  auto matcher = qopt->matcher;
777  auto db_size = qopt->db_size;
778 
779  if (termfreqs) {
780  Assert(!termfreqs_list.empty());
781 
782  // We calculate the estimate assuming independence. With this
783  // assumption, the estimate is the product of the estimates for the
784  // sub-postlists divided by db_size (n - 1) times.
785  const TermFreqs& freqs = termfreqs_list[0];
786 
787  double freqest = double(freqs.termfreq);
788  double relfreqest = double(freqs.reltermfreq);
789  double collfreqest = double(freqs.collfreq);
790 
791  auto& stats = *qopt->get_stats();
792 
793  // Our caller should have ensured this.
794  Assert(stats.collection_size);
795 
796  for (size_t i = 1; i < termfreqs_list.size(); ++i) {
797  const TermFreqs& f = termfreqs_list[i];
798 
799  // If the collection is empty, freqest should be 0 already, so
800  // leave it alone.
801  freqest *= f.termfreq / stats.collection_size;
802  if (usual(stats.total_length != 0)) {
803  collfreqest *= f.collfreq / stats.total_length;
804  }
805 
806  // If the rset is empty, relfreqest should be 0 already, so leave
807  // it alone.
808  if (stats.rset_size != 0)
809  relfreqest *= f.reltermfreq / stats.rset_size;
810  }
811 
812  *termfreqs =
813  TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
814  static_cast<Xapian::doccount>(relfreqest + 0.5),
815  static_cast<Xapian::termcount>(collfreqest + 0.5));
816  }
817 
818  unique_ptr<PostList> pl;
819  unique_ptr<EstimateOp> est;
820  switch (pls.size()) {
821  case 0: {
822  if (!match_all) {
823  // The "and" part doesn't match anything, so any "not" part or
824  // positional filters are irrelevant.
825  return {};
826  }
827  auto [new_pl, new_est] = qopt->open_post_list({}, 0, 0.0, nullptr);
828  pl.reset(new_pl);
829  est = std::move(new_est);
830  break;
831  }
832  case 1:
833  pl.reset(pls[0]);
834  est.reset(estimates.release_at(0));
835  break;
836  default:
837  pl.reset(new AndPostList(pls.begin(), pls.end(), matcher));
838  if (!qopt->get_no_estimates()) {
839  est.reset(new EstimateOp(EstimateOp::AND, first, last,
840  std::move(estimates)));
841  }
842  break;
843  }
844 
845  if (not_ctx && !not_ctx->empty()) {
846  if (not_ctx->get_last() < first || not_ctx->get_first() > last) {
847  // The ranges don't overlap so the right side has no effect.
848  // The call to not_ctx.reset() below will clean up the estimate
849  // stack.
850  } else {
851  TermFreqs r_freqs;
852  auto [rhs, rhs_est] = not_ctx->postlist(termfreqs ? &r_freqs : NULL,
853  true);
854  if (termfreqs) {
855  TermFreqs& freqs = *termfreqs;
856  auto& stats = *qopt->get_stats();
857 
858  // Our caller should have ensured this.
859  Assert(stats.collection_size);
860  freqs.termfreq = estimate_and_not(freqs.termfreq,
861  r_freqs.termfreq,
862  stats.collection_size);
863 
864  // If total_length is 0 then collfreq should always be 0 (since
865  // total_length is the sum of all collfreq values) so nothing
866  // to do.
867  if (stats.total_length != 0) {
868  freqs.collfreq = estimate_and_not(freqs.collfreq,
869  r_freqs.collfreq,
870  stats.total_length);
871  }
872 
873  // If the rset is empty then relfreqest should always be 0 so
874  // nothing to do.
875  if (stats.rset_size != 0) {
877  r_freqs.reltermfreq,
878  stats.rset_size);
879  }
880  }
881 
882  pl.reset(new AndNotPostList(pl.release(), rhs, db_size));
883  if (!qopt->get_no_estimates()) {
884  // The bounds are the same as those for the left side.
885  est.reset(new EstimateOp(EstimateOp::AND_NOT, first, last,
886  std::move(est), std::move(rhs_est)));
887  }
888  }
889  not_ctx.reset();
890  }
891 
892  // Sort the positional filters to try to apply them in an efficient order.
893  // FIXME: We need to figure out what that is! Try applying lowest cf/tf
894  // first?
895 
896  // Apply any positional filters.
897  for (const PosFilter& filter : pos_filters) {
898  auto [new_pl, new_est] = filter.postlist(pl.release(), est.release(),
899  pls, matcher, termfreqs);
900  pl.reset(new_pl);
901  est = std::move(new_est);
902  }
903 
904  // Empty pls so our destructor doesn't delete them all!
905  pls.clear();
906 
907  if (maybe_ctx && !maybe_ctx->empty()) {
908  if (maybe_ctx->get_last() < first || maybe_ctx->get_first() > last) {
909  // The ranges don't overlap so the right side has no effect.
910  // The call to maybe_ctx.reset() below will clean up the estimate
911  // stack.
912  } else {
913  // For OP_AND_MAYBE only the LHS determines which documents match
914  // (the RHS only adds weight) so the estimate is just that for the
915  // LHS. It would be useless extra work to generate estimates for
916  // anything on the RHS, and create a problem with the lifetime of
917  // the EstimateOp object which would need to live until after the
918  // match for any PostList that can report stats.
919  bool save_no_estimates = qopt->get_no_estimates();
920  qopt->set_no_estimates(true);
921  auto [rhs, rhs_est] = maybe_ctx->postlist(termfreqs);
922  qopt->set_no_estimates(save_no_estimates);
923 
924  // If this assertion fails, there's probably a missing check for
925  // qopt->get_no_estimates() somewhere.
926  Assert(!rhs_est);
927 
928  // A NULL PostList from OrContext::postlist() can only mean that
929  // maybe_ctx is empty, but in that case get_last() returns zero
930  // which means we would have taken the branch above.
931  Assert(rhs);
932 
933  pl.reset(new AndMaybePostList(pl.release(), rhs, matcher));
934  }
935  maybe_ctx.reset();
936  }
937 
938  return {pl.release(), est.release()};
939 }
940 
941 }
942 
943 Query::Internal::~Internal() { }
944 
945 size_t
946 Query::Internal::get_num_subqueries() const noexcept
947 {
948  return 0;
949 }
950 
951 const Query
952 Query::Internal::get_subquery(size_t) const
953 {
954  throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
955 }
956 
958 Query::Internal::get_wqf() const
959 {
960  throw Xapian::InvalidArgumentError("get_wqf() not meaningful for this Query object");
961 }
962 
964 Query::Internal::get_pos() const
965 {
966  throw Xapian::InvalidArgumentError("get_pos() not meaningful for this Query object");
967 }
968 
969 void
970 Query::Internal::gather_terms(void *) const
971 {
972 }
973 
975 Query::Internal::get_length() const noexcept
976 {
977  return 0;
978 }
979 
981 Query::Internal::unserialise(const char ** p, const char * end,
982  const Registry & reg)
983 {
984  if (*p == end)
985  return NULL;
986  unsigned char ch = *(*p)++;
987  switch (ch >> 5) {
988  case 4: case 5: case 6: case 7: {
989  // Multi-way branch
990  //
991  // 1ccccnnn where:
992  // nnn -> n_subqs (0 means encoded value follows)
993  // cccc -> code (which OP_XXX)
994  size_t n_subqs = ch & 0x07;
995  if (n_subqs == 0) {
996  if (!unpack_uint(p, end, &n_subqs)) {
998  }
999  n_subqs += 8;
1000  }
1001  unsigned char code = (ch >> 3) & 0x0f;
1002  Xapian::termcount parameter = 0;
1003  if (code >= 13) {
1004  if (!unpack_uint(p, end, &parameter)) {
1006  }
1007  }
1009  switch (code) {
1010  case 0: // OP_AND
1011  result = new Xapian::Internal::QueryAnd(n_subqs);
1012  break;
1013  case 1: // OP_OR
1014  result = new Xapian::Internal::QueryOr(n_subqs);
1015  break;
1016  case 2: // OP_AND_NOT
1017  result = new Xapian::Internal::QueryAndNot(n_subqs);
1018  break;
1019  case 3: // OP_XOR
1020  result = new Xapian::Internal::QueryXor(n_subqs);
1021  break;
1022  case 4: // OP_AND_MAYBE
1023  result = new Xapian::Internal::QueryAndMaybe(n_subqs);
1024  break;
1025  case 5: // OP_FILTER
1026  result = new Xapian::Internal::QueryFilter(n_subqs);
1027  break;
1028  case 6: // OP_SYNONYM
1029  result = new Xapian::Internal::QuerySynonym(n_subqs);
1030  break;
1031  case 7: // OP_MAX
1032  result = new Xapian::Internal::QueryMax(n_subqs);
1033  break;
1034  case 13: // OP_ELITE_SET
1035  result = new Xapian::Internal::QueryEliteSet(n_subqs,
1036  parameter);
1037  break;
1038  case 14: // OP_NEAR
1039  result = new Xapian::Internal::QueryNear(n_subqs,
1040  parameter);
1041  break;
1042  case 15: // OP_PHRASE
1043  result = new Xapian::Internal::QueryPhrase(n_subqs,
1044  parameter);
1045  break;
1046  default:
1047  // 8 to 12 are currently unused.
1048  throw SerialisationError("Unknown multi-way branch Query operator");
1049  }
1050  do {
1051  result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
1052  } while (--n_subqs);
1053  result->done();
1054  return result;
1055  }
1056  case 2: case 3: { // Term
1057  // Term
1058  //
1059  // 01ccLLLL where:
1060  // LLLL -> length (0 means encoded value follows)
1061  // cc -> code:
1062  // 0: wqf = 0; pos = 0
1063  // 1: wqf = 1; pos = 0
1064  // 2: wqf = 1; pos -> encoded value follows
1065  // 3: wqf -> encoded value follows; pos -> encoded value follows
1066  size_t len = ch & 0x0f;
1067  if (len == 0) {
1068  if (!unpack_uint(p, end, &len)) {
1070  }
1071  len += 16;
1072  }
1073  if (size_t(end - *p) < len)
1074  throw SerialisationError("Not enough data");
1075  string term(*p, len);
1076  *p += len;
1077 
1078  int code = ((ch >> 4) & 0x03);
1079 
1080  Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
1081  if (code == 3) {
1082  if (!unpack_uint(p, end, &wqf)) {
1084  }
1085  }
1086 
1087  Xapian::termpos pos = 0;
1088  if (code >= 2) {
1089  if (!unpack_uint(p, end, &pos)) {
1091  }
1092  }
1093 
1094  return new Xapian::Internal::QueryTerm(term, wqf, pos);
1095  }
1096  case 1: {
1097  // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
1098  //
1099  // 001tssss where:
1100  // ssss -> slot number (15 means encoded value follows)
1101  // t -> op:
1102  // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
1103  // 1: OP_VALUE_GE
1104  Xapian::valueno slot = ch & 15;
1105  if (slot == 15) {
1106  if (!unpack_uint(p, end, &slot)) {
1108  }
1109  slot += 15;
1110  }
1111  string begin;
1112  if (!unpack_string(p, end, begin)) {
1114  }
1115  if (ch & 0x10) {
1116  // OP_VALUE_GE
1117  return new Xapian::Internal::QueryValueGE(slot, begin);
1118  }
1119 
1120  // OP_VALUE_RANGE
1121  string end_;
1122  if (!unpack_string(p, end, end_)) {
1124  }
1125  if (begin.empty()) // FIXME: is this right?
1126  return new Xapian::Internal::QueryValueLE(slot, end_);
1127  return new Xapian::Internal::QueryValueRange(slot, begin, end_);
1128  }
1129  case 0: {
1130  // Other operators
1131  //
1132  // 000ttttt where:
1133  // ttttt -> encodes which OP_XXX
1134  switch (ch & 0x1f) {
1135  case 0x00: // OP_INVALID
1136  return new Xapian::Internal::QueryInvalid();
1137  case 0x0a: { // Edit distance
1138  Xapian::termcount max_expansion;
1139  if (!unpack_uint(p, end, &max_expansion) || end - *p < 2) {
1140  throw SerialisationError("not enough data");
1141  }
1142  int flags = static_cast<unsigned char>(*(*p)++);
1143  op combiner = static_cast<op>(*(*p)++);
1144  unsigned edit_distance;
1145  size_t fixed_prefix_len;
1146  string pattern;
1147  if (!unpack_uint(p, end, &edit_distance) ||
1148  !unpack_uint(p, end, &fixed_prefix_len) ||
1149  !unpack_string(p, end, pattern)) {
1150  throw SerialisationError("not enough data");
1151  }
1153  return new QueryEditDistance(pattern,
1154  max_expansion,
1155  flags,
1156  combiner,
1157  edit_distance,
1158  fixed_prefix_len);
1159  }
1160  case 0x0b: { // Wildcard
1161  Xapian::termcount max_expansion;
1162  if (!unpack_uint(p, end, &max_expansion) || end - *p < 2) {
1163  throw SerialisationError("not enough data");
1164  }
1165  int flags = static_cast<unsigned char>(*(*p)++);
1166  op combiner = static_cast<op>(*(*p)++);
1167  string pattern;
1168  if (!unpack_string(p, end, pattern)) {
1169  throw SerialisationError("not enough data");
1170  }
1171  return new Xapian::Internal::QueryWildcard(pattern,
1172  max_expansion,
1173  flags,
1174  combiner);
1175  }
1176  case 0x0c: { // PostingSource
1177  string name;
1178  if (!unpack_string(p, end, name)) {
1179  throw SerialisationError("not enough data");
1180  }
1181 
1182  const PostingSource * reg_source = reg.get_posting_source(name);
1183  if (!reg_source) {
1184  string m = "PostingSource ";
1185  m += name;
1186  m += " not registered";
1187  throw SerialisationError(m);
1188  }
1189 
1190  string serialised_source;
1191  if (!unpack_string(p, end, serialised_source)) {
1192  throw SerialisationError("not enough data");
1193  }
1194  PostingSource* source =
1195  reg_source->unserialise_with_registry(serialised_source,
1196  reg);
1197  return new Xapian::Internal::QueryPostingSource(source->release());
1198  }
1199  case 0x0d: {
1201  double scale_factor = unserialise_double(p, end);
1202  return new QueryScaleWeight(scale_factor,
1203  Query(unserialise(p, end, reg)));
1204  }
1205  case 0x0e: {
1206  Xapian::termcount wqf;
1208  if (!unpack_uint(p, end, &wqf) ||
1209  !unpack_uint(p, end, &pos)) {
1210  throw SerialisationError("not enough data");
1211  }
1212  return new Xapian::Internal::QueryTerm({}, wqf, pos);
1213  }
1214  case 0x0f:
1215  return new Xapian::Internal::QueryTerm();
1216  default: // Others currently unused.
1217  break;
1218  }
1219  break;
1220  }
1221  }
1222  string msg = "Unknown Query serialisation: ";
1223  msg += str(ch);
1224  throw SerialisationError(msg);
1225 }
1226 
1227 bool
1228 Query::Internal::postlist_sub_and_like(AndContext& ctx,
1229  QueryOptimiser * qopt,
1230  double factor,
1231  TermFreqs* termfreqs) const
1232 {
1233  return ctx.add_postlist(postlist(qopt, factor, termfreqs), termfreqs);
1234 }
1235 
1236 void
1237 Query::Internal::postlist_sub_or_like(OrContext& ctx,
1238  QueryOptimiser* qopt,
1239  double factor,
1240  TermFreqs* termfreqs,
1241  bool keep_zero_weight) const
1242 {
1243  Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1244  auto [pl_, est] = postlist(qopt, factor, termfreqs);
1245  unique_ptr<PostList> pl{pl_};
1246  if (!keep_zero_weight && pl && pl->recalc_maxweight() == 0.0) {
1247  // This subquery can't contribute any weight, so can be discarded.
1248  //
1249  // Restore the value of total_subqs so that percentages don't get
1250  // messed up if we increased total_subqs in the call to postlist()
1251  // above.
1252  qopt->set_total_subqs(save_total_subqs);
1253  qopt->destroy_postlist(pl.release());
1254  return;
1255  }
1256  ctx.add_postlist(pl.release(), est.release(), termfreqs);
1257 }
1258 
1259 void
1260 Query::Internal::postlist_sub_bool_or_like(OrContext& ctx,
1261  QueryOptimiser* qopt,
1262  TermFreqs* termfreqs) const
1263 {
1264  ctx.add_postlist(postlist(qopt, 0.0, termfreqs), termfreqs);
1265 }
1266 
1267 void
1268 Query::Internal::postlist_sub_xor(XorContext& ctx,
1269  QueryOptimiser* qopt,
1270  double factor,
1271  TermFreqs* termfreqs) const
1272 {
1273  ctx.add_postlist(postlist(qopt, factor, termfreqs), termfreqs);
1274 }
1275 
1276 namespace Internal {
1277 
1278 Query::op
1279 QueryTerm::get_type() const noexcept
1280 {
1281  return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
1282 }
1283 
1284 string
1285 QueryTerm::get_description() const
1286 {
1287  string desc;
1288  if (term.empty()) {
1289  desc = "<alldocuments>";
1290  } else {
1291  description_append(desc, term);
1292  }
1293  if (wqf != 1) {
1294  desc += '#';
1295  desc += str(wqf);
1296  }
1297  if (pos) {
1298  desc += '@';
1299  desc += str(pos);
1300  }
1301  return desc;
1302 }
1303 
1304 QueryPostingSource::QueryPostingSource(PostingSource * source_)
1305  : source(source_)
1306 {
1307  if (!source_)
1308  throw Xapian::InvalidArgumentError("source parameter can't be NULL");
1309  if (source->_refs == 0) {
1310  // source_ isn't reference counted, so try to clone it. If clone()
1311  // isn't implemented, just use the object provided and it's the
1312  // caller's responsibility to ensure it stays valid while in use.
1313  PostingSource * cloned_source = source->clone();
1314  if (cloned_source) source = cloned_source->release();
1315  }
1316 }
1317 
1318 Query::op
1320 {
1322 }
1323 
1324 string
1326 {
1327  string desc = "PostingSource(";
1328  desc += source->get_description();
1329  desc += ')';
1330  return desc;
1331 }
1332 
1333 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
1334  : scale_factor(factor), subquery(subquery_)
1335 {
1336  if (rare(scale_factor < 0.0))
1337  throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
1338 }
1339 
1340 Query::op
1342 {
1343  return Query::OP_SCALE_WEIGHT;
1344 }
1345 
1346 size_t
1348 {
1349  return 1;
1350 }
1351 
1352 const Query
1354 {
1355  return subquery;
1356 }
1357 
1358 string
1360 {
1362  string desc = str(scale_factor);
1363  desc += " * ";
1364  desc += subquery.internal->get_description();
1365  return desc;
1366 }
1367 
1370  TermFreqs* termfreqs) const
1371 {
1372  LOGCALL(QUERY, PostListAndEstimate, "QueryTerm::postlist", qopt | factor | termfreqs);
1373  if (factor != 0.0)
1374  qopt->inc_total_subqs();
1375  RETURN(qopt->open_post_list(term, wqf, factor, termfreqs));
1376 }
1377 
1378 bool
1380  QueryOptimiser* qopt,
1381  double factor,
1382  TermFreqs* termfreqs) const
1383 {
1384  if (term.empty() && !qopt->need_positions && factor == 0.0 && !termfreqs) {
1385  // No-op MatchAll.
1386  ctx.set_match_all();
1387  return true;
1388  }
1389  return ctx.add_postlist(postlist(qopt, factor, termfreqs), termfreqs);
1390 }
1391 
1394  TermFreqs* termfreqs) const
1395 {
1396  LOGCALL(QUERY, PostListAndEstimate, "QueryPostingSource::postlist", qopt | factor | termfreqs);
1397  Assert(source);
1398  if (factor != 0.0)
1399  qopt->inc_total_subqs();
1400  unique_ptr<EstimateOp> est;
1401  if (!qopt->get_no_estimates()) {
1402  est.reset(new EstimateOp());
1403  }
1404  // Casting away const on the Database::Internal here is OK, as we wrap
1405  // them in a const Xapian::Database so non-const methods can't actually
1406  // be called on the Database::Internal object.
1407  const Xapian::Database wrappeddb(
1408  const_cast<Xapian::Database::Internal*>(&(qopt->db)));
1409  auto pl =
1410  new ExternalPostList(wrappeddb, source.get(), est.get(), factor,
1412  qopt->shard_index);
1413  if (termfreqs) {
1414  auto& stats = *qopt->get_stats();
1415  auto db_size = qopt->db_size;
1416  // Scale proportionately from this shard to the whole collection. Not
1417  // as good as summing over all shards, but that's harder to do.
1418  double termfreq = pl->get_termfreq();
1419  auto tf = termfreq * stats.collection_size / db_size;
1420  auto rtf = termfreq * stats.rset_size / db_size;
1421  auto cf = termfreq * stats.total_length / db_size;
1422  *termfreqs = TermFreqs(static_cast<Xapian::doccount>(tf + 0.5),
1423  static_cast<Xapian::doccount>(rtf + 0.5),
1424  static_cast<Xapian::termcount>(cf + 0.5));
1425  }
1426  RETURN({pl, std::move(est)});
1427 }
1428 
1431  TermFreqs* termfreqs) const
1432 {
1433  LOGCALL(QUERY, PostListAndEstimate, "QueryScaleWeight::postlist", qopt | factor | termfreqs);
1434  RETURN(subquery.internal->postlist(qopt, factor * scale_factor, termfreqs));
1435 }
1436 
1437 bool
1439  QueryOptimiser* qopt,
1440  double factor,
1441  TermFreqs* termfreqs) const
1442 {
1443  return subquery.internal->postlist_sub_and_like(ctx, qopt,
1444  factor * scale_factor,
1445  termfreqs);
1446 }
1447 
1448 void
1449 QueryTerm::gather_terms(void * void_terms) const
1450 {
1451  // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
1452  if (!term.empty()) {
1453  vector<pair<Xapian::termpos, string>> &terms =
1454  *static_cast<vector<pair<Xapian::termpos, string>>*>(void_terms);
1455  terms.push_back(make_pair(pos, term));
1456  }
1457 }
1458 
1459 static double
1460 string_frac(const string& s, size_t prefix)
1461 {
1462  double r = 0;
1463  double f = 1.0;
1464  for (size_t i = prefix; i != s.size(); ++i) {
1465  f /= 256.0;
1466  r += static_cast<unsigned char>(s[i]) * f;
1467  }
1468 
1469  return r;
1470 }
1471 
1472 static Xapian::doccount
1473 estimate_range_freq(const string& lo, const string& hi,
1474  const string& begin, const string* end,
1475  Xapian::doccount value_freq)
1476 {
1477  // Assume the values are evenly spread out between lo and hi.
1478  // FIXME: Perhaps we should store some sort of binned distribution?
1479  AssertRel(lo, <=, hi);
1480 
1481  size_t common_prefix_len = size_t(-1);
1482  do {
1483  UNSIGNED_OVERFLOW_OK(++common_prefix_len);
1484  // lo <= hi so while we're in the common prefix hi can't run out
1485  // before lo.
1486  if (common_prefix_len == lo.size()) {
1487  if (common_prefix_len != hi.size())
1488  break;
1489  // All values in the slot are the same. We should have optimised
1490  // to NULL if that singular value is outside the range, and if it's
1491  // inside the range then we know that the frequency is exactly the
1492  // value frequency.
1493  Assert(begin <= lo && (!end || hi <= *end));
1494  return value_freq;
1495  }
1496  AssertRel(common_prefix_len, !=, hi.size());
1497  } while (lo[common_prefix_len] == hi[common_prefix_len]);
1498 
1499  double l = string_frac(lo, common_prefix_len);
1500  double h = string_frac(hi, common_prefix_len);
1501  double denom = h - l;
1502  if (rare(denom == 0.0)) {
1503  // Weird corner case - hi != lo (because that's handled inside the loop
1504  // above) but they give the same string_frac value. Because we only
1505  // calculate the fraction starting from the first difference, this
1506  // should only happen if hi is lo + one or more trailing zero bytes.
1507 
1508  // The case where all set values lie within the range should be handled
1509  // at a higher level and we shouldn't get called.
1510  Assert(!(begin <= lo && (!end || hi <= *end)));
1511 
1512  // There must be partial overlap as the cases where the range
1513  // dominates the bounds and where the range is entirely outside the
1514  // bounds are both handled at a higher level.
1515  return value_freq / 2;
1516  }
1517 
1518  double b = l;
1519  if (begin > lo) {
1520  b = string_frac(begin, common_prefix_len);
1521  }
1522  double e = h;
1523  if (end && *end < hi) {
1524  // end is NULL for a ValueGePostList
1525  e = string_frac(*end, common_prefix_len);
1526  }
1527 
1528  double est = (e - b) / denom * value_freq;
1529  return Xapian::doccount(est + 0.5);
1530 }
1531 
1534  TermFreqs* termfreqs) const
1535 {
1536  LOGCALL(QUERY, PostListAndEstimate, "QueryValueRange::postlist", qopt | factor | termfreqs);
1537  if (factor != 0.0)
1538  qopt->inc_total_subqs();
1539  const Xapian::Database::Internal & db = qopt->db;
1540  const auto db_size = qopt->db_size;
1541  const string & lb = db.get_value_lower_bound(slot);
1542  if (lb.empty()) {
1543  // This should only happen if there are no values in this slot (which
1544  // could be because the backend just doesn't support values at all).
1545  // If there were values in the slot, the backend should have a
1546  // non-empty lower bound, even if it isn't a tight one.
1547  AssertEq(db.get_value_freq(slot), 0);
1548  if (termfreqs) *termfreqs = TermFreqs();
1549  RETURN({});
1550  }
1551  if (end < lb) {
1552  if (termfreqs) *termfreqs = TermFreqs();
1553  RETURN({});
1554  }
1555  const string & ub = db.get_value_upper_bound(slot);
1556  if (begin > ub) {
1557  if (termfreqs) *termfreqs = TermFreqs();
1558  RETURN({});
1559  }
1560 
1561  if (termfreqs) {
1562  // We scale these below.
1563  auto& stats = *qopt->get_stats();
1564  *termfreqs = TermFreqs(stats.collection_size,
1565  stats.rset_size,
1566  stats.total_length);
1567  }
1568 
1569  auto value_freq = db.get_value_freq(slot);
1570  if (end >= ub) {
1571  if (begin <= lb) {
1572  // The known bounds for the slot both fall within the range so we
1573  // know the range matches whenever the value is set, which is
1574  // exactly value_freq times.
1575  unique_ptr<EstimateOp> est;
1576  if (!qopt->get_no_estimates())
1577  est.reset(new EstimateOp(value_freq));
1578  if (value_freq == db_size) {
1579  // This value is set for all documents in the current shard, so
1580  // we can replace it with a MatchAll postlist, which is
1581  // especially efficient if there are no gaps in the docids.
1582  RETURN({db.open_post_list({}), std::move(est)});
1583  }
1584  // We need to check which documents have a value set in this slot
1585  // but don't need to worry about the range bounds so we can use
1586  // ValueGePostList with an empty string as the lower bound which
1587  // means the range test just becomes a cheap `>= string()` test.
1588  if (termfreqs) *termfreqs *= double(value_freq) / db_size;
1589  auto pl = new ValueGePostList(&db, est.get(),
1590  value_freq, slot, string());
1591  RETURN({pl, std::move(est)});
1592  }
1593  auto tf_est = estimate_range_freq(lb, ub, begin, NULL, value_freq);
1594  unique_ptr<EstimateOp> est;
1595  if (!qopt->get_no_estimates())
1596  est.reset(new EstimateOp(Estimates{0, tf_est, value_freq}));
1597  if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1598  auto pl = new ValueGePostList(&db, est.get(), tf_est, slot, begin);
1599  RETURN({pl, std::move(est)});
1600  }
1601  auto tf_est = estimate_range_freq(lb, ub, begin, &end, value_freq);
1602  unique_ptr<EstimateOp> est;
1603  if (!qopt->get_no_estimates())
1604  est.reset(new EstimateOp(Estimates{0, tf_est, value_freq}));
1605  if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1606  auto pl = new ValueRangePostList(&db, est.get(), tf_est, slot, begin, end);
1607  RETURN({pl, std::move(est)});
1608 }
1609 
1610 void
1611 QueryValueRange::serialise(string & result) const
1612 {
1613  if (slot < 15) {
1614  result += static_cast<char>(0x20 | slot);
1615  } else {
1616  result += static_cast<char>(0x20 | 15);
1617  pack_uint(result, slot - 15);
1618  }
1619  pack_string(result, begin);
1620  pack_string(result, end);
1621 }
1622 
1623 Query::op
1625 {
1626  return Query::OP_VALUE_RANGE;
1627 }
1628 
1629 string
1631 {
1632  string desc = "VALUE_RANGE ";
1633  desc += str(slot);
1634  desc += ' ';
1635  description_append(desc, begin);
1636  desc += ' ';
1637  description_append(desc, end);
1638  return desc;
1639 }
1640 
1643  TermFreqs* termfreqs) const
1644 {
1645  LOGCALL(QUERY, PostListAndEstimate, "QueryValueLE::postlist", qopt | factor | termfreqs);
1646  if (factor != 0.0)
1647  qopt->inc_total_subqs();
1648  const Xapian::Database::Internal & db = qopt->db;
1649  const auto db_size = qopt->db_size;
1650  const string & lb = db.get_value_lower_bound(slot);
1651  if (lb.empty()) {
1652  // This should only happen if there are no values in this slot (which
1653  // could be because the backend just doesn't support values at all).
1654  // If there were values in the slot, the backend should have a
1655  // non-empty lower bound, even if it isn't a tight one.
1656  AssertEq(db.get_value_freq(slot), 0);
1657  if (termfreqs) *termfreqs = TermFreqs();
1658  RETURN({});
1659  }
1660  if (limit < lb) {
1661  if (termfreqs) *termfreqs = TermFreqs();
1662  RETURN({});
1663  }
1664 
1665  if (termfreqs) {
1666  // We scale these below.
1667  auto& stats = *qopt->get_stats();
1668  *termfreqs = TermFreqs(stats.collection_size,
1669  stats.rset_size,
1670  stats.total_length);
1671  }
1672 
1673  auto value_freq = db.get_value_freq(slot);
1674  const string& ub = db.get_value_upper_bound(slot);
1675  if (limit >= ub) {
1676  // The known bounds for the slot both fall within the range so we
1677  // know the range matches whenever the value is set, which is
1678  // exactly value_freq times.
1679  unique_ptr<EstimateOp> est;
1680  if (!qopt->get_no_estimates())
1681  est.reset(new EstimateOp(value_freq));
1682  if (value_freq == db_size) {
1683  // This value is set for all documents in the current shard, so
1684  // we can replace it with a MatchAll postlist, which is
1685  // especially efficient if there are no gaps in the docids.
1686  RETURN({db.open_post_list({}), std::move(est)});
1687  }
1688  // We need to check which documents have a value set in this slot
1689  // but don't need to worry about the range bounds so we can use
1690  // ValueGePostList with an empty string as the lower bound which
1691  // means the range test just becomes a cheap `>= string()` test.
1692  if (termfreqs) *termfreqs *= double(value_freq) / db_size;
1693  auto pl = new ValueGePostList(&db, est.get(),
1694  value_freq, slot, string());
1695  RETURN({pl, std::move(est)});
1696  }
1697  auto tf_est = estimate_range_freq(lb, ub, string(), &limit, value_freq);
1698  unique_ptr<EstimateOp> est;
1699  if (!qopt->get_no_estimates())
1700  est.reset(new EstimateOp(Estimates{0, tf_est, value_freq}));
1701  if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1702  auto pl = new ValueRangePostList(&db, est.get(),
1703  tf_est, slot, string(), limit);
1704  RETURN({pl, std::move(est)});
1705 }
1706 
1707 void
1708 QueryValueLE::serialise(string & result) const
1709 {
1710  // Encode as a range with an empty start (which only takes a single byte to
1711  // encode).
1712  if (slot < 15) {
1713  result += static_cast<char>(0x20 | slot);
1714  } else {
1715  result += static_cast<char>(0x20 | 15);
1716  pack_uint(result, slot - 15);
1717  }
1718  pack_string_empty(result);
1719  pack_string(result, limit);
1720 }
1721 
1722 Query::op
1723 QueryValueLE::get_type() const noexcept
1724 {
1725  return Query::OP_VALUE_LE;
1726 }
1727 
1728 string
1730 {
1731  string desc = "VALUE_LE ";
1732  desc += str(slot);
1733  desc += ' ';
1734  description_append(desc, limit);
1735  return desc;
1736 }
1737 
1740  TermFreqs* termfreqs) const
1741 {
1742  LOGCALL(QUERY, PostListAndEstimate, "QueryValueGE::postlist", qopt | factor | termfreqs);
1743  if (factor != 0.0)
1744  qopt->inc_total_subqs();
1745  const Xapian::Database::Internal & db = qopt->db;
1746  const auto db_size = qopt->db_size;
1747  const string & lb = db.get_value_lower_bound(slot);
1748  if (lb.empty()) {
1749  // This should only happen if there are no values in this slot (which
1750  // could be because the backend just doesn't support values at all).
1751  // If there were values in the slot, the backend should have a
1752  // non-empty lower bound, even if it isn't a tight one.
1753  AssertEq(db.get_value_freq(slot), 0);
1754  if (termfreqs) *termfreqs = TermFreqs();
1755  RETURN({});
1756  }
1757  const string& ub = db.get_value_upper_bound(slot);
1758  if (limit > ub) {
1759  if (termfreqs) *termfreqs = TermFreqs();
1760  RETURN({});
1761  }
1762 
1763  if (termfreqs) {
1764  // We scale these below.
1765  auto& stats = *qopt->get_stats();
1766  *termfreqs = TermFreqs(stats.collection_size,
1767  stats.rset_size,
1768  stats.total_length);
1769  }
1770 
1771  auto value_freq = db.get_value_freq(slot);
1772  if (limit <= lb) {
1773  // The known bounds for the slot both fall within the range so we
1774  // know the range matches whenever the value is set, which is
1775  // exactly value_freq times.
1776  unique_ptr<EstimateOp> est;
1777  if (!qopt->get_no_estimates())
1778  est.reset(new EstimateOp(value_freq));
1779  if (value_freq == db_size) {
1780  // This value is set for all documents in the current shard, so
1781  // we can replace it with a MatchAll postlist, which is
1782  // especially efficient if there are no gaps in the docids.
1783  RETURN({db.open_post_list({}), std::move(est)});
1784  }
1785  // We need to check which documents have a value set in this slot
1786  // but don't need to worry about the range bounds so we can use
1787  // ValueGePostList with an empty string as the lower bound which
1788  // means the range test just becomes a cheap `>= string()` test.
1789  auto pl = new ValueGePostList(&db, est.get(),
1790  value_freq, slot, string());
1791  RETURN({pl, std::move(est)});
1792  }
1793  auto tf_est = estimate_range_freq(lb, ub, limit, NULL, value_freq);
1794  unique_ptr<EstimateOp> est;
1795  if (!qopt->get_no_estimates())
1796  est.reset(new EstimateOp(Estimates{0, tf_est, value_freq}));
1797  if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1798  auto pl = new ValueGePostList(&db, est.get(), tf_est, slot, limit);
1799  RETURN({pl, std::move(est)});
1800 }
1801 
1802 void
1803 QueryValueGE::serialise(string & result) const
1804 {
1805  if (slot < 15) {
1806  result += static_cast<char>(0x20 | 0x10 | slot);
1807  } else {
1808  result += static_cast<char>(0x20 | 0x10 | 15);
1809  pack_uint(result, slot - 15);
1810  }
1811  pack_string(result, limit);
1812 }
1813 
1814 Query::op
1815 QueryValueGE::get_type() const noexcept
1816 {
1817  return Query::OP_VALUE_GE;
1818 }
1819 
1820 string
1822 {
1823  string desc = "VALUE_GE ";
1824  desc += str(slot);
1825  desc += ' ';
1826  description_append(desc, limit);
1827  return desc;
1828 }
1829 
1830 QueryWildcard::QueryWildcard(std::string_view pattern_,
1831  Xapian::termcount max_expansion_,
1832  int flags_,
1833  Query::op combiner_)
1834  : pattern(pattern_),
1835  max_expansion(max_expansion_),
1836  flags(flags_),
1837  combiner(combiner_)
1838 {
1839  if ((flags & ~Query::WILDCARD_LIMIT_MASK_) == 0) {
1840  head = min_len = pattern.size();
1841  max_len = numeric_limits<decltype(max_len)>::max();
1842  prefix = pattern;
1843  return;
1844  }
1845 
1846  size_t i = 0;
1847  while (i != pattern.size()) {
1848  // Check for characters with special meaning.
1849  switch (pattern[i]) {
1850  case '*':
1852  goto found_special;
1853  break;
1854  case '?':
1856  goto found_special;
1857  break;
1858  }
1859  prefix += pattern[i];
1860  ++i;
1861  head = i;
1862  }
1863 found_special:
1864 
1865  min_len = max_len = prefix.size();
1866 
1867  tail = i;
1868  size_t qm_count = 0;
1869  bool had_star = false;
1870  while (i != pattern.size()) {
1871  switch (pattern[i]) {
1872  default:
1873 default_case:
1874  suffix += pattern[i];
1875  ++min_len;
1876  ++max_len;
1877  break;
1878 
1879  case '*':
1881  goto default_case;
1882  // Matches zero or more characters.
1883  had_star = true;
1884  tail = i + 1;
1885  if (!suffix.empty()) {
1886  min_check_len = 0;
1887  suffix.clear();
1888  }
1889  break;
1890 
1891  case '?':
1893  goto default_case;
1894  // Matches exactly one character.
1895  tail = i + 1;
1896  if (!suffix.empty()) {
1897  min_check_len = 0;
1898  suffix.clear();
1899  }
1900  ++qm_count;
1901  ++min_len;
1903  break;
1904  }
1905 
1906  ++i;
1907  }
1908 
1909  if (had_star) {
1910  max_len = numeric_limits<decltype(max_len)>::max();
1911  } else if (qm_count > 1) {
1912  // `?` matches one Unicode character, which is 1-4 bytes in UTF-8, so
1913  // we have to actually check the pattern if there's more than one `?`
1914  // in it.
1915  min_check_len = 0;
1916  } else if (qm_count == 1) {
1917  // If the pattern contains exactly one `?` wildcard we need to check it
1918  // unless the candidate is exactly min_len bytes long. Note that we
1919  // know it can't match if it's < min_len long.
1920  min_check_len = min_len + 1;
1921  }
1922 }
1923 
1924 bool
1925 QueryWildcard::test_wildcard_(const string& candidate, size_t o, size_t p,
1926  size_t i) const
1927 {
1928  // FIXME: Optimisation potential here. We could compile the pattern to a
1929  // regex, or other tricks like calculating the min length needed after each
1930  // position that we test with this method - e.g. for foo*bar*x*baz there
1931  // must be at least 7 bytes after a position or there's no point testing if
1932  // "bar" matches there.
1933  for ( ; i != tail; ++i) {
1934  if ((flags & Query::WILDCARD_PATTERN_MULTI) && pattern[i] == '*') {
1935  if (++i == tail) {
1936  // '*' at end of variable part is easy!
1937  return true;
1938  }
1939  for (size_t test_o = o; test_o <= p; ++test_o) {
1940  if (test_wildcard_(candidate, test_o, p, i))
1941  return true;
1942  }
1943  return false;
1944  }
1945  if (o == p) return false;
1946  if ((flags & Query::WILDCARD_PATTERN_SINGLE) && pattern[i] == '?') {
1947  unsigned char b = candidate[o];
1948  if (b < 0xc0) {
1949  ++o;
1950  continue;
1951  }
1952  unsigned seqlen;
1953  if (b < 0xe0) {
1954  seqlen = 2;
1955  } else if (b < 0xf0) {
1956  seqlen = 3;
1957  } else {
1958  seqlen = 4;
1959  }
1960  if (rare(p - o < seqlen)) return false;
1961  o += seqlen;
1962  continue;
1963  }
1964 
1965  if (pattern[i] != candidate[o]) return false;
1966  ++o;
1967  }
1968  return (o == p);
1969 }
1970 
1971 bool
1972 QueryWildcard::test_prefix_known(const string& candidate) const
1973 {
1974  if (candidate.size() < min_len) return false;
1975  if (candidate.size() > max_len) return false;
1976  if (!endswith(candidate, suffix)) return false;
1977 
1978  if (candidate.size() < min_check_len) return true;
1979 
1980  return test_wildcard_(candidate, prefix.size(),
1981  candidate.size() - suffix.size(),
1982  head);
1983 }
1984 
1987  TermFreqs* termfreqs) const
1988 {
1989  LOGCALL(QUERY, PostListAndEstimate, "QueryWildcard::postlist", qopt | factor | termfreqs);
1990  OrContext ctx(qopt, 0);
1991  Query::op op = combiner;
1992  if (factor == 0.0 || op == Query::OP_SYNONYM) {
1993  if (factor == 0.0) {
1994  // If we have a factor of 0, we don't care about the weights, so
1995  // we're just like a normal OR query.
1996  op = Query::OP_OR;
1997  }
1998 
1999  bool old_compound_weight = qopt->compound_weight;
2000  if (!old_compound_weight) {
2001  qopt->compound_weight = (op == Query::OP_SYNONYM);
2002  }
2003 
2004  TermFreqs synonym_freqs;
2005  if (op == Query::OP_SYNONYM) {
2006  qopt->inc_total_subqs();
2007  ctx.expand_wildcard(this, 0.0, &synonym_freqs);
2008  } else {
2009  ctx.expand_wildcard(this, 0.0, termfreqs);
2010  }
2011 
2012  qopt->compound_weight = old_compound_weight;
2013 
2014  if (ctx.empty())
2015  RETURN({});
2016 
2017  if (op != Query::OP_SYNONYM)
2018  RETURN(ctx.postlist(termfreqs, true));
2019 
2020  // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
2021  // SynonymPostList, which supplies the weights.
2022  RETURN(qopt->make_synonym_postlist(ctx.postlist(&synonym_freqs, true),
2023  factor, synonym_freqs));
2024  }
2025 
2026  ctx.expand_wildcard(this, factor, termfreqs);
2027 
2028  qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
2029 
2030  if (ctx.empty())
2031  RETURN({});
2032 
2033  if (op == Query::OP_MAX)
2034  RETURN(ctx.postlist_max());
2035 
2036  RETURN(ctx.postlist(termfreqs));
2037 }
2038 
2039 termcount
2041 {
2042  // We currently assume wqf is 1 for calculating the synonym's weight
2043  // since conceptually the synonym is one "virtual" term. If we were
2044  // to combine multiple occurrences of the same synonym expansion into
2045  // a single instance with wqf set, we would want to track the wqf.
2046  return 1;
2047 }
2048 
2049 void
2050 QueryWildcard::serialise(string & result) const
2051 {
2052  result += static_cast<char>(0x0b);
2053  pack_uint(result, max_expansion);
2054  result += static_cast<unsigned char>(flags);
2055  result += static_cast<unsigned char>(combiner);
2056  pack_string(result, pattern);
2057 }
2058 
2059 Query::op
2060 QueryWildcard::get_type() const noexcept
2061 {
2062  return Query::OP_WILDCARD;
2063 }
2064 
2065 string
2067 {
2068  string desc = "WILDCARD ";
2069  switch (combiner) {
2070  case Query::OP_SYNONYM:
2071  desc += "SYNONYM ";
2072  break;
2073  case Query::OP_MAX:
2074  desc += "MAX ";
2075  break;
2076  case Query::OP_OR:
2077  desc += "OR ";
2078  break;
2079  default:
2080  desc += "BAD ";
2081  break;
2082  }
2083  description_append(desc, pattern);
2084  return desc;
2085 }
2086 
2087 int
2088 QueryEditDistance::test(const string& candidate) const
2089 {
2090  int threshold = get_threshold();
2091  int edist = edcalc(candidate, threshold);
2092  return edist <= threshold ? edist + 1 : 0;
2093 }
2094 
2097  TermFreqs* termfreqs) const
2098 {
2099  LOGCALL(QUERY, PostListAndEstimate, "QueryEditDistance::postlist", qopt | factor | termfreqs);
2100  OrContext ctx(qopt, 0);
2101  Query::op op = combiner;
2102  if (factor == 0.0 || op == Query::OP_SYNONYM) {
2103  if (factor == 0.0) {
2104  // If we have a factor of 0, we don't care about the weights, so
2105  // we're just like a normal OR query.
2106  op = Query::OP_OR;
2107  }
2108 
2109  bool old_compound_weight = qopt->compound_weight;
2110  if (!old_compound_weight) {
2111  qopt->compound_weight = (op == Query::OP_SYNONYM);
2112  }
2113 
2114  TermFreqs synonym_freqs;
2115  if (op == Query::OP_SYNONYM) {
2116  qopt->inc_total_subqs();
2117  ctx.expand_edit_distance(this, 0.0, &synonym_freqs);
2118  } else {
2119  ctx.expand_edit_distance(this, 0.0, termfreqs);
2120  }
2121 
2122  qopt->compound_weight = old_compound_weight;
2123 
2124  if (ctx.empty())
2125  RETURN({});
2126 
2127  if (op != Query::OP_SYNONYM)
2128  RETURN(ctx.postlist(termfreqs, true));
2129 
2130  // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
2131  // SynonymPostList, which supplies the weights.
2132  RETURN(qopt->make_synonym_postlist(ctx.postlist(&synonym_freqs, true),
2133  factor, synonym_freqs));
2134  }
2135 
2136  ctx.expand_edit_distance(this, factor, termfreqs);
2137 
2138  qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
2139 
2140  if (ctx.empty())
2141  RETURN({});
2142 
2143  if (op == Query::OP_MAX)
2144  RETURN(ctx.postlist_max());
2145 
2146  RETURN(ctx.postlist(termfreqs));
2147 }
2148 
2149 termcount
2151 {
2152  // We currently assume wqf is 1 for calculating the synonym's weight
2153  // since conceptually the synonym is one "virtual" term. If we were
2154  // to combine multiple occurrences of the same synonym expansion into
2155  // a single instance with wqf set, we would want to track the wqf.
2156  return 1;
2157 }
2158 
2159 void
2160 QueryEditDistance::serialise(string & result) const
2161 {
2162  result += static_cast<char>(0x0a);
2163  pack_uint(result, max_expansion);
2164  result += static_cast<unsigned char>(flags);
2165  result += static_cast<unsigned char>(combiner);
2166  pack_uint(result, edit_distance);
2167  pack_uint(result, fixed_prefix_len);
2168  pack_string(result, pattern);
2169 }
2170 
2171 Query::op
2173 {
2174  return Query::OP_EDIT_DISTANCE;
2175 }
2176 
2177 string
2179 {
2180  string desc = "EDIT_DISTANCE ";
2181  switch (combiner) {
2182  case Query::OP_SYNONYM:
2183  desc += "SYNONYM ";
2184  break;
2185  case Query::OP_MAX:
2186  desc += "MAX ";
2187  break;
2188  case Query::OP_OR:
2189  desc += "OR ";
2190  break;
2191  default:
2192  desc += "BAD ";
2193  break;
2194  }
2195  description_append(desc, pattern);
2196  desc += '~';
2197  desc += str(edit_distance);
2198  if (fixed_prefix_len) {
2199  desc += " fixed_prefix_len=";
2200  desc += str(fixed_prefix_len);
2201  }
2202  return desc;
2203 }
2204 
2206 QueryBranch::get_length() const noexcept
2207 {
2208  // Sum results from all subqueries.
2209  Xapian::termcount result = 0;
2211  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2212  // MatchNothing subqueries should have been removed by done(), but we
2213  // can't use Assert in a noexcept function. But we'll get a
2214  // segfault anyway.
2215  result += (*i).internal->get_length();
2216  }
2217  return result;
2218 }
2219 
2220 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
2221 #define MISC(X) static_cast<unsigned char>(X)
2222 void
2223 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
2224 {
2225  static const unsigned char first_byte[] = {
2226  MULTIWAY(0), // OP_AND
2227  MULTIWAY(1), // OP_OR
2228  MULTIWAY(2), // OP_AND_NOT
2229  MULTIWAY(3), // OP_XOR
2230  MULTIWAY(4), // OP_AND_MAYBE
2231  MULTIWAY(5), // OP_FILTER
2232  MULTIWAY(14), // OP_NEAR
2233  MULTIWAY(15), // OP_PHRASE
2234  0, // OP_VALUE_RANGE
2235  MISC(3), // OP_SCALE_WEIGHT
2236  MULTIWAY(13), // OP_ELITE_SET
2237  0, // OP_VALUE_GE
2238  0, // OP_VALUE_LE
2239  MULTIWAY(6), // OP_SYNONYM
2240  MULTIWAY(7) // OP_MAX
2241  };
2242  Xapian::Query::op op_ = get_op();
2243  AssertRel(size_t(op_),<,sizeof(first_byte));
2244  unsigned char ch = first_byte[op_];
2245  if (ch & 0x80) {
2246  // Multi-way operator.
2247  if (subqueries.size() < 8)
2248  ch |= subqueries.size();
2249  result += ch;
2250  if (subqueries.size() >= 8)
2251  pack_uint(result, subqueries.size() - 8);
2252  if (ch >= MULTIWAY(13))
2253  pack_uint(result, parameter);
2254  } else {
2255  result += ch;
2256  }
2257 
2259  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2260  // MatchNothing subqueries should have been removed by done().
2261  Assert((*i).internal);
2262  (*i).internal->serialise(result);
2263  }
2264 
2265  // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
2266  // appended next by an overloaded serialise() method in the subclass.
2267 }
2268 
2269 void
2270 QueryBranch::serialise(string & result) const
2271 {
2272  QueryBranch::serialise_(result);
2273 }
2274 
2275 void
2276 QueryNear::serialise(string & result) const
2277 {
2278  // FIXME: window - subqueries.size() ?
2280 }
2281 
2282 void
2283 QueryPhrase::serialise(string & result) const
2284 {
2285  // FIXME: window - subqueries.size() ?
2287 }
2288 
2289 void
2290 QueryEliteSet::serialise(string & result) const
2291 {
2292  // FIXME: set_size - subqueries.size() ?
2294 }
2295 
2296 void
2297 QueryBranch::gather_terms(void * void_terms) const
2298 {
2299  // Gather results from all subqueries.
2301  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2302  // MatchNothing subqueries should have been removed by done().
2303  Assert((*i).internal);
2304  (*i).internal->gather_terms(void_terms);
2305  }
2306 }
2307 
2308 void
2310  QueryOptimiser* qopt,
2311  TermFreqs* termfreqs,
2312  size_t first) const
2313 {
2314  LOGCALL_VOID(MATCH, "QueryBranch::do_bool_or_like", ctx | qopt | termfreqs | first);
2315 
2316  // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
2317  // for AND-like operations.
2318 
2319  // OP_SYNONYM with a single subquery is only simplified by
2320  // QuerySynonym::done() if the single subquery is a term or MatchAll.
2321  Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
2322 
2324  for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
2325  // MatchNothing subqueries should have been removed by done().
2326  Assert((*q).internal);
2327  (*q).internal->postlist_sub_bool_or_like(ctx, qopt, termfreqs);
2328  }
2329 }
2330 
2331 void
2333  TermFreqs* termfreqs,
2334  Xapian::termcount elite_set_size, size_t first,
2335  bool keep_zero_weight) const
2336 {
2337  LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | termfreqs | elite_set_size | first | keep_zero_weight);
2338 
2339  // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
2340  // for AND-like operations.
2341 
2342  // OP_SYNONYM with a single subquery is only simplified by
2343  // QuerySynonym::done() if the single subquery is a term or MatchAll.
2344  Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
2345 
2346  size_t size_before = ctx.size();
2348  for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
2349  // MatchNothing subqueries should have been removed by done().
2350  Assert((*q).internal);
2351  (*q).internal->postlist_sub_or_like(ctx, qopt, factor,
2352  termfreqs,
2353  keep_zero_weight);
2354  }
2355 
2356  size_t out_of = ctx.size() - size_before;
2357  if (elite_set_size && elite_set_size < out_of) {
2358  ctx.select_elite_set(elite_set_size, out_of);
2359  // FIXME: This isn't quite right as we flatten ORs under the ELITE_SET
2360  // and then pick from amongst all the subqueries. Consider:
2361  //
2362  // Query subqs[] = {q1 | q2, q3 | q4};
2363  // Query q(OP_ELITE_SET, begin(subqs), end(subqs), 1);
2364  //
2365  // Here q should be either q1 | q2 or q3 | q4, but actually it'll be
2366  // just one of q1 or q2 or q3 or q4 (assuming those aren't themselves
2367  // OP_OR or OP_OR-like queries).
2368  }
2369 }
2370 
2373  double factor,
2374  TermFreqs* termfreqs) const
2375 {
2376  LOGCALL(MATCH, PostListAndEstimate, "QueryBranch::do_synonym", qopt | factor | termfreqs);
2377  OrContext ctx(qopt, subqueries.size());
2378  if (factor == 0.0) {
2379  // If we have a factor of 0, we don't care about the weights, so
2380  // we're just like a normal OR query. An OP_SYNONYM sets factor=0
2381  // for its subqueries so this handles an OP_SYNONYM below an
2382  // OP_SYNONYM.
2383  do_bool_or_like(ctx, qopt, termfreqs);
2384  return ctx.postlist(termfreqs, true);
2385  }
2386 
2387  bool old_compound_weight = qopt->compound_weight;
2388  Assert(!old_compound_weight);
2389  qopt->compound_weight = true;
2390  TermFreqs synonym_freqs;
2391  do_bool_or_like(ctx, qopt, &synonym_freqs);
2392  PostListAndEstimate plest = ctx.postlist(&synonym_freqs, true);
2393  qopt->compound_weight = old_compound_weight;
2394  if (!plest.pl) return {};
2395 
2396  // We currently assume wqf is 1 for calculating the synonym's weight
2397  // since conceptually the synonym is one "virtual" term. If we were
2398  // to combine multiple occurrences of the same synonym expansion into
2399  // a single instance with wqf set, we would want to track the wqf.
2400 
2401  // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
2402  // SynonymPostList, which supplies the weights.
2403  RETURN(qopt->make_synonym_postlist(std::move(plest), factor,
2404  synonym_freqs));
2405 }
2406 
2409  double factor,
2410  TermFreqs* termfreqs) const
2411 {
2412  LOGCALL(MATCH, PostListAndEstimate, "QueryBranch::do_max", qopt | factor | termfreqs);
2413  OrContext ctx(qopt, subqueries.size());
2414  if (factor == 0.0) {
2415  // Without the weights we're just like a normal OR query.
2416  do_bool_or_like(ctx, qopt, termfreqs);
2417  RETURN(ctx.postlist(termfreqs, true));
2418  }
2419 
2420  // If termfreqs is set that means we're below a synonym, but in that case
2421  // factor should be 0.0 which is handled above.
2422  Assert(!termfreqs);
2423 
2424  do_or_like(ctx, qopt, factor, termfreqs);
2425  // We currently assume wqf is 1 for calculating the OP_MAX's weight
2426  // since conceptually the OP_MAX is one "virtual" term. If we were
2427  // to combine multiple occurrences of the same OP_MAX expansion into
2428  // a single instance with wqf set, we would want to track the wqf.
2429  RETURN(ctx.postlist_max());
2430 }
2431 
2433 QueryBranch::get_type() const noexcept
2434 {
2435  return get_op();
2436 }
2437 
2438 size_t
2440 {
2441  return subqueries.size();
2442 }
2443 
2444 const Query
2446 {
2447  return subqueries[n];
2448 }
2449 
2450 const string
2452  Xapian::termcount parameter) const
2453 {
2454  string desc = "(";
2456  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2457  if (desc.size() > 1) {
2458  desc += op;
2459  if (parameter) {
2460  desc += str(parameter);
2461  desc += ' ';
2462  }
2463  }
2464  Assert((*i).internal);
2465  // MatchNothing subqueries should have been removed by done(), and we
2466  // shouldn't get called before done() is, since that happens at the
2467  // end of the Xapian::Query constructor.
2468  desc += (*i).internal->get_description();
2469  }
2470  desc += ')';
2471  return desc;
2472 }
2473 
2476 {
2477  // If window size not specified, default it.
2478  if (window == 0)
2479  window = subqueries.size();
2480  return QueryAndLike::done();
2481 }
2482 
2483 void
2484 QueryScaleWeight::gather_terms(void * void_terms) const
2485 {
2486  subquery.internal->gather_terms(void_terms);
2487 }
2488 
2489 void QueryTerm::serialise(string & result) const
2490 {
2491  size_t len = term.size();
2492  if (len == 0) {
2493  if (wqf == 1 && pos == 0) {
2494  // Query::MatchAll
2495  result += '\x0f';
2496  } else {
2497  // Weird mutant versions of MatchAll
2498  result += '\x0e';
2499  pack_uint(result, wqf);
2500  pack_uint(result, pos);
2501  }
2502  } else if (wqf == 1) {
2503  if (pos == 0) {
2504  // Single occurrence free-text term without position set.
2505  if (len >= 16) {
2506  result += static_cast<char>(0x40 | 0x10);
2507  pack_uint(result, term.size() - 16);
2508  } else {
2509  result += static_cast<char>(0x40 | 0x10 | len);
2510  }
2511  result += term;
2512  } else {
2513  // Single occurrence free-text term with position set.
2514  if (len >= 16) {
2515  result += static_cast<char>(0x40 | 0x20);
2516  pack_uint(result, term.size() - 16);
2517  } else {
2518  result += static_cast<char>(0x40 | 0x20 | len);
2519  }
2520  result += term;
2521  pack_uint(result, pos);
2522  }
2523  } else if (wqf > 1 || pos > 0) {
2524  // General case.
2525  if (len >= 16) {
2526  result += static_cast<char>(0x40 | 0x30);
2527  pack_uint(result, term.size() - 16);
2528  } else if (len) {
2529  result += static_cast<char>(0x40 | 0x30 | len);
2530  }
2531  result += term;
2532  pack_uint(result, wqf);
2533  pack_uint(result, pos);
2534  } else {
2535  // Typical boolean term.
2536  AssertEq(wqf, 0);
2537  AssertEq(pos, 0);
2538  if (len >= 16) {
2539  result += static_cast<char>(0x40);
2540  pack_uint(result, term.size() - 16);
2541  } else {
2542  result += static_cast<char>(0x40 | len);
2543  }
2544  result += term;
2545  }
2546 }
2547 
2548 void QueryPostingSource::serialise(string & result) const
2549 {
2550  result += static_cast<char>(0x0c);
2551  pack_string(result, source->name());
2552  pack_string(result, source->serialise());
2553 }
2554 
2555 void QueryScaleWeight::serialise(string & result) const
2556 {
2558  result += '\x0d';
2559  result += serialise_double(scale_factor);
2560  subquery.internal->serialise(result);
2561 }
2562 
2563 void
2565 {
2566  // If the AndLike is already MatchNothing, do nothing.
2567  if (subqueries.size() == 1 && !subqueries[0].internal)
2568  return;
2569  // If we're adding MatchNothing, discard any previous subqueries.
2570  if (!subquery.internal)
2571  subqueries.clear();
2572  subqueries.push_back(subquery);
2573 }
2574 
2577 {
2578  // Empty AndLike gives MatchNothing.
2579  if (subqueries.empty())
2580  return NULL;
2581  // We handle any subquery being MatchNothing in add_subquery() by leaving
2582  // a single MatchNothing subquery, and so this check results in AndLike
2583  // giving MatchNothing.
2584  if (subqueries.size() == 1)
2585  return subqueries[0].internal.get();
2586  return this;
2587 }
2588 
2591  TermFreqs* termfreqs) const
2592 {
2593  LOGCALL(QUERY, PostListAndEstimate, "QueryAndLike::postlist", qopt | factor | termfreqs);
2594  AndContext ctx(qopt, subqueries.size());
2595  if (!postlist_sub_and_like(ctx, qopt, factor, termfreqs)) {
2596  RETURN({});
2597  }
2598  RETURN(ctx.postlist(termfreqs));
2599 }
2600 
2601 bool
2603  QueryOptimiser* qopt,
2604  double factor,
2605  TermFreqs* termfreqs) const
2606 {
2608  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2609  // MatchNothing subqueries should have been removed by done().
2610  Assert((*i).internal);
2611  if (!(*i).internal->postlist_sub_and_like(ctx, qopt, factor, termfreqs))
2612  return false;
2613  }
2614  return true;
2615 }
2616 
2617 void
2619 {
2620  // Drop any subqueries which are MatchNothing.
2621  if (subquery.internal)
2622  subqueries.push_back(subquery);
2623 }
2624 
2627 {
2628  // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
2629  // subqueries which are MatchNothing.
2630  if (subqueries.empty())
2631  return NULL;
2632  if (subqueries.size() == 1)
2633  return subqueries[0].internal.get();
2634  return this;
2635 }
2636 
2637 void
2639 {
2640  if (!subqueries.empty()) {
2641  // We're adding the 2nd or subsequent subquery, so this subquery is
2642  // negated.
2643  if (!subqueries[0].internal) {
2644  // The left side is already MatchNothing so drop any right side.
2645  //
2646  // MatchNothing AND_NOT X == MatchNothing
2647  return;
2648  }
2649  if (!subquery.internal) {
2650  // Drop MatchNothing on the right of AndNot.
2651  //
2652  // X AND_NOT MatchNothing == X
2653  return;
2654  }
2655  if (subquery.get_type() == subquery.OP_SCALE_WEIGHT) {
2656  // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
2657  // AndNot as no weight is taken from them.
2658  subqueries.push_back(subquery.get_subquery(0));
2659  // The Query constructor for OP_SCALE_WEIGHT constructor should
2660  // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
2661  Assert(subquery.get_subquery(0).internal);
2662  return;
2663  }
2664  }
2665  subqueries.push_back(subquery);
2666 }
2667 
2670 {
2671  // Any MatchNothing right subqueries get discarded by add_subquery() - if
2672  // that leaves just the left subquery, return that.
2673  //
2674  // If left subquery is MatchNothing, then add_subquery() discards all right
2675  // subqueries, so this check also gives MatchNothing for this case.
2676  if (subqueries.size() == 1)
2677  return subqueries[0].internal.get();
2678  return this;
2679 }
2680 
2681 void
2683 {
2684  // If the left side of AndMaybe is already MatchNothing, do nothing.
2685  if (subqueries.size() == 1 && !subqueries[0].internal)
2686  return;
2687  // Drop any 2nd or subsequent subqueries which are MatchNothing.
2688  if (subquery.internal || subqueries.empty())
2689  subqueries.push_back(subquery);
2690 }
2691 
2694 {
2695  // Any MatchNothing right subqueries get discarded by add_subquery() - if
2696  // that leaves just the left subquery, return that.
2697  //
2698  // If left subquery is MatchNothing, then add_subquery() discards all right
2699  // subqueries, so this check also gives MatchNothing for this case.
2700  if (subqueries.size() == 1)
2701  return subqueries[0].internal.get();
2702  return this;
2703 }
2704 
2706 QueryOr::postlist(QueryOptimiser* qopt, double factor,
2707  TermFreqs* termfreqs) const
2708 {
2709  LOGCALL(QUERY, PostListAndEstimate, "QueryOr::postlist", qopt | factor | termfreqs);
2710  OrContext ctx(qopt, subqueries.size());
2711  if (factor == 0.0) {
2712  do_bool_or_like(ctx, qopt, termfreqs);
2713  RETURN(ctx.postlist(termfreqs, true));
2714  }
2715  do_or_like(ctx, qopt, factor, termfreqs);
2716  RETURN(ctx.postlist(termfreqs));
2717 }
2718 
2719 void
2721  double factor,
2722  TermFreqs* termfreqs,
2723  bool keep_zero_weight) const
2724 {
2725  do_or_like(ctx, qopt, factor, termfreqs, 0, 0, keep_zero_weight);
2726 }
2727 
2728 void
2730  QueryOptimiser* qopt,
2731  TermFreqs* termfreqs) const
2732 {
2733  do_bool_or_like(ctx, qopt, termfreqs);
2734 }
2735 
2738  TermFreqs* termfreqs) const
2739 {
2740  LOGCALL(QUERY, PostListAndEstimate, "QueryAndNot::postlist", qopt | factor | termfreqs);
2741  AndContext ctx(qopt, 1);
2742  if (!QueryAndNot::postlist_sub_and_like(ctx, qopt, factor, termfreqs)) {
2743  RETURN({});
2744  }
2745  RETURN(ctx.postlist(termfreqs));
2746 }
2747 
2748 bool
2750  QueryOptimiser* qopt,
2751  double factor,
2752  TermFreqs* termfreqs) const
2753 {
2754  // This invariant should be established by QueryAndNot::done() with
2755  // assistance from QueryAndNot::add_subquery().
2756  Assert(subqueries[0].internal);
2757  if (!subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor,
2758  termfreqs)) {
2759  return false;
2760  }
2761  do_bool_or_like(ctx.get_not_ctx(subqueries.size() - 1), qopt, termfreqs, 1);
2762  return true;
2763 }
2764 
2766 QueryXor::postlist(QueryOptimiser* qopt, double factor,
2767  TermFreqs* termfreqs) const
2768 {
2769  LOGCALL(QUERY, PostListAndEstimate, "QueryXor::postlist", qopt | factor | termfreqs);
2770  XorContext ctx(qopt, subqueries.size());
2771  postlist_sub_xor(ctx, qopt, factor, termfreqs);
2772  RETURN(ctx.postlist(termfreqs));
2773 }
2774 
2775 void
2777  QueryOptimiser* qopt,
2778  double factor,
2779  TermFreqs* termfreqs) const
2780 {
2782  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2783  // MatchNothing subqueries should have been removed by done().
2784  Assert((*i).internal);
2785  (*i).internal->postlist_sub_xor(ctx, qopt, factor, termfreqs);
2786  }
2787 }
2788 
2791  TermFreqs* termfreqs) const
2792 {
2793  LOGCALL(QUERY, PostListAndEstimate, "QueryAndMaybe::postlist", qopt | factor | termfreqs);
2794  AndContext ctx(qopt, 1);
2795  if (!QueryAndMaybe::postlist_sub_and_like(ctx, qopt, factor, termfreqs)) {
2796  RETURN({});
2797  }
2798  RETURN(ctx.postlist(termfreqs));
2799 }
2800 
2801 bool
2803  QueryOptimiser* qopt,
2804  double factor,
2805  TermFreqs* termfreqs) const
2806 {
2807  // This invariant should be established by QueryAndMaybe::done() with
2808  // assistance from QueryAndMaybe::add_subquery().
2809  Assert(subqueries[0].internal);
2810  if (!subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor,
2811  termfreqs)) {
2812  return false;
2813  }
2814  // We only need to consider the right branch or branches if we're weighted
2815  // - an unweighted OP_AND_MAYBE can be replaced with its left branch.
2816  if (factor != 0.0) {
2817  // Only keep zero-weight subqueries if we need their wdf because
2818  // they're underneath a compound weight.
2819  OrContext& maybe_ctx = ctx.get_maybe_ctx(subqueries.size() - 1);
2820  bool need_wdf = qopt->need_wdf_for_compound_weight();
2821  bool save_no_estimates = qopt->get_no_estimates();
2822  qopt->set_no_estimates(true);
2823  do_or_like(maybe_ctx, qopt, factor, termfreqs, 0, 1, need_wdf);
2824  qopt->set_no_estimates(save_no_estimates);
2825  }
2826  return true;
2827 }
2828 
2831  TermFreqs* termfreqs) const
2832 {
2833  LOGCALL(QUERY, PostListAndEstimate, "QueryFilter::postlist", qopt | factor | termfreqs);
2834  AndContext ctx(qopt, subqueries.size());
2835  if (!QueryFilter::postlist_sub_and_like(ctx, qopt, factor, termfreqs)) {
2836  RETURN({});
2837  }
2838  RETURN(ctx.postlist(termfreqs));
2839 }
2840 
2841 bool
2843  QueryOptimiser* qopt,
2844  double factor,
2845  TermFreqs* termfreqs) const
2846 {
2848  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2849  // MatchNothing subqueries should have been removed by done().
2850  Assert((*i).internal);
2851  if (!(*i).internal->postlist_sub_and_like(ctx, qopt, factor, termfreqs))
2852  return false;
2853  // Second and subsequent subqueries are unweighted.
2854  factor = 0.0;
2855  }
2856  return true;
2857 }
2858 
2859 bool
2861  AndContext& ctx,
2862  QueryOptimiser* qopt,
2863  double factor,
2864  TermFreqs* termfreqs) const
2865 {
2866  if (!qopt->db.has_positions()) {
2867  // No positions in this subdatabase so this matches nothing, which
2868  // means the whole andcontext matches nothing.
2869  //
2870  // Bailing out here means we don't recurse deeper and that means we
2871  // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
2872  // the phrase, but at least one shard will count them, and the matcher
2873  // takes the highest answer (since 1.4.6).
2874  ctx.shrink(0);
2875  return false;
2876  }
2877 
2878  bool old_need_positions = qopt->need_positions;
2879  qopt->need_positions = true;
2880 
2881  bool result = true;
2883  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
2884  // MatchNothing subqueries should have been removed by done().
2885  Assert((*i).internal);
2886  PostListAndEstimate plest = (*i).internal->postlist(qopt, factor, NULL);
2887  if (plest.pl && (*i).internal->get_type() != Query::LEAF_TERM) {
2888  plest.pl = new OrPosPostList(plest.pl);
2889  }
2890  result = ctx.add_postlist(std::move(plest), termfreqs);
2891  if (!result) {
2892  if (factor == 0.0) break;
2893  // If we don't complete the iteration, the subquery count may be
2894  // wrong, and weighting information may not be filled in.
2895  while (i != subqueries.end()) {
2896  // MatchNothing subqueries should have been removed by done().
2897  // FIXME: Can we handle this more gracefully?
2898  Assert((*i).internal);
2899  qopt->destroy_postlist((*i).internal->postlist(qopt, factor,
2900  NULL).pl);
2901  ++i;
2902  }
2903  break;
2904  }
2905  }
2906  if (result) {
2907  // Record the positional filter to apply higher up the tree.
2908  ctx.add_pos_filter(op, subqueries.size(), window);
2909  }
2910 
2911  qopt->need_positions = old_need_positions;
2912  return result;
2913 }
2914 
2915 bool
2917  QueryOptimiser* qopt,
2918  double factor,
2919  TermFreqs* termfreqs) const
2920 {
2921  constexpr auto OP_PHRASE = Query::OP_PHRASE;
2922  return QueryWindowed::postlist_windowed(OP_PHRASE, ctx, qopt, factor,
2923  termfreqs);
2924 }
2925 
2926 bool
2928  QueryOptimiser* qopt,
2929  double factor,
2930  TermFreqs* termfreqs) const
2931 {
2932  constexpr auto OP_NEAR = Query::OP_NEAR;
2933  return QueryWindowed::postlist_windowed(OP_NEAR, ctx, qopt, factor,
2934  termfreqs);
2935 }
2936 
2939  TermFreqs* termfreqs) const
2940 {
2941  LOGCALL(QUERY, PostListAndEstimate, "QueryEliteSet::postlist", qopt | factor | termfreqs);
2942  OrContext ctx(qopt, subqueries.size());
2943  do_or_like(ctx, qopt, factor, termfreqs, set_size);
2944  RETURN(ctx.postlist(termfreqs));
2945 }
2946 
2947 void
2949  double factor,
2950  TermFreqs* termfreqs,
2951  bool keep_zero_weight) const
2952 {
2953  do_or_like(ctx, qopt, factor, termfreqs, set_size, 0, keep_zero_weight);
2954 }
2955 
2958  TermFreqs* termfreqs) const
2959 {
2960  LOGCALL(QUERY, PostListAndEstimate, "QuerySynonym::postlist", qopt | factor | termfreqs);
2961  // Save and restore total_subqs so we only add one for the whole
2962  // OP_SYNONYM subquery (or none if we're not weighted).
2963  Xapian::termcount save_total_subqs = qopt->get_total_subqs();
2964  if (factor != 0.0)
2965  ++save_total_subqs;
2966  PostListAndEstimate plest = do_synonym(qopt, factor, termfreqs);
2967  qopt->set_total_subqs(save_total_subqs);
2968  return plest;
2969 }
2970 
2973 {
2974  // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
2975  // subqueries which are MatchNothing.
2976  if (subqueries.empty())
2977  return NULL;
2978  if (subqueries.size() == 1) {
2979  Query::op sub_type = subqueries[0].get_type();
2980  // Synonym of a single subquery should only be simplified if that
2981  // subquery is a term (or MatchAll), or if it's also OP_SYNONYM. Note
2982  // that MatchNothing subqueries are dropped, so we'd never get here
2983  // with a single MatchNothing subquery.
2984  if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
2985  sub_type == Query::OP_SYNONYM) {
2986  return subqueries[0].internal.get();
2987  }
2988  if (sub_type == Query::OP_WILDCARD) {
2989  auto q = static_cast<QueryWildcard*>(subqueries[0].internal.get());
2990  // SYNONYM over WILDCARD X -> WILDCARD SYNONYM for any combiner X.
2992  }
2993  if (sub_type == Query::OP_EDIT_DISTANCE) {
2994  auto q =
2995  static_cast<QueryEditDistance*>(subqueries[0].internal.get());
2996  // SYNONYM over EDIT_DISTANCE X -> EDIT_DISTANCE SYNONYM for any
2997  // combiner X.
2999  }
3000  }
3001  return this;
3002 }
3003 
3005 QueryMax::postlist(QueryOptimiser* qopt, double factor,
3006  TermFreqs* termfreqs) const
3007 {
3008  LOGCALL(QUERY, PostListAndEstimate, "QueryMax::postlist", qopt | factor | termfreqs);
3009  // Save and restore total_subqs so we only add one for the whole
3010  // OP_MAX subquery (or none if we're not weighted).
3011  Xapian::termcount save_total_subqs = qopt->get_total_subqs();
3012  if (factor != 0.0)
3013  ++save_total_subqs;
3014  PostListAndEstimate plest = do_max(qopt, factor, termfreqs);
3015  qopt->set_total_subqs(save_total_subqs);
3016  return plest;
3017 }
3018 
3021 {
3022  return Xapian::Query::OP_AND;
3023 }
3024 
3027 {
3028  return Xapian::Query::OP_OR;
3029 }
3030 
3033 {
3035 }
3036 
3039 {
3040  return Xapian::Query::OP_XOR;
3041 }
3042 
3045 {
3047 }
3048 
3051 {
3052  return Xapian::Query::OP_FILTER;
3053 }
3054 
3057 {
3058  return Xapian::Query::OP_NEAR;
3059 }
3060 
3063 {
3064  return Xapian::Query::OP_PHRASE;
3065 }
3066 
3069 {
3071 }
3072 
3075 {
3077 }
3078 
3081 {
3082  return Xapian::Query::OP_MAX;
3083 }
3084 
3085 string
3087 {
3088  return get_description_helper(" AND ");
3089 }
3090 
3091 string
3093 {
3094  return get_description_helper(" OR ");
3095 }
3096 
3097 string
3099 {
3100  return get_description_helper(" AND_NOT ");
3101 }
3102 
3103 string
3105 {
3106  return get_description_helper(" XOR ");
3107 }
3108 
3109 string
3111 {
3112  return get_description_helper(" AND_MAYBE ");
3113 }
3114 
3115 string
3117 {
3118  return get_description_helper(" FILTER ");
3119 }
3120 
3121 string
3123 {
3124  return get_description_helper(" NEAR ", window);
3125 }
3126 
3127 string
3129 {
3130  return get_description_helper(" PHRASE ", window);
3131 }
3132 
3133 string
3135 {
3136  return get_description_helper(" ELITE_SET ", set_size);
3137 }
3138 
3139 string
3141 {
3142  if (subqueries.size() == 1) {
3143  string d = "(SYNONYM ";
3144  d += subqueries[0].internal->get_description();
3145  d += ")";
3146  return d;
3147  }
3148  return get_description_helper(" SYNONYM ");
3149 }
3150 
3151 string
3153 {
3154  return get_description_helper(" MAX ");
3155 }
3156 
3158 QueryInvalid::get_type() const noexcept
3159 {
3161 }
3162 
3165 {
3166  throw Xapian::InvalidOperationError("Query is invalid");
3167 }
3168 
3169 void
3170 QueryInvalid::serialise(std::string & result) const
3171 {
3172  result += static_cast<char>(0x00);
3173 }
3174 
3175 string
3177 {
3178  return "<INVALID>";
3179 }
3180 
3181 }
3182 }
PostList class implementing Query::OP_AND_MAYBE.
PostList class implementing Query::OP_AND_NOT.
N-way AND postlist.
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
char name[9]
Definition: dbcheck.cc:57
PostList class implementing unweighted Query::OP_OR.
PostList class implementing Query::OP_AND_MAYBE.
PostList class implementing Query::OP_AND_NOT.
N-way AND postlist.
Definition: andpostlist.h:32
PostList class implementing unweighted Query::OP_OR.
Class for estimating the total number of matching documents.
Definition: estimateop.h:64
@ EXACT_PHRASE
Definition: estimateop.h:71
Postlist which matches an exact phrase using positional information.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
N-way OR postlist with wt=max(wt_i).
Definition: maxpostlist.h:31
Postlist which matches terms occurring within a specified window.
Definition: nearpostlist.h:41
Wrapper postlist providing positions for an OR.
Definition: orpospostlist.h:28
PostList class implementing Query::OP_OR.
Definition: orpostlist.h:29
Postlist which matches a phrase using positional information.
bool * get_max_weight_cached_flag_ptr()
Return pointer to flag to set to false to invalidate cached max weight.
Definition: postlisttree.h:92
Virtual base class for Database internals.
virtual std::string get_value_upper_bound(valueno slot) const =0
Get an upper bound on the values stored in the given value slot.
virtual std::string get_value_lower_bound(valueno slot) const =0
Get a lower bound on the values stored in the given value slot.
virtual PostList * open_post_list(std::string_view term) const =0
Return a PostList suitable for use in a PostingIterator.
virtual bool has_positions() const =0
Check whether this database contains any positional information.
virtual doccount get_value_freq(valueno slot) const =0
Return the frequency of a given value slot.
An indexed database of documents.
Definition: database.h:75
PostListAndEstimate postlist(TermFreqs *termfreqs)
bool add_postlist(PostList *pl, unique_ptr< EstimateOp > &&estimate, TermFreqs *termfreqs)
OrContext & get_maybe_ctx(size_t reserve)
AndContext(QueryOptimiser *qopt_, size_t reserve)
unique_ptr< OrContext > maybe_ctx
void add_pos_filter(Query::op op_, size_t n_subqs, Xapian::termcount window)
OrContext & get_not_ctx(size_t reserve)
bool add_postlist(PostListAndEstimate p, TermFreqs *termfreqs)
list< PosFilter > pos_filters
unique_ptr< OrContext > not_ctx
VecUniquePtr< EstimateOp > estimates
void add_postlist(PostList *pl, EstimateOp *estimate, TermFreqs *termfreqs)
vector< PostList * > pls
vector< TermFreqs > termfreqs_list
Xapian::docid get_first() const
void expand_edit_distance(const QueryEditDistance *query, double factor, TermFreqs *termfreqs)
Expand an edit distance query.
Context(QueryOptimiser *qopt_, size_t reserve)
void add_termfreqs(TermFreqs *termfreqs)
void expand_wildcard(const QueryWildcard *query, double factor, TermFreqs *termfreqs)
Expand a wildcard query.
void add_postlist(PostListAndEstimate p, TermFreqs *termfreqs)
Xapian::docid get_last() const
Xapian::termcount size() const
void shrink(size_t new_size)
PostListAndEstimate postlist_max()
void select_elite_set(size_t set_size, size_t out_of)
Select the best set_size postlists from the last out_of added.
PostListAndEstimate postlist(TermFreqs *termfreqs, bool bool_or=false)
OrContext(QueryOptimiser *qopt_, size_t reserve)
size_t begin
Start and end indices for the PostLists this positional filter uses.
PostListAndEstimate postlist(PostList *pl, EstimateOp *est, const vector< PostList * > &pls, PostListTree *pltree, TermFreqs *termfreqs) const
PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_, Xapian::termcount window_)
Abstract base class for postlists.
Definition: postlist.h:40
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
Definition: postlist.h:67
virtual double recalc_maxweight()=0
Recalculate the upper bound on what get_weight() can return.
virtual void get_docid_range(docid &first, docid &last) const
Get the bounds on the range of docids this PostList can return.
Definition: postlist.cc:72
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void add_subquery(const Xapian::Query &subquery)
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void add_subquery(const Xapian::Query &subquery)
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
std::string get_description() const
virtual Query::Internal * done()=0
void do_bool_or_like(OrContext &ctx, QueryOptimiser *qopt, TermFreqs *termfreqs, size_t first=0) const
void do_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs, Xapian::termcount elite_set_size=0, size_t first=0, bool keep_zero_weight=true) const
Process OR-like subqueries.
virtual Xapian::Query::op get_op() const =0
void serialise_(std::string &result, Xapian::termcount parameter=0) const
virtual void add_subquery(const Xapian::Query &subquery)=0
Xapian::Query::op get_type() const noexcept
const std::string get_description_helper(const char *op, Xapian::termcount window=0) const
PostListAndEstimate do_max(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void gather_terms(void *void_terms) const
termcount get_length() const noexcept
PostListAndEstimate do_synonym(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
const Query get_subquery(size_t n) const
size_t get_num_subqueries() const noexcept
void serialise(std::string &result) const
void serialise(std::string &result) const
int test(const std::string &candidate) const
Perform edit distance test.
QueryEditDistance * change_combiner(Xapian::Query::op new_op)
Change the combining operator.
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
termcount get_length() const noexcept
Xapian::Query::op get_type() const noexcept
void serialise(std::string &result) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs, bool keep_zero_weight) const
std::string get_description() const
Xapian::Query::op get_op() const
std::string get_description() const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void serialise(std::string &result) const
Xapian::Query::op get_type() const noexcept
std::string get_description() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
void serialise(std::string &result) const
Xapian::Query::op get_op() const
std::string get_description() const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::termcount get_total_subqs() const
void destroy_postlist(PostList *pl)
PostListAndEstimate open_post_list(const std::string &term, Xapian::termcount wqf, double factor, TermFreqs *termfreqs)
Create a PostList object for term.
PostListAndEstimate make_synonym_postlist(PostListAndEstimate or_pl, double factor, const TermFreqs &termfreqs)
Create a SynonymPostList object.
const Xapian::Weight::Internal * get_stats() const
const Xapian::Database::Internal & db
void set_total_subqs(Xapian::termcount n)
void add_subquery(const Xapian::Query &subquery)
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void postlist_sub_bool_or_like(OrContext &ctx, QueryOptimiser *qopt, TermFreqs *termfreqs) const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs, bool keep_zero_weight) const
std::string get_description() const
std::string get_description() const
Xapian::Query::op get_op() const
void serialise(std::string &result) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Internal::opt_intrusive_ptr< PostingSource > source
Definition: queryinternal.h:85
Xapian::Query::op get_type() const noexcept
void serialise(std::string &result) const
void serialise(std::string &result) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
const Query get_subquery(size_t n) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
QueryScaleWeight(double factor, const Query &subquery_)
Xapian::Query::op get_type() const noexcept
void gather_terms(void *void_terms) const
size_t get_num_subqueries() const noexcept
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
std::string get_description() const
void serialise(std::string &result) const
void gather_terms(void *void_terms) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_type() const noexcept
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
void serialise(std::string &result) const
void serialise(std::string &result) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
Xapian::Query::op get_type() const noexcept
Xapian::Query::op get_type() const noexcept
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void serialise(std::string &result) const
bool test_wildcard_(const std::string &candidate, size_t o, size_t p, size_t i) const
size_t head
Fixed head and tail lengths, and min/max length term that can match.
Xapian::Query::op get_type() const noexcept
QueryWildcard * change_combiner(Xapian::Query::op new_op)
Change the combining operator.
bool test_prefix_known(const std::string &candidate) const
Perform wildcard test on candidate known to match prefix.
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void serialise(std::string &result) const
QueryWildcard(std::string_view pattern_, Xapian::termcount max_expansion_, int flags_, Query::op combiner_)
std::string get_description() const
termcount get_length() const noexcept
bool postlist_windowed(Xapian::Query::op op, AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void postlist_sub_xor(XorContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
Xapian::Query::op get_op() const
PostListAndEstimate postlist(TermFreqs *termfreqs)
XorContext(QueryOptimiser *qopt_, size_t reserve)
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:271
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
virtual PostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
PostingSource * release()
Start reference counting this object.
Class representing a query.
Definition: query.h:45
const Query get_subquery(size_t n) const
Read a top level subquery.
Definition: query.cc:289
op get_type() const noexcept
Get the type of the top level of the query.
Definition: query.cc:275
op
Query operators.
Definition: query.h:78
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
Definition: query.h:166
@ LEAF_POSTING_SOURCE
Value returned by get_type() for a PostingSource.
Definition: query.h:283
@ OP_MAX
Pick the maximum weight of any subquery.
Definition: query.h:249
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
Definition: query.h:158
@ OP_WILDCARD
Wildcard expansion.
Definition: query.h:255
@ OP_XOR
Match documents which an odd number of subqueries match.
Definition: query.h:107
@ OP_AND_MAYBE
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
@ LEAF_MATCH_ALL
Value returned by get_type() for MatchAll or equivalent.
Definition: query.h:290
@ OP_NEAR
Match only documents where all subqueries match near each other.
Definition: query.h:140
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Definition: query.h:152
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
Definition: query.h:231
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
Definition: query.h:99
@ OP_EDIT_DISTANCE
Edit distance expansion.
Definition: query.h:269
@ LEAF_TERM
Value returned by get_type() for a term.
Definition: query.h:280
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
Definition: query.h:223
@ OP_INVALID
Construct an invalid query.
Definition: query.h:277
bool empty() const noexcept
Check if this query is Xapian::Query::MatchNothing.
Definition: query.h:661
@ WILDCARD_PATTERN_MULTI
Support * which matches 0 or more characters.
Definition: query.h:330
@ WILDCARD_LIMIT_FIRST
Stop expanding when OP_WILDCARD reaches its expansion limit.
Definition: query.h:311
@ WILDCARD_LIMIT_MOST_FREQUENT
Limit OP_WILDCARD expansion to the most frequent terms.
Definition: query.h:321
@ WILDCARD_LIMIT_MASK_
Definition: query.h:324
@ WILDCARD_PATTERN_SINGLE
Support ? which matches a single character.
Definition: query.h:336
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:48
Registry for user subclasses.
Definition: registry.h:47
const Xapian::PostingSource * get_posting_source(std::string_view name) const
Get a posting source given a name.
Definition: registry.cc:331
Indicates an error in the std::string serialisation of an object.
Definition: error.h:917
T::Internal *const * const_iterator
Definition: smallvector.h:515
bool empty() const
Definition: smallvector.h:450
std::size_t size() const
Definition: smallvector.h:438
const_iterator begin() const
Definition: smallvector.h:622
const_iterator end() const
Definition: smallvector.h:626
void push_back(const T &elt)
Definition: smallvector.h:632
Abstract base class for termlists.
Definition: termlist.h:42
virtual Internal * skip_to(std::string_view term)=0
Skip forward to the specified term.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
Suitable for "simple" type T.
Definition: smallvector.h:62
const_iterator end() const
Definition: smallvector.h:165
void reserve(size_type n)
Definition: smallvector.h:147
void erase(const_iterator it)
Definition: smallvector.h:229
void push_back(T elt)
Definition: smallvector.h:190
const_iterator begin() const
Definition: smallvector.h:161
WildcardError indicates an error expanding a wildcarded query.
Definition: error.h:1001
N-way XOR postlist.
Definition: xorpostlist.h:31
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
#define usual(COND)
Definition: config.h:608
#define rare(COND)
Definition: config.h:607
string term
PositionList * p
Xapian::termpos pos
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:479
Append a string to an object description, escaping invalid UTF-8.
Edit distance calculation algorithm.
Hierarchy of classes which Xapian can throw as exceptions.
Return docs containing terms forming a particular exact phrase.
Return document ids from an external source.
C++ STL heap implementation with extensions.
N-way OR postlist with wt=max(wt_i)
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:213
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:230
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:259
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:277
string str(int value)
Convert int to std::string.
Definition: str.cc:91
static double string_frac(const string &s, size_t prefix)
static T estimate_and_not(T l, T r, U n)
static Xapian::doccount estimate_range_freq(const string &lo, const string &hi, const string &begin, const string *end, Xapian::doccount value_freq)
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
Return docs containing terms within a specified window.
Various assertion macros.
#define AssertEq(A, B)
Definition: omassert.h:124
#define AssertRel(A, REL, B)
Definition: omassert.h:123
#define Assert(COND)
Definition: omassert.h:122
Wrapper postlist providing positions for an OR.
PostList class implementing Query::OP_OR.
void unpack_throw_serialisation_error(const char *p)
Throw appropriate SerialisationError.
Definition: pack.cc:29
Pack types into strings and unpack them again.
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
Definition: pack.h:468
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:346
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:315
void pack_string(std::string &s, std::string_view value)
Append an encoded std::string to a string.
Definition: pack.h:442
void pack_string_empty(std::string &s)
Append an empty encoded std::string to a string.
Definition: pack.h:456
Return docs containing terms forming a particular phrase.
External sources of posting information.
Abstract base class for postlists.
Xapian::Query API class.
#define MISC(X)
#define MULTIWAY(X)
static constexpr unsigned MAX_UTF_8_CHARACTER_LENGTH
Xapian::Query internals.
Details passed around while building PostList tree from Query tree.
string serialise_double(double v)
Serialise a double to a string.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
functions to serialise and unserialise a double
Convert types to std::string.
Various handy string-related helpers.
bool endswith(std::string_view s, char sfx)
Definition: stringutils.h:80
bool startswith(std::string_view s, char pfx)
Definition: stringutils.h:56
Class providing an operator which sorts postlists to select max or terms.
bool operator()(PostList *a, PostList *b)
Return true if and only if a has a strictly greater termweight than b.
Comparison functor which orders by descending termfreq.
bool operator()(const PostList *a, const PostList *b) const
Order PostList* by descending get_termfreq().
The frequencies for a term.
Xapian::doccount reltermfreq
Xapian::termcount collfreq
Definition: header.h:215
Abstract base class for termlists.
Unicode and UTF-8 related classes and functions.
void description_append(std::string &desc, std::string_view s)
Definition: unittest.cc:105
Return document ids matching a >= test on a specified doc value.
Return document ids matching a range test on a specified doc value.
N-way XOR postlist.