xapian-core  1.4.19
queryinternal.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts
5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "queryinternal.h"
25 
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
29 
30 #include "leafpostlist.h"
32 #include "matcher/andnotpostlist.h"
33 #include "emptypostlist.h"
36 #include "matcher/maxpostlist.h"
39 #include "matcher/nearpostlist.h"
40 #include "matcher/orpospostlist.h"
41 #include "matcher/orpostlist.h"
42 #include "matcher/phrasepostlist.h"
43 #include "matcher/queryoptimiser.h"
46 #include "net/length.h"
47 #include "serialise-double.h"
48 #include "stringutils.h"
49 #include "termlist.h"
50 
51 #include "autoptr.h"
52 #include "debuglog.h"
53 #include "omassert.h"
54 #include "str.h"
56 
57 #include <algorithm>
58 #include <list>
59 #include <string>
60 #include <unordered_set>
61 #include <vector>
62 
63 using namespace std;
64 
68 
69 namespace Xapian {
70 
71 namespace Internal {
72 
78 struct CmpMaxOrTerms {
80  bool operator()(const PostList *a, const PostList *b) {
81 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
82  defined(__mc68000__) || defined(__mc68010__) || \
83  defined(__mc68020__) || defined(__mc68030__)
84  // On some architectures, most common of which is x86, floating point
85  // values are calculated and stored in registers with excess precision.
86  // If the two get_maxweight() calls below return identical values in a
87  // register, the excess precision may be dropped for one of them but
88  // not the other (e.g. because the compiler saves the first calculated
89  // weight to memory while calculating the second, then reloads it to
90  // compare). This leads to both a > b and b > a being true, which
91  // violates the antisymmetry property of the strict weak ordering
92  // required by nth_element(). This can have serious consequences (e.g.
93  // segfaults).
94  //
95  // Note that m68k only has excess precision in earlier models - 68040
96  // and later are OK:
97  // https://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
98  //
99  // To avoid this, we store each result in a volatile double prior to
100  // comparing them. This means that the result of this test should
101  // match that on other architectures with the same double format (which
102  // is desirable), and actually has less overhead than rounding both
103  // results to float (which is another approach which works).
104  volatile double a_max_wt = a->get_maxweight();
105  volatile double b_max_wt = b->get_maxweight();
106  return a_max_wt > b_max_wt;
107 #else
108  return a->get_maxweight() > b->get_maxweight();
109 #endif
110  }
111 };
112 
116  bool operator()(const PostList *a, const PostList *b) const {
117  return a->get_termfreq_est() > b->get_termfreq_est();
118  }
119 };
120 
121 class Context {
122  protected:
124 
125  vector<PostList*> pls;
126 
127  public:
128  Context(QueryOptimiser* qopt_, size_t reserve);
129 
130  ~Context();
131 
132  void add_postlist(PostList * pl) {
133  pls.push_back(pl);
134  }
135 
136  bool empty() const {
137  return pls.empty();
138  }
139 
140  size_t size() const {
141  return pls.size();
142  }
143 
144  void shrink(size_t new_size);
145 };
146 
147 Context::Context(QueryOptimiser* qopt_, size_t reserve)
148  : qopt(qopt_)
149 {
150  pls.reserve(reserve);
151 }
152 
153 void
154 Context::shrink(size_t new_size)
155 {
156  AssertRel(new_size, <=, pls.size());
157  if (new_size >= pls.size())
158  return;
159 
160  for (auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
161  qopt->destroy_postlist(*i);
162  }
163  pls.resize(new_size);
164 }
165 
167 {
168  shrink(0);
169 }
170 
171 class OrContext : public Context {
172  public:
173  OrContext(QueryOptimiser* qopt_, size_t reserve)
174  : Context(qopt_, reserve) { }
175 
177  void select_elite_set(size_t set_size, size_t out_of);
178 
180  void select_most_frequent(size_t set_size);
181 
182  PostList * postlist();
183  PostList * postlist_max();
184 };
185 
186 void
187 OrContext::select_elite_set(size_t set_size, size_t out_of)
188 {
189  // Call recalc_maxweight() as otherwise get_maxweight()
190  // may not be valid before next() or skip_to()
191  auto begin = pls.begin() + pls.size() - out_of;
192  for (auto i = begin; i != pls.end(); ++i) {
193  (*i)->recalc_maxweight();
194  }
195  nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
196  shrink(pls.size() - out_of + set_size);
197 }
198 
199 void
201 {
202  vector<PostList*>::iterator begin = pls.begin();
203  nth_element(begin, begin + set_size - 1, pls.end(),
205  shrink(set_size);
206 }
207 
208 PostList *
210 {
211  Assert(!pls.empty());
212 
213  if (pls.size() == 1) {
214  PostList * pl = pls[0];
215  pls.clear();
216  return pl;
217  }
218 
219  // Make postlists into a heap so that the postlist with the greatest term
220  // frequency is at the top of the heap.
221  make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
222 
223  // Now build a tree of binary OrPostList objects.
224  //
225  // The algorithm used to build the tree is like that used to build an
226  // optimal Huffman coding tree. If we called next() repeatedly, this
227  // arrangement would minimise the number of method calls. Generally we
228  // don't actually do that, but this arrangement is still likely to be a
229  // good one, and it does minimise the work in the worst case.
230  while (true) {
231  // We build the tree such that at each branch:
232  //
233  // l.get_termfreq_est() >= r.get_termfreq_est()
234  //
235  // We do this so that the OrPostList class can be optimised assuming
236  // that this is the case.
237  PostList * r = pls.front();
238  pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
239  pls.pop_back();
240  PostList * pl;
241  pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
242 
243  if (pls.size() == 1) {
244  pls.clear();
245  return pl;
246  }
247 
248  pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
249  pls.back() = pl;
250  push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
251  }
252 }
253 
254 PostList *
256 {
257  Assert(!pls.empty());
258 
259  if (pls.size() == 1) {
260  PostList * pl = pls[0];
261  pls.clear();
262  return pl;
263  }
264 
265  // Sort the postlists so that the postlist with the greatest term frequency
266  // is first.
267  sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
268 
269  PostList * pl;
270  pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
271 
272  pls.clear();
273  return pl;
274 }
275 
276 class XorContext : public Context {
277  public:
278  XorContext(QueryOptimiser* qopt_, size_t reserve)
279  : Context(qopt_, reserve) { }
280 
281  PostList * postlist();
282 };
283 
284 PostList *
286 {
287  Xapian::doccount db_size = qopt->db_size;
288  PostList * pl;
289  pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
290 
291  // Empty pls so our destructor doesn't delete them all!
292  pls.clear();
293  return pl;
294 }
295 
296 class AndContext : public Context {
297  class PosFilter {
299 
301  size_t begin, end;
302 
304 
305  public:
306  PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
307  Xapian::termcount window_)
308  : op_(op__), begin(begin_), end(end_), window(window_) { }
309 
310  PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
311  };
312 
313  list<PosFilter> pos_filters;
314 
315  AutoPtr<OrContext> not_ctx;
316 
317  AutoPtr<OrContext> maybe_ctx;
318 
319  public:
320  AndContext(QueryOptimiser* qopt_, size_t reserve)
321  : Context(qopt_, reserve) { }
322 
323  void add_pos_filter(Query::op op_,
324  size_t n_subqs,
325  Xapian::termcount window);
326 
327  OrContext& get_not_ctx(size_t reserve) {
328  if (!not_ctx) {
329  not_ctx.reset(new OrContext(qopt, reserve));
330  }
331  return *not_ctx;
332  }
333 
334  OrContext& get_maybe_ctx(size_t reserve) {
335  if (!maybe_ctx) {
336  maybe_ctx.reset(new OrContext(qopt, reserve));
337  }
338  return *maybe_ctx;
339  }
340 
341  PostList * postlist();
342 };
343 
344 PostList *
345 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
346 try {
347  vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
348  vector<PostList *>::const_iterator terms_end = pls.begin() + end;
349 
350  if (op_ == Xapian::Query::OP_NEAR) {
351  pl = new NearPostList(pl, window, terms_begin, terms_end);
352  } else if (window == end - begin) {
354  pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
355  } else {
357  pl = new PhrasePostList(pl, window, terms_begin, terms_end);
358  }
359  return pl;
360 } catch (...) {
361  delete pl;
362  throw;
363 }
364 
365 void
367  size_t n_subqs,
368  Xapian::termcount window)
369 {
370  Assert(n_subqs > 1);
371  size_t end = pls.size();
372  size_t begin = end - n_subqs;
373  pos_filters.push_back(PosFilter(op_, begin, end, window));
374 }
375 
376 PostList *
378 {
379  if (pls.empty()) {
380  // This case only happens if this sub-database has no positional data
381  // (but another sub-database does).
382  Assert(pos_filters.empty());
383  return new EmptyPostList;
384  }
385 
386  auto matcher = qopt->matcher;
387  auto db_size = qopt->db_size;
388 
389  AutoPtr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
390  matcher, db_size));
391 
392  if (not_ctx) {
393  PostList* rhs = not_ctx->postlist();
394  pl.reset(new AndNotPostList(pl.release(), rhs, matcher, db_size));
395  not_ctx.reset();
396  }
397 
398  // Sort the positional filters to try to apply them in an efficient order.
399  // FIXME: We need to figure out what that is! Try applying lowest cf/tf
400  // first?
401 
402  // Apply any positional filters.
403  for (const PosFilter& filter : pos_filters) {
404  pl.reset(filter.postlist(pl.release(), pls));
405  }
406 
407  // Empty pls so our destructor doesn't delete them all!
408  pls.clear();
409 
410  if (maybe_ctx) {
411  PostList* rhs = maybe_ctx->postlist();
412  pl.reset(new AndMaybePostList(pl.release(), rhs, matcher, db_size));
413  maybe_ctx.reset();
414  }
415 
416  return pl.release();
417 }
418 
419 }
420 
422 
423 size_t
425 {
426  return 0;
427 }
428 
429 const Query
431 {
432  throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
433 }
434 
435 void
437 {
438 }
439 
442 {
443  return 0;
444 }
445 
447 Query::Internal::unserialise(const char ** p, const char * end,
448  const Registry & reg)
449 {
450  if (*p == end)
451  return NULL;
452  unsigned char ch = *(*p)++;
453  switch (ch >> 5) {
454  case 4: case 5: case 6: case 7: {
455  // Multi-way branch
456  //
457  // 1ccccnnn where:
458  // nnn -> n_subqs (0 means encoded value follows)
459  // cccc -> code (which OP_XXX)
460  size_t n_subqs = ch & 0x07;
461  if (n_subqs == 0) {
462  decode_length(p, end, n_subqs);
463  n_subqs += 8;
464  }
465  unsigned char code = (ch >> 3) & 0x0f;
466  Xapian::termcount parameter = 0;
467  if (code >= 13)
468  decode_length(p, end, parameter);
470  switch (code) {
471  case 0: // OP_AND
472  result = new Xapian::Internal::QueryAnd(n_subqs);
473  break;
474  case 1: // OP_OR
475  result = new Xapian::Internal::QueryOr(n_subqs);
476  break;
477  case 2: // OP_AND_NOT
478  result = new Xapian::Internal::QueryAndNot(n_subqs);
479  break;
480  case 3: // OP_XOR
481  result = new Xapian::Internal::QueryXor(n_subqs);
482  break;
483  case 4: // OP_AND_MAYBE
484  result = new Xapian::Internal::QueryAndMaybe(n_subqs);
485  break;
486  case 5: // OP_FILTER
487  result = new Xapian::Internal::QueryFilter(n_subqs);
488  break;
489  case 6: // OP_SYNONYM
490  result = new Xapian::Internal::QuerySynonym(n_subqs);
491  break;
492  case 7: // OP_MAX
493  result = new Xapian::Internal::QueryMax(n_subqs);
494  break;
495  case 13: // OP_ELITE_SET
496  result = new Xapian::Internal::QueryEliteSet(n_subqs,
497  parameter);
498  break;
499  case 14: // OP_NEAR
500  result = new Xapian::Internal::QueryNear(n_subqs,
501  parameter);
502  break;
503  case 15: // OP_PHRASE
504  result = new Xapian::Internal::QueryPhrase(n_subqs,
505  parameter);
506  break;
507  default:
508  // 8 to 12 are currently unused.
509  throw SerialisationError("Unknown multi-way branch Query operator");
510  }
511  do {
512  result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
513  } while (--n_subqs);
514  result->done();
515  return result;
516  }
517  case 2: case 3: { // Term
518  // Term
519  //
520  // 01ccLLLL where:
521  // LLLL -> length (0 means encoded value follows)
522  // cc -> code:
523  // 0: wqf = 0; pos = 0
524  // 1: wqf = 1; pos = 0
525  // 2: wqf = 1; pos -> encoded value follows
526  // 3: wqf -> encoded value follows; pos -> encoded value follows
527  size_t len = ch & 0x0f;
528  if (len == 0) {
529  decode_length(p, end, len);
530  len += 16;
531  }
532  if (size_t(end - *p) < len)
533  throw SerialisationError("Not enough data");
534  string term(*p, len);
535  *p += len;
536 
537  int code = ((ch >> 4) & 0x03);
538 
539  Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
540  if (code == 3)
541  decode_length(p, end, wqf);
542 
543  Xapian::termpos pos = 0;
544  if (code >= 2)
545  decode_length(p, end, pos);
546 
547  return new Xapian::Internal::QueryTerm(term, wqf, pos);
548  }
549  case 1: {
550  // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
551  //
552  // 001tssss where:
553  // ssss -> slot number (15 means encoded value follows)
554  // t -> op:
555  // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
556  // 1: OP_VALUE_GE
557  Xapian::valueno slot = ch & 15;
558  if (slot == 15) {
559  decode_length(p, end, slot);
560  slot += 15;
561  }
562  size_t len;
563  decode_length_and_check(p, end, len);
564  string begin(*p, len);
565  *p += len;
566  if (ch & 0x10) {
567  // OP_VALUE_GE
568  return new Xapian::Internal::QueryValueGE(slot, begin);
569  }
570 
571  // OP_VALUE_RANGE
572  decode_length_and_check(p, end, len);
573  string end_(*p, len);
574  *p += len;
575  if (begin.empty()) // FIXME: is this right?
576  return new Xapian::Internal::QueryValueLE(slot, end_);
577  return new Xapian::Internal::QueryValueRange(slot, begin, end_);
578  }
579  case 0: {
580  // Other operators
581  //
582  // 000ttttt where:
583  // ttttt -> encodes which OP_XXX
584  switch (ch & 0x1f) {
585  case 0x00: // OP_INVALID
586  return new Xapian::Internal::QueryInvalid();
587  case 0x0b: { // Wildcard
588  if (*p == end)
589  throw SerialisationError("not enough data");
590  Xapian::termcount max_expansion;
591  decode_length(p, end, max_expansion);
592  if (end - *p < 2)
593  throw SerialisationError("not enough data");
594  int max_type = static_cast<unsigned char>(*(*p)++);
595  op combiner = static_cast<op>(*(*p)++);
596  size_t len;
597  decode_length_and_check(p, end, len);
598  string pattern(*p, len);
599  *p += len;
600  return new Xapian::Internal::QueryWildcard(pattern,
601  max_expansion,
602  max_type,
603  combiner);
604  }
605  case 0x0c: { // PostingSource
606  size_t len;
607  decode_length_and_check(p, end, len);
608  string name(*p, len);
609  *p += len;
610 
611  const PostingSource * reg_source = reg.get_posting_source(name);
612  if (!reg_source) {
613  string m = "PostingSource ";
614  m += name;
615  m += " not registered";
616  throw SerialisationError(m);
617  }
618 
619  decode_length_and_check(p, end, len);
620  PostingSource * source =
621  reg_source->unserialise_with_registry(string(*p, len),
622  reg);
623  *p += len;
624  return new Xapian::Internal::QueryPostingSource(source->release());
625  }
626  case 0x0d: {
628  double scale_factor = unserialise_double(p, end);
629  return new QueryScaleWeight(scale_factor,
630  Query(unserialise(p, end, reg)));
631  }
632  case 0x0e: {
633  Xapian::termcount wqf;
634  Xapian::termpos pos;
635  decode_length(p, end, wqf);
636  decode_length(p, end, pos);
637  return new Xapian::Internal::QueryTerm(string(), wqf, pos);
638  }
639  case 0x0f:
640  return new Xapian::Internal::QueryTerm();
641  default: // Others currently unused.
642  break;
643  }
644  break;
645  }
646  }
647  string msg = "Unknown Query serialisation: ";
648  msg += str(ch);
649  throw SerialisationError(msg);
650 }
651 
652 void
655  double factor) const
656 {
657  ctx.add_postlist(postlist(qopt, factor));
658 }
659 
660 void
663  double factor) const
664 {
665  ctx.add_postlist(postlist(qopt, factor));
666 }
667 
668 void
671  double factor) const
672 {
673  ctx.add_postlist(postlist(qopt, factor));
674 }
675 
676 namespace Internal {
677 
678 Query::op
680 {
681  return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
682 }
683 
684 string
686 {
687  string desc;
688  if (term.empty()) {
689  desc = "<alldocuments>";
690  } else {
691  description_append(desc, term);
692  }
693  if (wqf != 1) {
694  desc += '#';
695  desc += str(wqf);
696  }
697  if (pos) {
698  desc += '@';
699  desc += str(pos);
700  }
701  return desc;
702 }
703 
705  : source(source_)
706 {
707  if (!source_)
708  throw Xapian::InvalidArgumentError("source parameter can't be NULL");
709  if (source->_refs == 0) {
710  // source_ isn't reference counted, so try to clone it. If clone()
711  // isn't implemented, just use the object provided and it's the
712  // caller's responsibility to ensure it stays valid while in use.
713  PostingSource * cloned_source = source->clone();
714  if (cloned_source) source = cloned_source->release();
715  }
716 }
717 
718 Query::op
720 {
722 }
723 
724 string
726 {
727  string desc = "PostingSource(";
728  desc += source->get_description();
729  desc += ')';
730  return desc;
731 }
732 
733 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
734  : scale_factor(factor), subquery(subquery_)
735 {
736  if (rare(scale_factor < 0.0))
737  throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
738 }
739 
740 Query::op
742 {
743  return Query::OP_SCALE_WEIGHT;
744 }
745 
746 size_t
748 {
749  return 1;
750 }
751 
752 const Query
754 {
755  return subquery;
756 }
757 
758 string
760 {
761  Assert(subquery.internal.get());
762  string desc = str(scale_factor);
763  desc += " * ";
764  desc += subquery.internal->get_description();
765  return desc;
766 }
767 
769 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
770 {
771  LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
772  if (factor != 0.0)
773  qopt->inc_total_subqs();
774  RETURN(qopt->open_post_list(term, wqf, factor));
775 }
776 
778 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
779 {
780  LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
781  Assert(source.get());
782  if (factor != 0.0)
783  qopt->inc_total_subqs();
784  // Casting away const on the Database::Internal here is OK, as we wrap
785  // them in a const Xapian::Database so non-const methods can't actually
786  // be called on the Database::Internal object.
787  const Xapian::Database wrappeddb(
788  const_cast<Xapian::Database::Internal*>(&(qopt->db)));
789  RETURN(new ExternalPostList(wrappeddb, source.get(), factor,
790  qopt->matcher,
791  qopt->shard_index));
792 }
793 
795 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
796 {
797  LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
798  RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
799 }
800 
801 void
802 QueryTerm::gather_terms(void * void_terms) const
803 {
804  // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
805  if (!term.empty()) {
806  vector<pair<Xapian::termpos, string>> &terms =
807  *static_cast<vector<pair<Xapian::termpos, string>>*>(void_terms);
808  terms.push_back(make_pair(pos, term));
809  }
810 }
811 
813 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
814 {
815  LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
816  if (factor != 0.0)
817  qopt->inc_total_subqs();
818  const Xapian::Database::Internal & db = qopt->db;
819  const string & lb = db.get_value_lower_bound(slot);
820  if (lb.empty()) {
821  // This should only happen if there are no values in this slot (which
822  // could be because the backend just doesn't support values at all).
823  // If there were values in the slot, the backend should have a
824  // non-empty lower bound, even if it isn't a tight one.
825  AssertEq(db.get_value_freq(slot), 0);
826  RETURN(new EmptyPostList);
827  }
828  if (end < lb) {
829  RETURN(new EmptyPostList);
830  }
831  const string & ub = db.get_value_upper_bound(slot);
832  if (begin > ub) {
833  RETURN(new EmptyPostList);
834  }
835  if (end >= ub) {
836  // If begin <= lb too, then the range check isn't needed, but we do
837  // still need to consider which documents have a value set in this
838  // slot. If this value is set for all documents, we can replace it
839  // with the MatchAll postlist, which is especially efficient if
840  // there are no gaps in the docids.
841  if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
842  RETURN(db.open_post_list(string()));
843  }
844  RETURN(new ValueGePostList(&db, slot, begin));
845  }
846  RETURN(new ValueRangePostList(&db, slot, begin, end));
847 }
848 
849 void
850 QueryValueRange::serialise(string & result) const
851 {
852  if (slot < 15) {
853  result += static_cast<char>(0x20 | slot);
854  } else {
855  result += static_cast<char>(0x20 | 15);
856  result += encode_length(slot - 15);
857  }
858  result += encode_length(begin.size());
859  result += begin;
860  result += encode_length(end.size());
861  result += end;
862 }
863 
864 Query::op
866 {
867  return Query::OP_VALUE_RANGE;
868 }
869 
870 string
872 {
873  string desc = "VALUE_RANGE ";
874  desc += str(slot);
875  desc += ' ';
876  description_append(desc, begin);
877  desc += ' ';
878  description_append(desc, end);
879  return desc;
880 }
881 
883 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
884 {
885  LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
886  if (factor != 0.0)
887  qopt->inc_total_subqs();
888  const Xapian::Database::Internal & db = qopt->db;
889  const string & lb = db.get_value_lower_bound(slot);
890  if (lb.empty()) {
891  // This should only happen if there are no values in this slot (which
892  // could be because the backend just doesn't support values at all).
893  // If there were values in the slot, the backend should have a
894  // non-empty lower bound, even if it isn't a tight one.
895  AssertEq(db.get_value_freq(slot), 0);
896  RETURN(new EmptyPostList);
897  }
898  if (limit < lb) {
899  RETURN(new EmptyPostList);
900  }
901  if (limit >= db.get_value_upper_bound(slot)) {
902  // The range check isn't needed, but we do still need to consider
903  // which documents have a value set in this slot. If this value is
904  // set for all documents, we can replace it with the MatchAll
905  // postlist, which is especially efficient if there are no gaps in
906  // the docids.
907  if (db.get_value_freq(slot) == db.get_doccount()) {
908  RETURN(db.open_post_list(string()));
909  }
910  }
911  RETURN(new ValueRangePostList(&db, slot, string(), limit));
912 }
913 
914 void
915 QueryValueLE::serialise(string & result) const
916 {
917  // Encode as a range with an empty start (which only takes a single byte to
918  // encode).
919  if (slot < 15) {
920  result += static_cast<char>(0x20 | slot);
921  } else {
922  result += static_cast<char>(0x20 | 15);
923  result += encode_length(slot - 15);
924  }
925  result += encode_length(0);
926  result += encode_length(limit.size());
927  result += limit;
928 }
929 
930 Query::op
932 {
933  return Query::OP_VALUE_LE;
934 }
935 
936 string
938 {
939  string desc = "VALUE_LE ";
940  desc += str(slot);
941  desc += ' ';
942  description_append(desc, limit);
943  return desc;
944 }
945 
947 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
948 {
949  LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
950  if (factor != 0.0)
951  qopt->inc_total_subqs();
952  const Xapian::Database::Internal & db = qopt->db;
953  const string & lb = db.get_value_lower_bound(slot);
954  if (lb.empty()) {
955  // This should only happen if there are no values in this slot (which
956  // could be because the backend just doesn't support values at all).
957  // If there were values in the slot, the backend should have a
958  // non-empty lower bound, even if it isn't a tight one.
959  AssertEq(db.get_value_freq(slot), 0);
960  RETURN(new EmptyPostList);
961  }
962  if (limit > db.get_value_upper_bound(slot)) {
963  RETURN(new EmptyPostList);
964  }
965  if (limit <= lb) {
966  // The range check isn't needed, but we do still need to consider
967  // which documents have a value set in this slot. If this value is
968  // set for all documents, we can replace it with the MatchAll
969  // postlist, which is especially efficient if there are no gaps in
970  // the docids.
971  if (db.get_value_freq(slot) == db.get_doccount()) {
972  RETURN(db.open_post_list(string()));
973  }
974  }
975  RETURN(new ValueGePostList(&db, slot, limit));
976 }
977 
978 void
979 QueryValueGE::serialise(string & result) const
980 {
981  if (slot < 15) {
982  result += static_cast<char>(0x20 | 0x10 | slot);
983  } else {
984  result += static_cast<char>(0x20 | 0x10 | 15);
985  result += encode_length(slot - 15);
986  }
987  result += encode_length(limit.size());
988  result += limit;
989 }
990 
991 Query::op
993 {
994  return Query::OP_VALUE_GE;
995 }
996 
997 string
999 {
1000  string desc = "VALUE_GE ";
1001  desc += str(slot);
1002  desc += ' ';
1003  description_append(desc, limit);
1004  return desc;
1005 }
1006 
1008 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
1009 {
1010  LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
1011  Query::op op = combiner;
1012  double or_factor = 0.0;
1013  if (factor == 0.0) {
1014  // If we have a factor of 0, we don't care about the weights, so
1015  // we're just like a normal OR query.
1016  op = Query::OP_OR;
1017  } else if (op != Query::OP_SYNONYM) {
1018  or_factor = factor;
1019  }
1020 
1021  bool old_in_synonym = qopt->in_synonym;
1022  if (!old_in_synonym) {
1023  qopt->in_synonym = (op == Query::OP_SYNONYM);
1024  }
1025 
1026  OrContext ctx(qopt, 0);
1027  AutoPtr<TermList> t(qopt->db.open_allterms(pattern));
1028  Xapian::termcount expansions_left = max_expansion;
1029  // If there's no expansion limit, set expansions_left to the maximum
1030  // value Xapian::termcount can hold.
1031  if (expansions_left == 0)
1032  --expansions_left;
1033  while (true) {
1034  t->next();
1035  if (t->at_end())
1036  break;
1038  if (expansions_left-- == 0) {
1039  if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
1040  break;
1041  string msg("Wildcard ");
1042  msg += pattern;
1043  msg += "* expands to more than ";
1044  msg += str(max_expansion);
1045  msg += " terms";
1046  throw Xapian::WildcardError(msg);
1047  }
1048  }
1049  const string & term = t->get_termname();
1050  ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
1051  }
1052 
1054  // FIXME: open_lazy_post_list() results in the term getting registered
1055  // for stats, so we still incur an avoidable cost from the full
1056  // expansion size of the wildcard, which is most likely to be visible
1057  // with the remote backend. Perhaps we should split creating the lazy
1058  // postlist from registering the term for stats.
1059  if (ctx.size() > max_expansion)
1060  ctx.select_most_frequent(max_expansion);
1061  }
1062 
1063  if (factor != 0.0) {
1064  if (op != Query::OP_SYNONYM) {
1065  qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1066  } else {
1067  qopt->inc_total_subqs();
1068  }
1069  }
1070 
1071  qopt->in_synonym = old_in_synonym;
1072 
1073  if (ctx.empty())
1074  RETURN(new EmptyPostList);
1075 
1076  if (op == Query::OP_MAX)
1077  RETURN(ctx.postlist_max());
1078 
1079  PostList * pl = ctx.postlist();
1080  if (op == Query::OP_OR)
1081  RETURN(pl);
1082 
1083  // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1084  // SynonymPostList, which supplies the weights.
1085  //
1086  // We know the subqueries from a wildcard expansion are wdf-disjoint
1087  // (i.e. each wdf from the document contributes at most itself to the
1088  // wdf of the subquery).
1089  RETURN(qopt->make_synonym_postlist(pl, factor, true));
1090 }
1091 
1092 termcount
1094 {
1095  // We currently assume wqf is 1 for calculating the synonym's weight
1096  // since conceptually the synonym is one "virtual" term. If we were
1097  // to combine multiple occurrences of the same synonym expansion into
1098  // a single instance with wqf set, we would want to track the wqf.
1099  return 1;
1100 }
1101 
1102 void
1103 QueryWildcard::serialise(string & result) const
1104 {
1105  result += static_cast<char>(0x0b);
1106  result += encode_length(max_expansion);
1107  result += static_cast<unsigned char>(max_type);
1108  result += static_cast<unsigned char>(combiner);
1109  result += encode_length(pattern.size());
1110  result += pattern;
1111 }
1112 
1113 Query::op
1115 {
1116  return Query::OP_WILDCARD;
1117 }
1118 
1119 string
1121 {
1122  string desc = "WILDCARD ";
1123  switch (combiner) {
1124  case Query::OP_SYNONYM:
1125  desc += "SYNONYM ";
1126  break;
1127  case Query::OP_MAX:
1128  desc += "MAX ";
1129  break;
1130  case Query::OP_OR:
1131  desc += "OR ";
1132  break;
1133  default:
1134  desc += "BAD ";
1135  break;
1136  }
1137  description_append(desc, pattern);
1138  return desc;
1139 }
1140 
1143 {
1144  // Sum results from all subqueries.
1145  Xapian::termcount result = 0;
1146  QueryVector::const_iterator i;
1147  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1148  // MatchNothing subqueries should have been removed by done(), but we
1149  // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1150  // segfault anyway.
1151  result += (*i).internal->get_length();
1152  }
1153  return result;
1154 }
1155 
1156 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1157 #define MISC(X) static_cast<unsigned char>(X)
1158 void
1159 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1160 {
1161  static const unsigned char first_byte[] = {
1162  MULTIWAY(0), // OP_AND
1163  MULTIWAY(1), // OP_OR
1164  MULTIWAY(2), // OP_AND_NOT
1165  MULTIWAY(3), // OP_XOR
1166  MULTIWAY(4), // OP_AND_MAYBE
1167  MULTIWAY(5), // OP_FILTER
1168  MULTIWAY(14), // OP_NEAR
1169  MULTIWAY(15), // OP_PHRASE
1170  0, // OP_VALUE_RANGE
1171  MISC(3), // OP_SCALE_WEIGHT
1172  MULTIWAY(13), // OP_ELITE_SET
1173  0, // OP_VALUE_GE
1174  0, // OP_VALUE_LE
1175  MULTIWAY(6), // OP_SYNONYM
1176  MULTIWAY(7) // OP_MAX
1177  };
1178  Xapian::Query::op op_ = get_op();
1179  AssertRel(size_t(op_),<,sizeof(first_byte));
1180  unsigned char ch = first_byte[op_];
1181  if (ch & 0x80) {
1182  // Multi-way operator.
1183  if (subqueries.size() < 8)
1184  ch |= subqueries.size();
1185  result += ch;
1186  if (subqueries.size() >= 8)
1187  result += encode_length(subqueries.size() - 8);
1188  if (ch >= MULTIWAY(13))
1189  result += encode_length(parameter);
1190  } else {
1191  result += ch;
1192  }
1193 
1194  QueryVector::const_iterator i;
1195  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1196  // MatchNothing subqueries should have been removed by done().
1197  Assert((*i).internal.get());
1198  (*i).internal->serialise(result);
1199  }
1200 
1201  // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1202  // appended next by an overloaded serialise() method in the subclass.
1203 }
1204 
1205 void
1206 QueryBranch::serialise(string & result) const
1207 {
1208  QueryBranch::serialise_(result);
1209 }
1210 
1211 void
1212 QueryNear::serialise(string & result) const
1213 {
1214  // FIXME: window - subqueries.size() ?
1215  QueryBranch::serialise_(result, window);
1216 }
1217 
1218 void
1219 QueryPhrase::serialise(string & result) const
1220 {
1221  // FIXME: window - subqueries.size() ?
1222  QueryBranch::serialise_(result, window);
1223 }
1224 
1225 void
1226 QueryEliteSet::serialise(string & result) const
1227 {
1228  // FIXME: set_size - subqueries.size() ?
1229  QueryBranch::serialise_(result, set_size);
1230 }
1231 
1232 void
1233 QueryBranch::gather_terms(void * void_terms) const
1234 {
1235  // Gather results from all subqueries.
1236  QueryVector::const_iterator i;
1237  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1238  // MatchNothing subqueries should have been removed by done().
1239  Assert((*i).internal.get());
1240  (*i).internal->gather_terms(void_terms);
1241  }
1242 }
1243 
1244 void
1246  Xapian::termcount elite_set_size, size_t first) const
1247 {
1248  LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1249 
1250  // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1251  // for AND-like operations.
1252 
1253  // OP_SYNONYM with a single subquery is only simplified by
1254  // QuerySynonym::done() if the single subquery is a term or MatchAll.
1255  Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1256 
1257  size_t size_before = ctx.size();
1258  QueryVector::const_iterator q;
1259  for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1260  // MatchNothing subqueries should have been removed by done().
1261  Assert((*q).internal.get());
1262  (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1263  }
1264 
1265  size_t out_of = ctx.size() - size_before;
1266  if (elite_set_size && elite_set_size < out_of) {
1267  ctx.select_elite_set(elite_set_size, out_of);
1268  // FIXME: This isn't quite right as we flatten ORs under the ELITE_SET
1269  // and then pick from amongst all the subqueries. Consider:
1270  //
1271  // Query subqs[] = {q1 | q2, q3 | q4};
1272  // Query q(OP_ELITE_SET, begin(subqs), end(subqs), 1);
1273  //
1274  // Here q should be either q1 | q2 or q3 | q4, but actually it'll be
1275  // just one of q1 or q2 or q3 or q4 (assuming those aren't themselves
1276  // OP_OR or OP_OR-like queries).
1277  }
1278 }
1279 
1280 PostList *
1281 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1282 {
1283  LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1284  OrContext ctx(qopt, subqueries.size());
1285  if (factor == 0.0) {
1286  // If we have a factor of 0, we don't care about the weights, so
1287  // we're just like a normal OR query.
1288  do_or_like(ctx, qopt, 0.0);
1289  return ctx.postlist();
1290  }
1291 
1292  bool old_in_synonym = qopt->in_synonym;
1293  qopt->in_synonym = true;
1294  do_or_like(ctx, qopt, 0.0);
1295  PostList * pl = ctx.postlist();
1296  qopt->in_synonym = old_in_synonym;
1297 
1298  bool wdf_disjoint = false;
1299  Assert(!subqueries.empty());
1300  auto type = (*subqueries.begin()).get_type();
1301  if (type == Query::OP_WILDCARD) {
1302  // Detect common easy case where all subqueries are OP_WILDCARD whose
1303  // constant prefixes form a prefix-free set.
1304  wdf_disjoint = true;
1305  vector<string> prefixes;
1306  for (auto&& q : subqueries) {
1307  if (q.get_type() != Query::OP_WILDCARD) {
1308  wdf_disjoint = false;
1309  break;
1310  }
1311  auto qw = static_cast<const QueryWildcard*>(q.internal.get());
1312  prefixes.push_back(qw->get_pattern());
1313  }
1314 
1315  if (wdf_disjoint) {
1316  sort(prefixes.begin(), prefixes.end());
1317  const string* prev = nullptr;
1318  for (const auto& i : prefixes) {
1319  if (prev) {
1320  if (startswith(i, *prev)) {
1321  wdf_disjoint = false;
1322  break;
1323  }
1324  }
1325  prev = &i;
1326  }
1327  }
1328  } else if (type == Query::LEAF_TERM) {
1329  // Detect common easy case where all subqueries are terms, none of
1330  // which are the same.
1331  wdf_disjoint = true;
1332  unordered_set<string> terms;
1333  for (auto&& q : subqueries) {
1334  if (q.get_type() != Query::LEAF_TERM) {
1335  wdf_disjoint = false;
1336  break;
1337  }
1338  auto qt = static_cast<const QueryTerm*>(q.internal.get());
1339  if (!terms.insert(qt->get_term()).second) {
1340  wdf_disjoint = false;
1341  break;
1342  }
1343  }
1344  }
1345 
1346  // We currently assume wqf is 1 for calculating the synonym's weight
1347  // since conceptually the synonym is one "virtual" term. If we were
1348  // to combine multiple occurrences of the same synonym expansion into
1349  // a single instance with wqf set, we would want to track the wqf.
1350 
1351  // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1352  // SynonymPostList, which supplies the weights.
1353  RETURN(qopt->make_synonym_postlist(pl, factor, wdf_disjoint));
1354 }
1355 
1356 PostList *
1357 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1358 {
1359  LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1360  OrContext ctx(qopt, subqueries.size());
1361  do_or_like(ctx, qopt, factor);
1362  if (factor == 0.0) {
1363  // If we have a factor of 0, we don't care about the weights, so
1364  // we're just like a normal OR query.
1365  RETURN(ctx.postlist());
1366  }
1367 
1368  // We currently assume wqf is 1 for calculating the OP_MAX's weight
1369  // since conceptually the OP_MAX is one "virtual" term. If we were
1370  // to combine multiple occurrences of the same OP_MAX expansion into
1371  // a single instance with wqf set, we would want to track the wqf.
1372  RETURN(ctx.postlist_max());
1373 }
1374 
// NOTE(review): the three definitions below have lost their function-name
// lines (and the first one its return-type line too) in this extract, so
// names and parameter lists cannot be confirmed from here.

// Returns whatever get_op() reports.
1377 {
1378  return get_op();
1379 }
1380 
// Returns the number of subqueries held.
1381 size_t
1383 {
1384  return subqueries.size();
1385 }
1386 
// Returns the subquery at index n; n is presumably a parameter declared on
// the missing line - confirm.  No bounds check is performed here.
1387 const Query
1389 {
1390  return subqueries[n];
1391 }
1392 
// Builds a parenthesised description of all subqueries: their descriptions
// joined by op, and, when parameter is non-zero, the separator is followed
// by the parameter as a decimal number and a space.
// NOTE(review): the first line of the signature is absent from this extract;
// presumably this is get_description_helper(const char* op, ...) - confirm.
1393 const string
1395  Xapian::termcount parameter) const
1396 {
1397  string desc = "(";
1398  QueryVector::const_iterator i;
1399  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
// desc starts as "(", so a size above 1 means a subquery has already been
// appended and a separator is needed.
1400  if (desc.size() > 1) {
1401  desc += op;
1402  if (parameter) {
1403  desc += str(parameter);
1404  desc += ' ';
1405  }
1406  }
1407  Assert((*i).internal.get());
1408  // MatchNothing subqueries should have been removed by done(), and we
1409  // shouldn't get called before done() is, since that happens at the
1410  // end of the Xapian::Query constructor.
1411  desc += (*i).internal->get_description();
1412  }
1413  desc += ')';
1414  return desc;
1415 }
1416 
// Defaults an unset (zero) window to the number of subqueries, then defers
// to QueryAndLike::done().
// NOTE(review): the return type and function-name lines are absent from this
// extract - confirm against upstream.
1419 {
1420  // If window size not specified, default it.
1421  if (window == 0)
1422  window = subqueries.size();
1423  return QueryAndLike::done();
1424 }
1425 
1426 void
1427 QueryScaleWeight::gather_terms(void * void_terms) const
1428 {
1429  subquery.internal->gather_terms(void_terms);
1430 }
1431 
1432 void QueryTerm::serialise(string & result) const
1433 {
1434  size_t len = term.size();
1435  if (len == 0) {
1436  if (wqf == 1 && pos == 0) {
1437  // Query::MatchAll
1438  result += '\x0f';
1439  } else {
1440  // Weird mutant versions of MatchAll
1441  result += '\x0e';
1442  result += encode_length(wqf);
1443  result += encode_length(pos);
1444  }
1445  } else if (wqf == 1) {
1446  if (pos == 0) {
1447  // Single occurrence free-text term without position set.
1448  if (len >= 16) {
1449  result += static_cast<char>(0x40 | 0x10);
1450  result += encode_length(term.size() - 16);
1451  } else {
1452  result += static_cast<char>(0x40 | 0x10 | len);
1453  }
1454  result += term;
1455  } else {
1456  // Single occurrence free-text term with position set.
1457  if (len >= 16) {
1458  result += static_cast<char>(0x40 | 0x20);
1459  result += encode_length(term.size() - 16);
1460  } else {
1461  result += static_cast<char>(0x40 | 0x20 | len);
1462  }
1463  result += term;
1464  result += encode_length(pos);
1465  }
1466  } else if (wqf > 1 || pos > 0) {
1467  // General case.
1468  if (len >= 16) {
1469  result += static_cast<char>(0x40 | 0x30);
1470  result += encode_length(term.size() - 16);
1471  } else if (len) {
1472  result += static_cast<char>(0x40 | 0x30 | len);
1473  }
1474  result += term;
1475  result += encode_length(wqf);
1476  result += encode_length(pos);
1477  } else {
1478  // Typical boolean term.
1479  AssertEq(wqf, 0);
1480  AssertEq(pos, 0);
1481  if (len >= 16) {
1482  result += static_cast<char>(0x40);
1483  result += encode_length(term.size() - 16);
1484  } else {
1485  result += static_cast<char>(0x40 | len);
1486  }
1487  result += term;
1488  }
1489 }
1490 
1491 void QueryPostingSource::serialise(string & result) const
1492 {
1493  result += static_cast<char>(0x0c);
1494 
1495  const string & n = source->name();
1496  result += encode_length(n.size());
1497  result += n;
1498 
1499  const string & s = source->serialise();
1500  result += encode_length(s.size());
1501  result += s;
1502 }
1503 
1504 void QueryScaleWeight::serialise(string & result) const
1505 {
1506  Assert(subquery.internal.get());
1507  result += '\x0d';
1508  result += serialise_double(scale_factor);
1509  subquery.internal->serialise(result);
1510 }
1511 
// Adds subquery, collapsing to a single MatchNothing entry (an entry whose
// internal pointer is NULL) as soon as any MatchNothing subquery is seen.
// NOTE(review): the function-name line is absent from this extract; the
// inner comments indicate this is the AndLike add_subquery() - confirm.
1512 void
1514 {
1515  // If the AndLike is already MatchNothing, do nothing.
1516  if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1517  return;
1518  // If we're adding MatchNothing, discard any previous subqueries.
1519  if (subquery.internal.get() == NULL)
1520  subqueries.clear();
1521  subqueries.push_back(subquery);
1522 }
1523 
// Finalises the AndLike: returns NULL (MatchNothing) when empty, the sole
// subquery's internal pointer when there is exactly one, otherwise this.
// NOTE(review): the return type and function-name lines are absent from this
// extract - confirm against upstream.
1526 {
1527  // Empty AndLike gives MatchNothing.
1528  if (subqueries.empty())
1529  return NULL;
1530  // We handle any subquery being MatchNothing in add_subquery() by leaving
1531  // a single MatchNothing subquery, and so this check results in AndLike
1532  // giving MatchNothing.
1533  if (subqueries.size() == 1)
1534  return subqueries[0].internal.get();
1535  return this;
1536 }
1537 
1539 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1540 {
1541  LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1542  AndContext ctx(qopt, subqueries.size());
1543  postlist_sub_and_like(ctx, qopt, factor);
1544  RETURN(ctx.postlist());
1545 }
1546 
// Forwards postlist_sub_and_like() to every subquery in order, so each adds
// itself to ctx.
// NOTE(review): the function-name/parameter line is absent from this
// extract; ctx, qopt and factor are presumably its parameters - confirm.
1547 void
1549 {
1550  QueryVector::const_iterator i;
1551  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1552  // MatchNothing subqueries should have been removed by done().
1553  Assert((*i).internal.get());
1554  (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1555  }
1556 }
1557 
// Adds subquery unless it is MatchNothing (internal pointer is NULL).
// NOTE(review): the function-name line is absent from this extract; a later
// comment in this file suggests this is the OrLike add_subquery() - confirm.
1558 void
1560 {
1561  // Drop any subqueries which are MatchNothing.
1562  if (subquery.internal.get() != NULL)
1563  subqueries.push_back(subquery);
1564 }
1565 
// Finalises the OrLike: returns NULL (MatchNothing) when empty, the sole
// subquery's internal pointer when there is exactly one, otherwise this.
// NOTE(review): the return type and function-name lines are absent from this
// extract - confirm against upstream.
1568 {
1569  // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1570  // subqueries which are MatchNothing.
1571  if (subqueries.empty())
1572  return NULL;
1573  if (subqueries.size() == 1)
1574  return subqueries[0].internal.get();
1575  return this;
1576 }
1577 
// Adds subquery to an AndNot.  The first subquery is stored as-is; every
// later one is the negated side, for which MatchNothing is dropped and an
// OP_SCALE_WEIGHT wrapper is stripped.
// NOTE(review): the function-name line is absent from this extract; the
// inner comments indicate this is the AndNot add_subquery() - confirm.
1578 void
1580 {
1581  if (!subqueries.empty()) {
1582  // We're adding the 2nd or subsequent subquery, so this subquery is
1583  // negated.
1584  if (subqueries[0].internal.get() == NULL) {
1585  // The left side is already MatchNothing so drop any right side.
1586  //
1587  // MatchNothing AND_NOT X == MatchNothing
1588  return;
1589  }
1590  if (subquery.internal.get() == NULL) {
1591  // Drop MatchNothing on the right of AndNot.
1592  //
1593  // X AND_NOT MatchNothing == X
1594  return;
1595  }
1596  if (subquery.get_type() == subquery.OP_SCALE_WEIGHT) {
1597  // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1598  // AndNot as no weight is taken from them.
1599  subqueries.push_back(subquery.get_subquery(0));
1600  // The Query constructor for OP_SCALE_WEIGHT should
1601  // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1602  Assert(subquery.get_subquery(0).internal.get() != NULL);
1603  return;
1604  }
1605  }
1606  subqueries.push_back(subquery);
1607 }
1608 
// Finalises the AndNot: when only one subquery remains its internal pointer
// is returned (which may be NULL, i.e. MatchNothing), otherwise this.
// NOTE(review): the return type and function-name lines are absent from this
// extract - confirm against upstream.
1611 {
1612  // Any MatchNothing right subqueries get discarded by add_subquery() - if
1613  // that leaves just the left subquery, return that.
1614  //
1615  // If left subquery is MatchNothing, then add_subquery() discards all right
1616  // subqueries, so this check also gives MatchNothing for this case.
1617  if (subqueries.size() == 1)
1618  return subqueries[0].internal.get();
1619  return this;
1620 }
1621 
// Adds subquery to an AndMaybe.  The first subquery is always stored, even
// if it is MatchNothing; later MatchNothing subqueries are dropped, and
// nothing more is added once the left side is MatchNothing.
// NOTE(review): the function-name line is absent from this extract; the
// inner comments indicate this is the AndMaybe add_subquery() - confirm.
1622 void
1624 {
1625  // If the left side of AndMaybe is already MatchNothing, do nothing.
1626  if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1627  return;
1628  // Drop any 2nd or subsequent subqueries which are MatchNothing.
1629  if (subquery.internal.get() != NULL || subqueries.empty())
1630  subqueries.push_back(subquery);
1631 }
1632 
// Finalises the query: when only one subquery remains its internal pointer
// is returned (which may be NULL, i.e. MatchNothing), otherwise this.
// NOTE(review): the return type and function-name lines are absent from this
// extract; presumably the AndMaybe done(), given its position - confirm.
1635 {
1636  // Any MatchNothing right subqueries get discarded by add_subquery() - if
1637  // that leaves just the left subquery, return that.
1638  //
1639  // If left subquery is MatchNothing, then add_subquery() discards all right
1640  // subqueries, so this check also gives MatchNothing for this case.
1641  if (subqueries.size() == 1)
1642  return subqueries[0].internal.get();
1643  return this;
1644 }
1645 
1647 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1648 {
1649  LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1650  OrContext ctx(qopt, subqueries.size());
1651  do_or_like(ctx, qopt, factor);
1652  RETURN(ctx.postlist());
1653 }
1654 
1655 void
1656 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1657 {
1658  do_or_like(ctx, qopt, factor);
1659 }
1660 
1662 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1663 {
1664  LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1665  AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1666  OrContext ctx(qopt, subqueries.size() - 1);
1667  do_or_like(ctx, qopt, 0.0, 0, 1);
1668  AutoPtr<PostList> r(ctx.postlist());
1669  RETURN(new AndNotPostList(l.release(), r.release(),
1670  qopt->matcher, qopt->db_size));
1671 }
1672 
// Adds the first subquery to ctx as an AND-like component, then adds the
// remaining subqueries (from index 1, with a factor of 0) to the context
// returned by ctx.get_not_ctx().
// NOTE(review): the line carrying the function name and first parameter is
// absent from this extract - confirm against upstream.
1673 void
1675  QueryOptimiser* qopt,
1676  double factor) const
1677 {
1678  subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor);
1679  do_or_like(ctx.get_not_ctx(subqueries.size() - 1), qopt, 0.0, 0, 1);
1680 }
1681 
1683 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1684 {
1685  LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1686  XorContext ctx(qopt, subqueries.size());
1687  postlist_sub_xor(ctx, qopt, factor);
1688  RETURN(ctx.postlist());
1689 }
1690 
1691 void
1692 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1693 {
1694  QueryVector::const_iterator i;
1695  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1696  // MatchNothing subqueries should have been removed by done().
1697  Assert((*i).internal.get());
1698  (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1699  }
1700 }
1701 
1703 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1704 {
1705  LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1706  // FIXME: Combine and-like side with and-like stuff above.
1707  AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1708  if (factor == 0.0) {
1709  // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1710  RETURN(l.release());
1711  }
1712  OrContext ctx(qopt, subqueries.size() - 1);
1713  do_or_like(ctx, qopt, factor, 0, 1);
1714  AutoPtr<PostList> r(ctx.postlist());
1715  RETURN(new AndMaybePostList(l.release(), r.release(),
1716  qopt->matcher, qopt->db_size));
1717 }
1718 
// Adds the first subquery to ctx as an AND-like component, then adds the
// remaining subqueries (from index 1, with the same factor) to the context
// returned by ctx.get_maybe_ctx().
// NOTE(review): the line carrying the function name and first parameter is
// absent from this extract - confirm against upstream.
1719 void
1721  QueryOptimiser* qopt,
1722  double factor) const
1723 {
1724  subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor);
1725  do_or_like(ctx.get_maybe_ctx(subqueries.size() - 1), qopt, factor, 0, 1);
1726 }
1727 
1729 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1730 {
1731  LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1732  AndContext ctx(qopt, subqueries.size());
1733  for (const auto& subq : subqueries) {
1734  // MatchNothing subqueries should have been removed by done().
1735  Assert(subq.internal.get());
1736  subq.internal->postlist_sub_and_like(ctx, qopt, factor);
1737  // Second and subsequent subqueries are unweighted.
1738  factor = 0.0;
1739  }
1740  RETURN(ctx.postlist());
1741 }
1742 
// Adds every subquery to ctx as an AND-like component; only the first is
// given the caller's factor, later ones are added with a factor of 0.
// NOTE(review): the function-name/parameter line is absent from this
// extract; ctx, qopt and factor are presumably its parameters - confirm.
1743 void
1745 {
1746  QueryVector::const_iterator i;
1747  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1748  // MatchNothing subqueries should have been removed by done().
1749  Assert((*i).internal.get());
1750  (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1751  // Second and subsequent subqueries are unweighted.
1752  factor = 0.0;
1753  }
1754 }
1755 
// Adds the subqueries of a positional (windowed) query to ctx and records a
// positional filter to be applied later.  Falls back to plain AND handling
// when the full database has no positional data, and empties ctx when only
// the current subdatabase lacks it.
// NOTE(review): the function-name/parameter line is absent from this
// extract; op, ctx, qopt and factor are presumably its parameters - confirm.
1756 void
1758 {
1759  if (!qopt->full_db_has_positions()) {
1760  // No positional data anywhere, so just handle as AND.
1761  QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1762  return;
1763  }
1764 
1765  if (!qopt->db.has_positions()) {
1766  // No positions in this subdatabase so this matches nothing, which
1767  // means the whole andcontext matches nothing.
1768  //
1769  // Bailing out here means we don't recurse deeper and that means we
1770  // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1771  // the phrase, but at least one shard will count them, and the matcher
1772  // takes the highest answer (since 1.4.6).
1773  ctx.shrink(0);
1774  return;
1775  }
1776 
// need_positions is forced on while the subquery postlists are built and
// restored to its previous value afterwards.
1777  bool old_need_positions = qopt->need_positions;
1778  qopt->need_positions = true;
1779 
1780  QueryVector::const_iterator i;
1781  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1782  // MatchNothing subqueries should have been removed by done().
1783  Assert((*i).internal.get());
1784  bool is_term = ((*i).internal->get_type() == Query::LEAF_TERM);
1785  PostList* pl = (*i).internal->postlist(qopt, factor);
// Postlists of non-term subqueries are wrapped in an OrPosPostList.
1786  if (!is_term)
1787  pl = new OrPosPostList(pl);
1788  ctx.add_postlist(pl);
1789  }
1790  // Record the positional filter to apply higher up the tree.
1791  ctx.add_pos_filter(op, subqueries.size(), window);
1792 
1793  qopt->need_positions = old_need_positions;
1794 }
1795 
// NOTE(review): both definitions below have lost their function-name line
// and their only body line in this extract, so nothing about what they do
// can be confirmed from here - consult upstream before editing.
1796 void
1798 {
1800 }
1801 
1802 void
1804 {
1806 }
1807 
1809 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1810 {
1811  LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1812  OrContext ctx(qopt, subqueries.size());
1813  do_or_like(ctx, qopt, factor, set_size);
1814  RETURN(ctx.postlist());
1815 }
1816 
// Adds the subqueries to ctx via do_or_like(), passing set_size as the
// elite set size.
// NOTE(review): the function-name/parameter line is absent from this
// extract; ctx, qopt and factor are presumably its parameters - confirm.
1817 void
1819 {
1820  do_or_like(ctx, qopt, factor, set_size);
1821 }
1822 
1824 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1825 {
1826  LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1827  // Save and restore total_subqs so we only add one for the whole
1828  // OP_SYNONYM subquery (or none if we're not weighted).
1829  Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1830  if (factor != 0.0)
1831  ++save_total_subqs;
1832  PostList * pl = do_synonym(qopt, factor);
1833  qopt->set_total_subqs(save_total_subqs);
1834  RETURN(pl);
1835 }
1836 
// Finalises the Synonym: returns NULL (MatchNothing) when empty; a single
// term, MatchAll or OP_SYNONYM subquery replaces the Synonym; otherwise
// returns this.
// NOTE(review): the return type and function-name lines are absent from this
// extract, as is one statement inside the OP_WILDCARD branch, so what that
// branch does with q cannot be confirmed from here.
1839 {
1840  // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1841  // subqueries which are MatchNothing.
1842  if (subqueries.empty())
1843  return NULL;
1844  if (subqueries.size() == 1) {
1845  Query::op sub_type = subqueries[0].get_type();
1846  // Synonym of a single subquery should only be simplified if that
1847  // subquery is a term (or MatchAll), or if it's also OP_SYNONYM. Note
1848  // that MatchNothing subqueries are dropped, so we'd never get here
1849  // with a single MatchNothing subquery.
1850  if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1851  sub_type == Query::OP_SYNONYM) {
1852  return subqueries[0].internal.get();
1853  }
1854  if (sub_type == Query::OP_WILDCARD) {
1855  auto q = static_cast<QueryWildcard*>(subqueries[0].internal.get());
1856  // SYNONYM over WILDCARD X -> WILDCARD SYNONYM for any combiner X.
1858  }
1859  }
1860  return this;
1861 }
1862 
1864 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1865 {
1866  LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1867  // Save and restore total_subqs so we only add one for the whole
1868  // OP_MAX subquery (or none if we're not weighted).
1869  Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1870  if (factor != 0.0)
1871  ++save_total_subqs;
1872  PostList * pl = do_max(qopt, factor);
1873  qopt->set_total_subqs(save_total_subqs);
1874  RETURN(pl);
1875 }
1876 
// A run of one-line accessors, each returning a fixed Xapian::Query operator
// constant.
// NOTE(review): every definition here has lost its return-type and
// function-name lines in this extract, and four have also lost their return
// statement, so which class each belongs to (and what the empty ones
// return) cannot be confirmed from here - consult upstream before editing.
1879 {
1880  return Xapian::Query::OP_AND;
1881 }
1882 
1885 {
1886  return Xapian::Query::OP_OR;
1887 }
1888 
1891 {
1893 }
1894 
1897 {
1898  return Xapian::Query::OP_XOR;
1899 }
1900 
1903 {
1905 }
1906 
1909 {
1910  return Xapian::Query::OP_FILTER;
1911 }
1912 
1915 {
1916  return Xapian::Query::OP_NEAR;
1917 }
1918 
1921 {
1922  return Xapian::Query::OP_PHRASE;
1923 }
1924 
1927 {
1929 }
1930 
1933 {
1935 }
1936 
1939 {
1940  return Xapian::Query::OP_MAX;
1941 }
1942 
1945 {
1947 }
1948 
// A run of description builders, each delegating to
// get_description_helper() with the operator's name (padded with spaces) as
// separator; the NEAR, PHRASE and ELITE_SET variants also pass window or
// set_size.
// NOTE(review): every definition here has lost its function-name line in
// this extract, so the owning class of each is inferred only from the
// separator string - confirm against upstream.
1949 string
1951 {
1952  return get_description_helper(" AND ");
1953 }
1954 
1955 string
1957 {
1958  return get_description_helper(" OR ");
1959 }
1960 
1961 string
1963 {
1964  return get_description_helper(" AND_NOT ");
1965 }
1966 
1967 string
1969 {
1970  return get_description_helper(" XOR ");
1971 }
1972 
1973 string
1975 {
1976  return get_description_helper(" AND_MAYBE ");
1977 }
1978 
1979 string
1981 {
1982  return get_description_helper(" FILTER ");
1983 }
1984 
1985 string
1987 {
1988  return get_description_helper(" NEAR ", window);
1989 }
1990 
1991 string
1993 {
1994  return get_description_helper(" PHRASE ", window);
1995 }
1996 
1997 string
1999 {
2000  return get_description_helper(" ELITE_SET ", set_size);
2001 }
2002 
2003 string
2005 {
// A lone subquery is described as "(SYNONYM <subquery>)" here rather than
// via get_description_helper().
2006  if (subqueries.size() == 1) {
2007  string d = "(SYNONYM ";
2008  d += subqueries[0].internal->get_description();
2009  d += ")";
2010  return d;
2011  }
2012  return get_description_helper(" SYNONYM ");
2013 }
2014 
2015 string
2017 {
2018  return get_description_helper(" MAX ");
2019 }
2020 
// NOTE(review): both definitions below have lost their return-type and
// function-name lines in this extract; the first has also lost its only body
// line, so its behaviour cannot be confirmed from here.
2023 {
2025 }
2026 
// Always throws Xapian::InvalidOperationError with the message
// "Query is invalid".
2029 {
2030  throw Xapian::InvalidOperationError("Query is invalid");
2031 }
2032 
2033 void
2034 QueryInvalid::serialise(std::string & result) const
2035 {
2036  result += static_cast<char>(0x00);
2037 }
2038 
// Returns the fixed description "<INVALID>".
// NOTE(review): the function-name line is absent from this extract;
// presumably the invalid query's get_description() - confirm.
2039 string
2041 {
2042  return "<INVALID>";
2043 }
2044 
2045 }
2046 }
Wrapper postlist providing positions for an OR.
Definition: orpospostlist.h:28
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
#define MISC(X)
OrContext(QueryOptimiser *qopt_, size_t reserve)
#define RETURN(A)
Definition: debuglog.h:482
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
#define Assert(COND)
Definition: omassert.h:122
vector< PostList * > pls
std::string get_description() const
N-way OR postlist with wt=max(wt_i).
Definition: maxpostlist.h:32
Xapian::doccount db_size
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Wildcard expansion.
Definition: query.h:255
Xapian::Query::op get_type() const
Abstract base class for postlists.
Definition: postlist.h:37
const Query get_subquery(size_t n) const
Read a top level subquery.
Definition: query.cc:226
XorContext(QueryOptimiser *qopt_, size_t reserve)
N-way XOR postlist.
N-way XOR postlist.
Xapian::Query::op get_op() const
Return docs containing terms forming a particular exact phrase.
virtual Xapian::doccount get_value_freq(Xapian::valueno slot) const =0
Return the frequency of a given value slot.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
length encoded as a string
#define AssertEq(A, B)
Definition: omassert.h:124
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
virtual void postlist_sub_and_like(Xapian::Internal::AndContext &ctx, QueryOptimiser *qopt, double factor) const
Merged postlist: items from one list, weights from both.
A postlist with weights modified by another postlist.
Postlist which matches an exact phrase using positional information.
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_total_subqs(Xapian::termcount n)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Query internals.
Xapian::Query::op get_op() const
virtual termcount get_length() const
Match documents which an odd number of subqueries match.
Definition: query.h:107
#define AssertRel(A, REL, B)
Definition: omassert.h:123
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
A PostList which contains no entries.
Xapian::Query::op get_op() const
std::string get_description() const
A PostList which contains no entries.
Definition: emptypostlist.h:27
op
Query operators.
Definition: query.h:78
Base class for databases.
Definition: database.h:56
virtual const Query get_subquery(size_t n) const
Postlist which matches a phrase using positional information.
Wrapper postlist providing positions for an OR.
void serialise(std::string &result) const
void inc_total_subqs()
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
OrContext & get_not_ctx(size_t reserve)
WildcardError indicates an error expanding a wildcarded query.
Definition: error.h:1013
virtual void postlist_sub_xor(Xapian::Internal::XorContext &ctx, QueryOptimiser *qopt, double factor) const
PostList * do_synonym(QueryOptimiser *qopt, double factor) const
External sources of posting information.
QueryPostingSource(PostingSource *source_)
const std::string get_description_helper(const char *op, Xapian::termcount window=0) const
virtual void postlist_sub_or_like(Xapian::Internal::OrContext &ctx, QueryOptimiser *qopt, double factor) const
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
STL namespace.
Pick the maximum weight of any subquery.
Definition: query.h:249
A postlist comprising two postlists ORed together.
Definition: orpostlist.h:38
Convert types to std::string.
const Xapian::PostingSource * get_posting_source(const std::string &name) const
Get a posting source given a name.
Definition: registry.cc:286
virtual bool has_positions() const =0
Check whether this database contains any positional information.
void serialise(std::string &result) const
PostList * do_max(QueryOptimiser *qopt, double factor) const
AutoPtr< OrContext > not_ctx
virtual PostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
virtual LeafPostList * open_post_list(const string &tname) const =0
Open a posting list.
std::string encode_length(T len)
Encode a length as a variable-length string.
Definition: length.h:36
void gather_terms(void *void_terms) const
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:49
bool operator()(const PostList *a, const PostList *b) const
Order by descending get_termfreq_est().
Xapian::Query::op get_op() const
void select_most_frequent(size_t set_size)
Select the set_size postlists with the highest term frequency.
void serialise(std::string &result) const
void destroy_postlist(PostList *pl)
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
Abstract base class for leaf postlists.
MultiMatch * matcher
#define rare(COND)
Definition: config.h:543
std::string get_description() const
Return docs containing terms forming a particular phrase.
Xapian::Query::op get_type() const
void do_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, Xapian::termcount elite_set_size=0, size_t first=0) const
Xapian::Query API class.
void add_postlist(PostList *pl)
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
Class providing an operator which sorts postlists to select max or terms.
PostList * postlist(PostList *pl, const vector< PostList *> &pls) const
OR of two posting lists.
void serialise(std::string &result) const
virtual void gather_terms(void *void_terms) const
std::string get_description() const
Hierarchy of classes which Xapian can throw as exceptions.
std::string get_description() const
Xapian::Query::op get_op() const
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
const Query get_subquery(size_t n) const
void serialise_(std::string &result, Xapian::termcount parameter=0) const
functions to serialise and unserialise a double
Return items which are in A, unless they're in B.
virtual std::string get_value_upper_bound(Xapian::valueno slot) const =0
Get an upper bound on the values stored in the given value slot.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Postlist which matches terms occurring within a specified window.
Definition: nearpostlist.h:38
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Xapian::Internal::opt_intrusive_ptr< PostingSource > source
Definition: queryinternal.h:71
Limit OP_WILDCARD expansion to the most frequent terms.
Definition: query.h:307
std::string get_description() const
void description_append(std::string &desc, const std::string &s)
Definition: unittest.cc:100
Xapian::termcount get_total_subqs() const
virtual size_t get_num_subqueries() const
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
PostList * make_synonym_postlist(PostList *pl, double factor, bool wdf_disjoint)
Xapian::Query::op get_type() const
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
#define MULTIWAY(X)
Value returned by get_type() for MatchAll or equivalent.
Definition: query.h:276
Scale the weight contributed by a subquery.
Definition: query.h:166
Match only documents where all subqueries match near and in order.
Definition: query.h:152
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
std::string get_description() const
Value returned by get_type() for a PostingSource.
Definition: query.h:269
virtual Xapian::doccount get_termfreq_est() const =0
Get an estimate of the number of documents indexed by this term.
void add_subquery(const Xapian::Query &subquery)
std::string get_description() const
Registry for user subclasses.
Definition: registry.h:47
void postlist_sub_xor(XorContext &ctx, QueryOptimiser *qopt, double factor) const
void select_elite_set(size_t set_size, size_t out_of)
Select the best set_size postlists from the last out_of added.
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
Match only documents where a value slot is >= a given value.
Definition: query.h:223
void gather_terms(void *void_terms) const
Xapian::Query::op get_type() const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor) const
void serialise(std::string &result) const
virtual TermList * open_allterms(const string &prefix) const =0
Open an allterms list.
std::string get_description() const
bool operator()(const PostList *a, const PostList *b)
Return true if and only if a has a strictly greater termweight than b.
N-way AND postlist.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
void add_pos_filter(Query::op op_, size_t n_subqs, Xapian::termcount window)
Xapian::Query::op get_type() const
Return document ids matching a >= test on a specified doc value.
Match only documents where a value slot is within a given range.
Definition: query.h:158
string str(int value)
Convert int to std::string.
Definition: str.cc:90
Xapian::Query::op get_op() const
Match only documents where a value slot is <= a given value.
Definition: query.h:231
std::string get_description() const
void serialise(std::string &result) const
Details passed around while building PostList tree from Query tree.
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor) const
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:46
Return docs containing terms within a specified window.
Construct an invalid query.
Definition: query.h:263
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_type() const
void serialise(std::string &result) const
LeafPostList * open_lazy_post_list(const std::string &term, Xapian::termcount wqf, double factor)
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
void serialise(std::string &result) const
void add_subquery(const Xapian::Query &subquery)
virtual std::string get_value_lower_bound(Xapian::valueno slot) const =0
Get a lower bound on the values stored in the given value slot.
virtual double get_maxweight() const =0
Return an upper bound on what get_weight() can return.
void shrink(size_t new_size)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
LeafPostList * open_post_list(const std::string &term, Xapian::termcount wqf, double factor)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
void serialise(std::string &result) const
Append a string to an object description, escaping invalid UTF-8.
Comparison functor which orders PostList* by descending get_termfreq_est().
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Match only documents which all subqueries match.
Definition: query.h:84
virtual Query::Internal * done()=0
Xapian::Query::op get_op() const
void serialise(std::string &result) const
std::string serialise_double(double v)
Serialise a double to a string.
void decode_length_and_check(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:112
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
char name[9]
Definition: dbcheck.cc:55
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
const Xapian::Database::Internal & db
OrContext & get_maybe_ctx(size_t reserve)
std::string get_description() const
Match only documents where all subqueries match near each other.
Definition: query.h:140
static Query::Internal * unserialise(const char **p, const char *end, const Registry &reg)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
std::string get_description() const
Xapian::Query::op get_op() const
Xapian::Query::op get_op() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Value returned by get_type() for a term.
Definition: query.h:266
void gather_terms(void *void_terms) const
Return document ids from an external source.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
QueryScaleWeight(double factor, const Query &subquery_)
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Match documents which at least one subquery matches.
Definition: query.h:92
N-way AND postlist.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
void postlist_windowed(Xapian::Query::op op, AndContext &ctx, QueryOptimiser *qopt, double factor) const
bool full_db_has_positions() const
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
Various handy helpers which std::string really should provide.
Abstract base class for termlists.
PostingSource * release()
Start reference counting this object.
virtual Xapian::doccount get_doccount() const =0
Return the number of docs in this (sub) database.
Stop expanding when OP_WILDCARD reaches its expansion limit.
Definition: query.h:297
op get_type() const
Get the type of the top level of the query.
Definition: query.cc:212
void serialise(std::string &result) const
Xapian::Query::op get_op() const
A postlist generated by taking one postlist (the left-hand postlist), and removing any documents which are in the other postlist (the right-hand postlist).
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
const Query get_subquery(size_t n) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Return document ids matching a range test on a specified doc value.
std::string get_description() const
Xapian::Query::op get_op() const
PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_, Xapian::termcount window_)
Various assertion macros.
Class representing a query.
Definition: query.h:46
N-way OR postlist with wt=max(wt_i)
Xapian::Query::op get_type() const
Xapian::Query::op get_op() const
list< PosFilter > pos_filters
Xapian::doccount shard_index
virtual void add_subquery(const Xapian::Query &subquery)=0
std::string get_description() const
AndContext(QueryOptimiser *qopt_, size_t reserve)
#define XAPIAN_NOEXCEPT
Definition: attributes.h:39
void decode_length(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:94
Xapian::Query::op get_type() const
Wrapper around standard unique_ptr template.
Xapian::Query::op get_type() const
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
AutoPtr< OrContext > maybe_ctx
void serialise(std::string &result) const
QueryWildcard * change_combiner(Xapian::Query::op new_op)
Change the combining operator.
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
virtual Query::op get_type() const =0
std::string get_description() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const