xapian-core  1.4.25
queryinternal.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts
5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "queryinternal.h"
25 
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
29 
30 #include "leafpostlist.h"
32 #include "matcher/andnotpostlist.h"
33 #include "emptypostlist.h"
36 #include "matcher/maxpostlist.h"
39 #include "matcher/nearpostlist.h"
40 #include "matcher/orpospostlist.h"
41 #include "matcher/orpostlist.h"
42 #include "matcher/phrasepostlist.h"
43 #include "matcher/queryoptimiser.h"
46 #include "net/length.h"
47 #include "serialise-double.h"
48 #include "stringutils.h"
49 #include "termlist.h"
50 
51 #include "autoptr.h"
52 #include "debuglog.h"
53 #include "omassert.h"
54 #include "str.h"
56 
57 #include <algorithm>
58 #include <list>
59 #include <string>
60 #include <unordered_set>
61 #include <vector>
62 
63 using namespace std;
64 
68 
69 namespace Xapian {
70 
71 namespace Internal {
72 
78 struct CmpMaxOrTerms {
80  bool operator()(const PostList *a, const PostList *b) {
81 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
82  defined(__mc68000__) || defined(__mc68010__) || \
83  defined(__mc68020__) || defined(__mc68030__)
84  // On some architectures, most common of which is x86, floating point
85  // values are calculated and stored in registers with excess precision.
86  // If the two get_maxweight() calls below return identical values in a
87  // register, the excess precision may be dropped for one of them but
88  // not the other (e.g. because the compiler saves the first calculated
89  // weight to memory while calculating the second, then reloads it to
90  // compare). This leads to both a > b and b > a being true, which
91  // violates the antisymmetry property of the strict weak ordering
92  // required by nth_element(). This can have serious consequences (e.g.
93  // segfaults).
94  //
95  // Note that m68k only has excess precision in earlier models - 68040
96  // and later are OK:
97  // https://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
98  //
99  // To avoid this, we store each result in a volatile double prior to
100  // comparing them. This means that the result of this test should
101  // match that on other architectures with the same double format (which
102  // is desirable), and actually has less overhead than rounding both
103  // results to float (which is another approach which works).
104  volatile double a_max_wt = a->get_maxweight();
105  volatile double b_max_wt = b->get_maxweight();
106  return a_max_wt > b_max_wt;
107 #else
108  return a->get_maxweight() > b->get_maxweight();
109 #endif
110  }
111 };
112 
// Comparator call operator: returns true when a's estimated term frequency
// (get_termfreq_est()) is strictly greater than b's.
// NOTE(review): the opening line of the enclosing struct is absent from this
// listing; other code in this file refers to a comparator named
// ComparePostListTermFreqAscending, which is presumably this one - confirm.
116  bool operator()(const PostList *a, const PostList *b) const {
117  return a->get_termfreq_est() > b->get_termfreq_est();
118  }
119 };
120 
// Base class for the contexts below: holds a vector of PostList pointers
// (pls) plus small helpers to append to it, query its size and truncate it.
// NOTE(review): the constructor initialises a member called qopt, but the
// line declaring it (listing line 123) is missing from this listing.
121 class Context {
122  protected:
124 
 // Postlists collected so far.
125  vector<PostList*> pls;
126 
127  public:
 // Stores qopt_ and reserves space for `reserve` entries in pls.
128  Context(QueryOptimiser* qopt_, size_t reserve);
129 
130  ~Context();
131 
 // Append pl to the collected postlists.
132  void add_postlist(PostList * pl) {
133  pls.push_back(pl);
134  }
135 
 // True if no postlists are currently held.
136  bool empty() const {
137  return pls.empty();
138  }
139 
 // Number of postlists currently held.
140  size_t size() const {
141  return pls.size();
142  }
143 
 // Destroy the entries beyond new_size and truncate pls to new_size.
144  void shrink(size_t new_size);
145 };
146 
147 Context::Context(QueryOptimiser* qopt_, size_t reserve)
148  : qopt(qopt_)
149 {
150  pls.reserve(reserve);
151 }
152 
153 void
154 Context::shrink(size_t new_size)
155 {
156  AssertRel(new_size, <=, pls.size());
157  if (new_size >= pls.size())
158  return;
159 
160  for (auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
161  qopt->destroy_postlist(*i);
162  }
163  pls.resize(new_size);
164 }
165 
// Destroys every postlist still held by calling shrink(0).
// NOTE(review): the signature line is absent from this listing; this body is
// presumably the Context destructor declared in the class above - confirm.
167 {
168  shrink(0);
169 }
170 
171 class OrContext : public Context {
172  public:
173  OrContext(QueryOptimiser* qopt_, size_t reserve)
174  : Context(qopt_, reserve) { }
175 
177  void select_elite_set(size_t set_size, size_t out_of);
178 
180  void select_most_frequent(size_t set_size);
181 
182  PostList * postlist();
183  PostList * postlist_max();
184 };
185 
186 void
187 OrContext::select_elite_set(size_t set_size, size_t out_of)
188 {
189  // Call recalc_maxweight() as otherwise get_maxweight()
190  // may not be valid before next() or skip_to()
191  auto begin = pls.begin() + pls.size() - out_of;
192  for (auto i = begin; i != pls.end(); ++i) {
193  (*i)->recalc_maxweight();
194  }
195  nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
196  shrink(pls.size() - out_of + set_size);
197 }
198 
// Partially reorders pls with nth_element() so that the set_size preferred
// entries come first, then discards the remainder with shrink(set_size).
// NOTE(review): listing lines 200 (the function name/parameter line) and 204
// (the comparator argument closing the nth_element() call) are missing here.
// This is presumably OrContext::select_most_frequent(size_t set_size) as
// declared in the class - confirm against the original source.
199 void
201 {
202  vector<PostList*>::iterator begin = pls.begin();
203  nth_element(begin, begin + set_size - 1, pls.end(),
205  shrink(set_size);
206 }
207 
// Combines all held postlists into a tree of binary OrPostList objects and
// returns its root.  pls must not be empty; on return pls has been cleared,
// so the returned postlist is the only handle on the former entries.
// NOTE(review): the line naming this function (listing line 209) is missing;
// presumably OrContext::postlist() - confirm.
208 PostList *
210 {
211  Assert(!pls.empty());
212 
 // A single entry needs no OR node - hand it straight back.
213  if (pls.size() == 1) {
214  PostList * pl = pls[0];
215  pls.clear();
216  return pl;
217  }
218 
219  // Make postlists into a heap so that the postlist with the greatest term
220  // frequency is at the top of the heap.
 // NOTE(review): if the comparator returns true for "a more frequent than
 // b", the standard heap algorithms put the LEAST frequent entry at the
 // front, which is what the l >= r invariant below relies on - the wording
 // above looks inverted; confirm.
221  make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
222 
223  // Now build a tree of binary OrPostList objects.
224  //
225  // The algorithm used to build the tree is like that used to build an
226  // optimal Huffman coding tree. If we called next() repeatedly, this
227  // arrangement would minimise the number of method calls. Generally we
228  // don't actually do that, but this arrangement is still likely to be a
229  // good one, and it does minimise the work in the worst case.
230  while (true) {
231  // We build the tree such that at each branch:
232  //
233  // l.get_termfreq_est() >= r.get_termfreq_est()
234  //
235  // We do this so that the OrPostList class can be optimised assuming
236  // that this is the case.
 // Take the current heap front as the right-hand child...
237  PostList * r = pls.front();
238  pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
239  pls.pop_back();
 // ...and pair it with the new heap front as the left-hand child.
240  PostList * pl;
241  pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
242 
 // Only the left-hand child remained, so pl is the root of the tree.
243  if (pls.size() == 1) {
244  pls.clear();
245  return pl;
246  }
247 
 // Replace the left-hand child in the heap with the combined node.
248  pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
249  pls.back() = pl;
250  push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
251  }
252 }
253 
// Combines all held postlists into a single MaxPostList (or returns the only
// entry unchanged when there is just one).  pls must not be empty and is
// cleared before returning.
// NOTE(review): the line naming this function (listing line 255) is missing;
// presumably OrContext::postlist_max() - confirm.
254 PostList *
256 {
257  Assert(!pls.empty());
258 
259  if (pls.size() == 1) {
260  PostList * pl = pls[0];
261  pls.clear();
262  return pl;
263  }
264 
265  // Sort the postlists so that the postlist with the greatest term frequency
266  // is first.
267  sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
268 
269  PostList * pl;
270  pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
271 
 // Clear pls so that shrink(0) in the destructor has nothing left to destroy.
272  pls.clear();
273  return pl;
274 }
275 
276 class XorContext : public Context {
277  public:
278  XorContext(QueryOptimiser* qopt_, size_t reserve)
279  : Context(qopt_, reserve) { }
280 
281  PostList * postlist();
282 };
283 
// Wraps every held postlist in a single MultiXorPostList and returns it;
// pls is cleared afterwards.
// NOTE(review): the line naming this function (listing line 285) is missing;
// presumably XorContext::postlist() - confirm.
284 PostList *
286 {
287  Xapian::doccount db_size = qopt->db_size;
288  PostList * pl;
289  pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
290 
291  // Empty pls so our destructor doesn't delete them all!
292  pls.clear();
293  return pl;
294 }
295 
// Context for AND-like combinations.  Besides the postlists inherited from
// Context it records positional filters and two lazily created OrContext
// objects (not_ctx and maybe_ctx) which postlist() applies as AND_NOT and
// AND_MAYBE right-hand sides respectively.
// NOTE(review): the PosFilter constructor initialises members op_ and window
// whose declarations (listing lines 298 and 303) are missing from this
// listing.
296 class AndContext : public Context {
 // Describes one positional check over the pls entries [begin, end).
297  class PosFilter {
299 
301  size_t begin, end;
302 
304 
305  public:
306  PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
307  Xapian::termcount window_)
308  : op_(op__), begin(begin_), end(end_), window(window_) { }
309 
 // Wrap pl in the positional postlist this filter describes.
310  PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
311  };
312 
 // Positional filters, in the order they were added.
313  list<PosFilter> pos_filters;
314 
 // Created on demand by get_not_ctx().
315  AutoPtr<OrContext> not_ctx;
316 
 // Created on demand by get_maybe_ctx().
317  AutoPtr<OrContext> maybe_ctx;
318 
319  public:
320  AndContext(QueryOptimiser* qopt_, size_t reserve)
321  : Context(qopt_, reserve) { }
322 
 // Record a positional filter over the n_subqs most recently added postlists.
323  void add_pos_filter(Query::op op_,
324  size_t n_subqs,
325  Xapian::termcount window);
326 
 // Return not_ctx, creating it first (with the given reserve) if needed.
327  OrContext& get_not_ctx(size_t reserve) {
328  if (!not_ctx) {
329  not_ctx.reset(new OrContext(qopt, reserve));
330  }
331  return *not_ctx;
332  }
333 
 // Return maybe_ctx, creating it first (with the given reserve) if needed.
334  OrContext& get_maybe_ctx(size_t reserve) {
335  if (!maybe_ctx) {
336  maybe_ctx.reset(new OrContext(qopt, reserve));
337  }
338  return *maybe_ctx;
339  }
340 
341  PostList * postlist();
342 };
343 
// Wraps pl in a positional postlist checking the terms pls[begin..end):
//   - OP_NEAR                      -> NearPostList
//   - window equals the term count -> ExactPhrasePostList
//   - otherwise                    -> PhrasePostList
// The whole body is a function-try-block: if anything throws, pl is deleted
// before the exception is rethrown.
// NOTE(review): listing lines 353 and 356 are missing from this listing, so
// each of the two phrase branches may contain a statement not shown here.
344 PostList *
345 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
346 try {
347  vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
348  vector<PostList *>::const_iterator terms_end = pls.begin() + end;
349 
350  if (op_ == Xapian::Query::OP_NEAR) {
351  pl = new NearPostList(pl, window, terms_begin, terms_end);
352  } else if (window == end - begin) {
354  pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
355  } else {
357  pl = new PhrasePostList(pl, window, terms_begin, terms_end);
358  }
359  return pl;
360 } catch (...) {
361  delete pl;
362  throw;
363 }
364 
// Records a positional filter covering the n_subqs postlists most recently
// added to pls (indices [pls.size() - n_subqs, pls.size())).
// NOTE(review): the first signature line (listing line 366) is missing;
// presumably AndContext::add_pos_filter(Query::op op_, ...) - confirm.
365 void
367  size_t n_subqs,
368  Xapian::termcount window)
369 {
370  Assert(n_subqs > 1);
371  size_t end = pls.size();
372  size_t begin = end - n_subqs;
373  pos_filters.push_back(PosFilter(op_, begin, end, window));
374 }
375 
// Builds the postlist for this context: a MultiAndPostList over pls, then
// (if present) an AndNotPostList using not_ctx, then each positional filter
// in turn, and finally (if present) an AndMaybePostList using maybe_ctx.
// An AutoPtr holds the partially built tree between steps.
// NOTE(review): the line naming this function (listing line 377) is missing;
// presumably AndContext::postlist() - confirm.
376 PostList *
378 {
379  if (pls.empty()) {
380  // This case only happens if this sub-database has no positional data
381  // (but another sub-database does).
382  Assert(pos_filters.empty());
383  return new EmptyPostList;
384  }
385 
386  auto matcher = qopt->matcher;
387  auto db_size = qopt->db_size;
388 
389  AutoPtr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
390  matcher, db_size));
391 
392  if (not_ctx) {
393  PostList* rhs = not_ctx->postlist();
394  pl.reset(new AndNotPostList(pl.release(), rhs, matcher, db_size));
395  not_ctx.reset();
396  }
397 
398  // Sort the positional filters to try to apply them in an efficient order.
399  // FIXME: We need to figure out what that is! Try applying lowest cf/tf
400  // first?
401 
402  // Apply any positional filters.
 // This has to happen before pls is cleared below, because each filter
 // reads its terms from pls.
403  for (const PosFilter& filter : pos_filters) {
404  pl.reset(filter.postlist(pl.release(), pls));
405  }
406 
407  // Empty pls so our destructor doesn't delete them all!
408  pls.clear();
409 
410  if (maybe_ctx) {
411  PostList* rhs = maybe_ctx->postlist();
412  pl.reset(new AndMaybePostList(pl.release(), rhs, matcher, db_size));
413  maybe_ctx.reset();
414  }
415 
416  return pl.release();
417 }
418 
419 }
420 
422 
// Four small default implementations follow.
// NOTE(review): the lines naming each function are missing from this listing
// (they sit outside namespace Internal, so they are presumably members of
// Query::Internal) - confirm names against the original source.

// Returns 0.
423 size_t
425 {
426  return 0;
427 }
428 
// Always throws InvalidArgumentError; judging by the message this is the
// default get_subquery().
429 const Query
431 {
432  throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
433 }
434 
// Deliberately does nothing.
435 void
437 {
438 }
439 
// Returns 0 (the return type line is also missing from this listing).
442 {
443  return 0;
444 }
445 
// Decodes one serialised query from the bytes [*p, end), advancing *p past
// everything consumed, and returns a newly allocated query object.  Returns
// NULL when there is no input left.  Throws SerialisationError for an
// unknown encoding, an unregistered PostingSource, or truncated data.
//
// The top three bits of the first byte select the encoding family; branch
// operators recurse into this function once per subquery.
//
// NOTE(review): the return type line (listing line 446) and the declaration
// of the local `result` used in the multi-way branch case (listing line 469)
// are missing from this listing.
447 Query::Internal::unserialise(const char ** p, const char * end,
448  const Registry & reg)
449 {
450  if (*p == end)
451  return NULL;
452  unsigned char ch = *(*p)++;
453  switch (ch >> 5) {
454  case 4: case 5: case 6: case 7: {
455  // Multi-way branch
456  //
457  // 1ccccnnn where:
458  // nnn -> n_subqs (0 means encoded value follows)
459  // cccc -> code (which OP_XXX)
460  size_t n_subqs = ch & 0x07;
461  if (n_subqs == 0) {
462  decode_length(p, end, n_subqs);
463  n_subqs += 8;
464  }
465  unsigned char code = (ch >> 3) & 0x0f;
466  Xapian::termcount parameter = 0;
 // Codes 13 to 15 carry an extra parameter (set size or window).
467  if (code >= 13)
468  decode_length(p, end, parameter);
470  switch (code) {
471  case 0: // OP_AND
472  result = new Xapian::Internal::QueryAnd(n_subqs);
473  break;
474  case 1: // OP_OR
475  result = new Xapian::Internal::QueryOr(n_subqs);
476  break;
477  case 2: // OP_AND_NOT
478  result = new Xapian::Internal::QueryAndNot(n_subqs);
479  break;
480  case 3: // OP_XOR
481  result = new Xapian::Internal::QueryXor(n_subqs);
482  break;
483  case 4: // OP_AND_MAYBE
484  result = new Xapian::Internal::QueryAndMaybe(n_subqs);
485  break;
486  case 5: // OP_FILTER
487  result = new Xapian::Internal::QueryFilter(n_subqs);
488  break;
489  case 6: // OP_SYNONYM
490  result = new Xapian::Internal::QuerySynonym(n_subqs);
491  break;
492  case 7: // OP_MAX
493  result = new Xapian::Internal::QueryMax(n_subqs);
494  break;
495  case 13: // OP_ELITE_SET
496  result = new Xapian::Internal::QueryEliteSet(n_subqs,
497  parameter);
498  break;
499  case 14: // OP_NEAR
500  result = new Xapian::Internal::QueryNear(n_subqs,
501  parameter);
502  break;
503  case 15: // OP_PHRASE
504  result = new Xapian::Internal::QueryPhrase(n_subqs,
505  parameter);
506  break;
507  default:
508  // 8 to 12 are currently unused.
509  throw SerialisationError("Unknown multi-way branch Query operator");
510  }
 // n_subqs is at least 1 here (an in-byte count of 0 selects the extended
 // form, which adds 8), so a do-while is safe.
511  do {
512  result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
513  } while (--n_subqs);
514  result->done();
515  return result;
516  }
517  case 2: case 3: { // Term
518  // Term
519  //
520  // 01ccLLLL where:
521  // LLLL -> length (0 means encoded value follows)
522  // cc -> code:
523  // 0: wqf = 0; pos = 0
524  // 1: wqf = 1; pos = 0
525  // 2: wqf = 1; pos -> encoded value follows
526  // 3: wqf -> encoded value follows; pos -> encoded value follows
527  size_t len = ch & 0x0f;
528  if (len == 0) {
529  decode_length(p, end, len);
530  len += 16;
531  }
 // Check the term bytes are really there before copying them.
532  if (size_t(end - *p) < len)
533  throw SerialisationError("Not enough data");
534  string term(*p, len);
535  *p += len;
536 
537  int code = ((ch >> 4) & 0x03);
538 
 // wqf defaults to 0 for code 0 and to 1 otherwise.
539  Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
540  if (code == 3)
541  decode_length(p, end, wqf);
542 
543  Xapian::termpos pos = 0;
544  if (code >= 2)
545  decode_length(p, end, pos);
546 
547  return new Xapian::Internal::QueryTerm(term, wqf, pos);
548  }
549  case 1: {
550  // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
551  //
552  // 001tssss where:
553  // ssss -> slot number (15 means encoded value follows)
554  // t -> op:
555  // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
556  // 1: OP_VALUE_GE
557  Xapian::valueno slot = ch & 15;
558  if (slot == 15) {
559  decode_length(p, end, slot);
560  slot += 15;
561  }
562  size_t len;
563  decode_length_and_check(p, end, len);
564  string begin(*p, len);
565  *p += len;
566  if (ch & 0x10) {
567  // OP_VALUE_GE
568  return new Xapian::Internal::QueryValueGE(slot, begin);
569  }
570 
571  // OP_VALUE_RANGE
572  decode_length_and_check(p, end, len);
573  string end_(*p, len);
574  *p += len;
575  if (begin.empty()) // FIXME: is this right?
576  return new Xapian::Internal::QueryValueLE(slot, end_);
577  return new Xapian::Internal::QueryValueRange(slot, begin, end_);
578  }
579  case 0: {
580  // Other operators
581  //
582  // 000ttttt where:
583  // ttttt -> encodes which OP_XXX
584  switch (ch & 0x1f) {
585  case 0x00: // OP_INVALID
586  return new Xapian::Internal::QueryInvalid();
587  case 0x0b: { // Wildcard
588  if (*p == end)
589  throw SerialisationError("not enough data");
590  Xapian::termcount max_expansion;
591  decode_length(p, end, max_expansion);
 // Two single-byte fields follow: max_type and combiner.
592  if (end - *p < 2)
593  throw SerialisationError("not enough data");
594  int max_type = static_cast<unsigned char>(*(*p)++);
595  op combiner = static_cast<op>(*(*p)++);
596  size_t len;
597  decode_length_and_check(p, end, len);
598  string pattern(*p, len);
599  *p += len;
600  return new Xapian::Internal::QueryWildcard(pattern,
601  max_expansion,
602  max_type,
603  combiner);
604  }
605  case 0x0c: { // PostingSource
606  size_t len;
607  decode_length_and_check(p, end, len);
608  string name(*p, len);
609  *p += len;
610 
 // The named source must be known to the registry.
611  const PostingSource * reg_source = reg.get_posting_source(name);
612  if (!reg_source) {
613  string m = "PostingSource ";
614  m += name;
615  m += " not registered";
616  throw SerialisationError(m);
617  }
618 
619  decode_length_and_check(p, end, len);
620  PostingSource * source =
621  reg_source->unserialise_with_registry(string(*p, len),
622  reg);
623  *p += len;
624  return new Xapian::Internal::QueryPostingSource(source->release());
625  }
 // 0x0d: a scale factor followed by a single serialised subquery.
626  case 0x0d: {
628  double scale_factor = unserialise_double(p, end);
629  return new QueryScaleWeight(scale_factor,
630  Query(unserialise(p, end, reg)));
631  }
 // 0x0e: an empty-term QueryTerm with explicit wqf and pos.
632  case 0x0e: {
633  Xapian::termcount wqf;
634  Xapian::termpos pos;
635  decode_length(p, end, wqf);
636  decode_length(p, end, pos);
637  return new Xapian::Internal::QueryTerm(string(), wqf, pos);
638  }
 // 0x0f: a default-constructed QueryTerm.
639  case 0x0f:
640  return new Xapian::Internal::QueryTerm();
641  default: // Others currently unused.
642  break;
643  }
644  break;
645  }
646  }
 // Reached only for an unused code in the "other operators" family.
647  string msg = "Unknown Query serialisation: ";
648  msg += str(ch);
649  throw SerialisationError(msg);
650 }
651 
// Three identical default implementations: each builds this query's postlist
// with postlist(qopt, factor) and adds it to the supplied context.
// NOTE(review): the first two signature lines of each function (function
// name and leading parameters, e.g. listing lines 653-654) are missing from
// this listing - confirm names and context types against the original source.
652 void
655  double factor) const
656 {
657  ctx.add_postlist(postlist(qopt, factor));
658 }
659 
660 void
663  double factor) const
664 {
665  ctx.add_postlist(postlist(qopt, factor));
666 }
667 
668 void
671  double factor) const
672 {
673  ctx.add_postlist(postlist(qopt, factor));
674 }
675 
676 namespace Internal {
677 
// Reports LEAF_MATCH_ALL for the empty term and LEAF_TERM otherwise.
// NOTE(review): the line naming this function (listing line 679) is missing;
// it reads the `term` member, so presumably a QueryTerm method - confirm.
678 Query::op
680 {
681  return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
682 }
683 
// Builds a human-readable description: the term (or "<alldocuments>" when it
// is empty), followed by "#<wqf>" if wqf is not 1 and "@<pos>" if pos is
// non-zero.
// NOTE(review): the line naming this function (listing line 685) is missing.
684 string
686 {
687  string desc;
688  if (term.empty()) {
689  desc = "<alldocuments>";
690  } else {
691  description_append(desc, term);
692  }
693  if (wqf != 1) {
694  desc += '#';
695  desc += str(wqf);
696  }
697  if (pos) {
698  desc += '@';
699  desc += str(pos);
700  }
701  return desc;
702 }
703 
// Constructor body: stores source_, rejects NULL with InvalidArgumentError,
// and tries to replace a non-reference-counted source with a clone.
// NOTE(review): the signature line (listing line 704) is missing; the member
// and parameter names suggest the QueryPostingSource constructor - confirm.
705  : source(source_)
706 {
707  if (!source_)
708  throw Xapian::InvalidArgumentError("source parameter can't be NULL");
709  if (source->_refs == 0) {
710  // source_ isn't reference counted, so try to clone it. If clone()
711  // isn't implemented, just use the object provided and it's the
712  // caller's responsibility to ensure it stays valid while in use.
713  PostingSource * cloned_source = source->clone();
714  if (cloned_source) source = cloned_source->release();
715  }
716 }
717 
// NOTE(review): both the line naming this function (listing line 719) and
// its body statement (listing line 721) are missing from this listing, so
// the value returned here cannot be read off - see the original source.
718 Query::op
720 {
722 }
723 
// Returns "PostingSource(" + the source's own description + ")".
// NOTE(review): the line naming this function (listing line 725) is missing.
724 string
726 {
727  string desc = "PostingSource(";
728  desc += source->get_description();
729  desc += ')';
730  return desc;
731 }
732 
733 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
734  : scale_factor(factor), subquery(subquery_)
735 {
736  if (rare(scale_factor < 0.0))
737  throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
738 }
739 
// Four small accessors; they use the scale_factor and subquery members, so
// they are presumably QueryScaleWeight methods.
// NOTE(review): the line naming each function is missing from this listing -
// confirm names against the original source.

// Reports OP_SCALE_WEIGHT.
740 Query::op
742 {
743  return Query::OP_SCALE_WEIGHT;
744 }
745 
// Returns 1.
746 size_t
748 {
749  return 1;
750 }
751 
// Returns the stored subquery.
752 const Query
754 {
755  return subquery;
756 }
757 
// Returns "<scale_factor> * <description of the subquery>".
758 string
760 {
761  Assert(subquery.internal.get());
762  string desc = str(scale_factor);
763  desc += " * ";
764  desc += subquery.internal->get_description();
765  return desc;
766 }
767 
// Opens the postlist for this term via the optimiser, passing on wqf and the
// weight factor.  The optimiser's subquery count is bumped only when the
// factor is non-zero.
// NOTE(review): the return type line (listing line 768) is missing; the
// LOGCALL below names it as PostingIterator::Internal *.
769 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
770 {
771  LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
772  if (factor != 0.0)
773  qopt->inc_total_subqs();
774  RETURN(qopt->open_post_list(term, wqf, factor));
775 }
776 
// Wraps the posting source in an ExternalPostList bound to the optimiser's
// database, matcher and shard index.  The optimiser's subquery count is
// bumped only when the factor is non-zero.
// NOTE(review): the return type line (listing line 777) is missing; the
// LOGCALL below names it as PostingIterator::Internal *.
778 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
779 {
780  LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
781  Assert(source.get());
782  if (factor != 0.0)
783  qopt->inc_total_subqs();
784  // Casting away const on the Database::Internal here is OK, as we wrap
785  // them in a const Xapian::Database so non-const methods can't actually
786  // be called on the Database::Internal object.
787  const Xapian::Database wrappeddb(
788  const_cast<Xapian::Database::Internal*>(&(qopt->db)));
789  RETURN(new ExternalPostList(wrappeddb, source.get(), factor,
790  qopt->matcher,
791  qopt->shard_index));
792 }
793 
// Delegates to the subquery's postlist(), multiplying the incoming factor by
// this node's scale_factor.
// NOTE(review): the return type line (listing line 794) is missing; the
// LOGCALL below names it as PostingIterator::Internal *.
795 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
796 {
797  LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
798  RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
799 }
800 
801 void
802 QueryTerm::gather_terms(void * void_terms) const
803 {
804  // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
805  if (!term.empty()) {
806  vector<pair<Xapian::termpos, string>> &terms =
807  *static_cast<vector<pair<Xapian::termpos, string>>*>(void_terms);
808  terms.push_back(make_pair(pos, term));
809  }
810 }
811 
// Builds the postlist for a value range [begin, end] on `slot`, using the
// database's value bounds to pick the cheapest equivalent:
//   - EmptyPostList when the range cannot overlap the stored values;
//   - the match-all postlist or a ValueGePostList when the upper limit is
//     redundant;
//   - ValueRangePostList otherwise.
// NOTE(review): the return type line (listing line 812) is missing; the
// LOGCALL below names it as PostingIterator::Internal *.
813 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
814 {
815  LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
816  if (factor != 0.0)
817  qopt->inc_total_subqs();
818  const Xapian::Database::Internal & db = qopt->db;
819  const string & lb = db.get_value_lower_bound(slot);
820  if (lb.empty()) {
821  // This should only happen if there are no values in this slot (which
822  // could be because the backend just doesn't support values at all).
823  // If there were values in the slot, the backend should have a
824  // non-empty lower bound, even if it isn't a tight one.
825  AssertEq(db.get_value_freq(slot), 0);
826  RETURN(new EmptyPostList);
827  }
 // The whole range lies below the smallest stored value.
828  if (end < lb) {
829  RETURN(new EmptyPostList);
830  }
831  const string & ub = db.get_value_upper_bound(slot);
 // The whole range lies above the largest stored value.
832  if (begin > ub) {
833  RETURN(new EmptyPostList);
834  }
 // The upper limit excludes nothing, so only the lower limit matters.
835  if (end >= ub) {
836  if (begin <= lb) {
837  // The range check isn't needed, but we do still need to consider
838  // which documents have a value set in this slot. If this value is
839  // set for all documents, we can replace it with the MatchAll
840  // postlist, which is especially efficient if there are no gaps in
841  // the docids.
842  if (db.get_value_freq(slot) == qopt->db_size) {
843  RETURN(db.open_post_list(string()));
844  }
845  // Otherwise we can at least replace the lower bound with an empty
846  // string for a small efficiency gain.
847  RETURN(new ValueGePostList(&db, slot, string()));
848  }
849  RETURN(new ValueGePostList(&db, slot, begin));
850  }
851  RETURN(new ValueRangePostList(&db, slot, begin, end));
852 }
853 
854 void
855 QueryValueRange::serialise(string & result) const
856 {
857  if (slot < 15) {
858  result += static_cast<char>(0x20 | slot);
859  } else {
860  result += static_cast<char>(0x20 | 15);
861  result += encode_length(slot - 15);
862  }
863  result += encode_length(begin.size());
864  result += begin;
865  result += encode_length(end.size());
866  result += end;
867 }
868 
// Reports OP_VALUE_RANGE.
// NOTE(review): the line naming this function (listing line 870) is missing;
// presumably a QueryValueRange method - confirm.
869 Query::op
871 {
872  return Query::OP_VALUE_RANGE;
873 }
874 
// Returns "VALUE_RANGE <slot> <begin> <end>", with both limits added via
// description_append().
// NOTE(review): the line naming this function (listing line 876) is missing.
875 string
877 {
878  string desc = "VALUE_RANGE ";
879  desc += str(slot);
880  desc += ' ';
881  description_append(desc, begin);
882  desc += ' ';
883  description_append(desc, end);
884  return desc;
885 }
886 
// Builds the postlist for "value in slot <= limit".  Returns an
// EmptyPostList when nothing can match, the match-all postlist when the
// limit excludes nothing and every document has a value in the slot, and a
// ValueRangePostList with an empty lower limit otherwise.
// NOTE(review): the return type line (listing line 887) is missing; the
// LOGCALL below names it as PostingIterator::Internal *.
888 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
889 {
890  LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
891  if (factor != 0.0)
892  qopt->inc_total_subqs();
893  const Xapian::Database::Internal & db = qopt->db;
894  const string & lb = db.get_value_lower_bound(slot);
895  if (lb.empty()) {
896  // This should only happen if there are no values in this slot (which
897  // could be because the backend just doesn't support values at all).
898  // If there were values in the slot, the backend should have a
899  // non-empty lower bound, even if it isn't a tight one.
900  AssertEq(db.get_value_freq(slot), 0);
901  RETURN(new EmptyPostList);
902  }
 // The limit lies below the smallest stored value.
903  if (limit < lb) {
904  RETURN(new EmptyPostList);
905  }
906  if (limit >= db.get_value_upper_bound(slot)) {
907  // The range check isn't needed, but we do still need to consider
908  // which documents have a value set in this slot. If this value is
909  // set for all documents, we can replace it with the MatchAll
910  // postlist, which is especially efficient if there are no gaps in
911  // the docids.
912  if (db.get_value_freq(slot) == qopt->db_size) {
913  RETURN(db.open_post_list(string()));
914  }
915  }
916  RETURN(new ValueRangePostList(&db, slot, string(), limit));
917 }
918 
919 void
920 QueryValueLE::serialise(string & result) const
921 {
922  // Encode as a range with an empty start (which only takes a single byte to
923  // encode).
924  if (slot < 15) {
925  result += static_cast<char>(0x20 | slot);
926  } else {
927  result += static_cast<char>(0x20 | 15);
928  result += encode_length(slot - 15);
929  }
930  result += encode_length(0);
931  result += encode_length(limit.size());
932  result += limit;
933 }
934 
// Reports OP_VALUE_LE.
// NOTE(review): the line naming this function (listing line 936) is missing;
// presumably a QueryValueLE method - confirm.
935 Query::op
937 {
938  return Query::OP_VALUE_LE;
939 }
940 
// Returns "VALUE_LE <slot> <limit>", with the limit added via
// description_append().
// NOTE(review): the line naming this function (listing line 942) is missing.
941 string
943 {
944  string desc = "VALUE_LE ";
945  desc += str(slot);
946  desc += ' ';
947  description_append(desc, limit);
948  return desc;
949 }
950 
// Builds the postlist for "value in slot >= limit".  Returns an
// EmptyPostList when nothing can match, the match-all postlist when the
// limit excludes nothing and every document has a value in the slot, and a
// ValueGePostList otherwise.
// NOTE(review): the return type line (listing line 951) is missing; the
// LOGCALL below names it as PostingIterator::Internal *.
952 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
953 {
954  LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
955  if (factor != 0.0)
956  qopt->inc_total_subqs();
957  const Xapian::Database::Internal & db = qopt->db;
958  const string & lb = db.get_value_lower_bound(slot);
959  if (lb.empty()) {
960  // This should only happen if there are no values in this slot (which
961  // could be because the backend just doesn't support values at all).
962  // If there were values in the slot, the backend should have a
963  // non-empty lower bound, even if it isn't a tight one.
964  AssertEq(db.get_value_freq(slot), 0);
965  RETURN(new EmptyPostList);
966  }
 // The limit lies above the largest stored value.
967  if (limit > db.get_value_upper_bound(slot)) {
968  RETURN(new EmptyPostList);
969  }
970  if (limit <= lb) {
971  // The range check isn't needed, but we do still need to consider
972  // which documents have a value set in this slot. If this value is
973  // set for all documents, we can replace it with the MatchAll
974  // postlist, which is especially efficient if there are no gaps in
975  // the docids.
976  if (db.get_value_freq(slot) == qopt->db_size) {
977  RETURN(db.open_post_list(string()));
978  }
979  }
980  RETURN(new ValueGePostList(&db, slot, limit));
981 }
982 
983 void
984 QueryValueGE::serialise(string & result) const
985 {
986  if (slot < 15) {
987  result += static_cast<char>(0x20 | 0x10 | slot);
988  } else {
989  result += static_cast<char>(0x20 | 0x10 | 15);
990  result += encode_length(slot - 15);
991  }
992  result += encode_length(limit.size());
993  result += limit;
994 }
995 
// Reports OP_VALUE_GE.
// NOTE(review): the line naming this function (listing line 997) is missing;
// presumably a QueryValueGE method - confirm.
996 Query::op
998 {
999  return Query::OP_VALUE_GE;
1000 }
1001 
// Returns "VALUE_GE <slot> <limit>", with the limit added via
// description_append().
// NOTE(review): the line naming this function (listing line 1003) is missing.
1002 string
1004 {
1005  string desc = "VALUE_GE ";
1006  desc += str(slot);
1007  desc += ' ';
1008  description_append(desc, limit);
1009  return desc;
1010 }
1011 
// Expands the wildcard pattern against the database's term list and combines
// the matching terms' postlists according to the combiner operator (treated
// as OP_OR when factor is 0).  qopt->in_synonym is set while expanding under
// OP_SYNONYM and restored before the result is built.
// NOTE(review): the return type line (listing line 1012) and the two `if`
// lines opening the blocks closed at listing lines 1053 and 1066 (listing
// lines 1042 and 1058) are missing from this listing; presumably both test
// max_type - confirm against the original source.
1013 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
1014 {
1015  LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
1016  Query::op op = combiner;
 // Weight factor handed to each expanded term; stays 0 for OP_SYNONYM.
1017  double or_factor = 0.0;
1018  if (factor == 0.0) {
1019  // If we have a factor of 0, we don't care about the weights, so
1020  // we're just like a normal OR query.
1021  op = Query::OP_OR;
1022  } else if (op != Query::OP_SYNONYM) {
1023  or_factor = factor;
1024  }
1025 
 // Remember the flag so it can be restored after the expansion.
1026  bool old_in_synonym = qopt->in_synonym;
1027  if (!old_in_synonym) {
1028  qopt->in_synonym = (op == Query::OP_SYNONYM);
1029  }
1030 
1031  OrContext ctx(qopt, 0);
1032  AutoPtr<TermList> t(qopt->db.open_allterms(pattern));
1033  Xapian::termcount expansions_left = max_expansion;
1034  // If there's no expansion limit, set expansions_left to the maximum
1035  // value Xapian::termcount can hold.
1036  if (expansions_left == 0)
1037  --expansions_left;
1038  while (true) {
1039  t->next();
1040  if (t->at_end())
1041  break;
1043  if (expansions_left-- == 0) {
1044  if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
1045  break;
1046  string msg("Wildcard ");
1047  msg += pattern;
1048  msg += "* expands to more than ";
1049  msg += str(max_expansion);
1050  msg += " terms";
1051  throw Xapian::WildcardError(msg);
1052  }
1053  }
1054  const string & term = t->get_termname();
1055  ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
1056  }
1057 
1059  // FIXME: open_lazy_post_list() results in the term getting registered
1060  // for stats, so we still incur an avoidable cost from the full
1061  // expansion size of the wildcard, which is most likely to be visible
1062  // with the remote backend. Perhaps we should split creating the lazy
1063  // postlist from registering the term for stats.
1064  if (ctx.size() > max_expansion)
1065  ctx.select_most_frequent(max_expansion);
1066  }
1067 
 // OP_SYNONYM counts as one subquery; other combiners count every expansion.
1068  if (factor != 0.0) {
1069  if (op != Query::OP_SYNONYM) {
1070  qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1071  } else {
1072  qopt->inc_total_subqs();
1073  }
1074  }
1075 
1076  qopt->in_synonym = old_in_synonym;
1077 
1078  if (ctx.empty())
1079  RETURN(new EmptyPostList);
1080 
1081  if (op == Query::OP_MAX)
1082  RETURN(ctx.postlist_max());
1083 
1084  PostList * pl = ctx.postlist();
1085  if (op == Query::OP_OR)
1086  RETURN(pl);
1087 
1088  // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1089  // SynonymPostList, which supplies the weights.
1090  //
1091  // We know the subqueries from a wildcard expansion are wdf-disjoint
1092  // (i.e. each wdf from the document contributes at most itself to the
1093  // wdf of the subquery).
1094  RETURN(qopt->make_synonym_postlist(pl, factor, true));
1095 }
1096 
// Always reports a length of 1.
// NOTE(review): the line naming this function (listing line 1098) is missing;
// its position between QueryWildcard methods suggests it belongs to that
// class - confirm.
1097 termcount
1099 {
1100  // We currently assume wqf is 1 for calculating the synonym's weight
1101  // since conceptually the synonym is one "virtual" term. If we were
1102  // to combine multiple occurrences of the same synonym expansion into
1103  // a single instance with wqf set, we would want to track the wqf.
1104  return 1;
1105 }
1106 
1107 void
1108 QueryWildcard::serialise(string & result) const
1109 {
1110  result += static_cast<char>(0x0b);
1111  result += encode_length(max_expansion);
1112  result += static_cast<unsigned char>(max_type);
1113  result += static_cast<unsigned char>(combiner);
1114  result += encode_length(pattern.size());
1115  result += pattern;
1116 }
1117 
// Reports OP_WILDCARD.
// NOTE(review): the line naming this function (listing line 1119) is missing;
// presumably a QueryWildcard method - confirm.
1118 Query::op
1120 {
1121  return Query::OP_WILDCARD;
1122 }
1123 
// Returns "WILDCARD ", then a word for the combiner ("SYNONYM ", "MAX ",
// "OR ", or "BAD " for anything else), then the pattern added via
// description_append().
// NOTE(review): the line naming this function (listing line 1125) is missing.
1124 string
1126 {
1127  string desc = "WILDCARD ";
1128  switch (combiner) {
1129  case Query::OP_SYNONYM:
1130  desc += "SYNONYM ";
1131  break;
1132  case Query::OP_MAX:
1133  desc += "MAX ";
1134  break;
1135  case Query::OP_OR:
1136  desc += "OR ";
1137  break;
1138  default:
1139  desc += "BAD ";
1140  break;
1141  }
1142  description_append(desc, pattern);
1143  return desc;
1144 }
1145 
// Returns the sum of get_length() over all subqueries.
// NOTE(review): the signature lines (listing lines 1146-1147) are missing
// from this listing; the use of `subqueries` suggests a QueryBranch method,
// presumably get_length() - confirm.
1148 {
1149  // Sum results from all subqueries.
1150  Xapian::termcount result = 0;
1151  QueryVector::const_iterator i;
1152  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1153  // MatchNothing subqueries should have been removed by done(), but we
1154  // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1155  // segfault anyway.
1156  result += (*i).internal->get_length();
1157  }
1158  return result;
1159 }
1160 
1161 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1162 #define MISC(X) static_cast<unsigned char>(X)
1163 void
1164 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1165 {
1166  static const unsigned char first_byte[] = {
1167  MULTIWAY(0), // OP_AND
1168  MULTIWAY(1), // OP_OR
1169  MULTIWAY(2), // OP_AND_NOT
1170  MULTIWAY(3), // OP_XOR
1171  MULTIWAY(4), // OP_AND_MAYBE
1172  MULTIWAY(5), // OP_FILTER
1173  MULTIWAY(14), // OP_NEAR
1174  MULTIWAY(15), // OP_PHRASE
1175  0, // OP_VALUE_RANGE
1176  MISC(3), // OP_SCALE_WEIGHT
1177  MULTIWAY(13), // OP_ELITE_SET
1178  0, // OP_VALUE_GE
1179  0, // OP_VALUE_LE
1180  MULTIWAY(6), // OP_SYNONYM
1181  MULTIWAY(7) // OP_MAX
1182  };
1183  Xapian::Query::op op_ = get_op();
1184  AssertRel(size_t(op_),<,sizeof(first_byte));
1185  unsigned char ch = first_byte[op_];
1186  if (ch & 0x80) {
1187  // Multi-way operator.
1188  if (subqueries.size() < 8)
1189  ch |= subqueries.size();
1190  result += ch;
1191  if (subqueries.size() >= 8)
1192  result += encode_length(subqueries.size() - 8);
1193  if (ch >= MULTIWAY(13))
1194  result += encode_length(parameter);
1195  } else {
1196  result += ch;
1197  }
1198 
1199  QueryVector::const_iterator i;
1200  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1201  // MatchNothing subqueries should have been removed by done().
1202  Assert((*i).internal.get());
1203  (*i).internal->serialise(result);
1204  }
1205 
1206  // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1207  // appended next by an overloaded serialise() method in the subclass.
1208 }
1209 
1210 void
1211 QueryBranch::serialise(string & result) const
1212 {
1213  QueryBranch::serialise_(result);
1214 }
1215 
1216 void
1217 QueryNear::serialise(string & result) const
1218 {
1219  // FIXME: window - subqueries.size() ?
1220  QueryBranch::serialise_(result, window);
1221 }
1222 
1223 void
1224 QueryPhrase::serialise(string & result) const
1225 {
1226  // FIXME: window - subqueries.size() ?
1227  QueryBranch::serialise_(result, window);
1228 }
1229 
1230 void
1231 QueryEliteSet::serialise(string & result) const
1232 {
1233  // FIXME: set_size - subqueries.size() ?
1234  QueryBranch::serialise_(result, set_size);
1235 }
1236 
1237 void
1238 QueryBranch::gather_terms(void * void_terms) const
1239 {
1240  // Gather results from all subqueries.
1241  QueryVector::const_iterator i;
1242  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1243  // MatchNothing subqueries should have been removed by done().
1244  Assert((*i).internal.get());
1245  (*i).internal->gather_terms(void_terms);
1246  }
1247 }
1248 
// Feeds every subquery from index `first` onwards into `ctx` through
// postlist_sub_or_like().  Afterwards, if `elite_set_size` is non-zero and
// smaller than the number of entries this call added to `ctx`, asks the
// context to select an elite set from those entries.
// NOTE(review): the first declarator line (listing line 1250) is absent from
// this extract; the LOGCALL_VOID string names the function as
// QueryBranch::do_or_like.
1249 void
1251  Xapian::termcount elite_set_size, size_t first) const
1252 {
1253  LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1254 
1255  // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1256  // for AND-like operations.
1257 
1258  // OP_SYNONYM with a single subquery is only simplified by
1259  // QuerySynonym::done() if the single subquery is a term or MatchAll.
1260  Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1261 
// Remember how many entries ctx held on entry so that only the ones added
// below are counted in out_of.
1262  size_t size_before = ctx.size();
1263  QueryVector::const_iterator q;
1264  for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1265  // MatchNothing subqueries should have been removed by done().
1266  Assert((*q).internal.get());
1267  (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1268  }
1269 
1270  size_t out_of = ctx.size() - size_before;
1271  if (elite_set_size && elite_set_size < out_of) {
1272  ctx.select_elite_set(elite_set_size, out_of);
1273  // FIXME: This isn't quite right as we flatten ORs under the ELITE_SET
1274  // and then pick from amongst all the subqueries. Consider:
1275  //
1276  // Query subqs[] = {q1 | q2, q3 | q4};
1277  // Query q(OP_ELITE_SET, begin(subqs), end(subqs), 1);
1278  //
1279  // Here q should be either q1 | q2 or q3 | q4, but actually it'll be
1280  // just one of q1 or q2 or q3 or q4 (assuming those aren't themselves
1281  // OP_OR or OP_OR-like queries).
1282  }
1283 }
1284 
1285 PostList *
1286 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1287 {
1288  LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1289  OrContext ctx(qopt, subqueries.size());
1290  if (factor == 0.0) {
1291  // If we have a factor of 0, we don't care about the weights, so
1292  // we're just like a normal OR query.
1293  do_or_like(ctx, qopt, 0.0);
1294  return ctx.postlist();
1295  }
1296 
1297  bool old_in_synonym = qopt->in_synonym;
1298  qopt->in_synonym = true;
1299  do_or_like(ctx, qopt, 0.0);
1300  PostList * pl = ctx.postlist();
1301  qopt->in_synonym = old_in_synonym;
1302 
1303  bool wdf_disjoint = false;
1304  Assert(!subqueries.empty());
1305  auto type = (*subqueries.begin()).get_type();
1306  if (type == Query::OP_WILDCARD) {
1307  // Detect common easy case where all subqueries are OP_WILDCARD whose
1308  // constant prefixes form a prefix-free set.
1309  wdf_disjoint = true;
1310  vector<string> prefixes;
1311  for (auto&& q : subqueries) {
1312  if (q.get_type() != Query::OP_WILDCARD) {
1313  wdf_disjoint = false;
1314  break;
1315  }
1316  auto qw = static_cast<const QueryWildcard*>(q.internal.get());
1317  prefixes.push_back(qw->get_pattern());
1318  }
1319 
1320  if (wdf_disjoint) {
1321  sort(prefixes.begin(), prefixes.end());
1322  const string* prev = nullptr;
1323  for (const auto& i : prefixes) {
1324  if (prev) {
1325  if (startswith(i, *prev)) {
1326  wdf_disjoint = false;
1327  break;
1328  }
1329  }
1330  prev = &i;
1331  }
1332  }
1333  } else if (type == Query::LEAF_TERM) {
1334  // Detect common easy case where all subqueries are terms, none of
1335  // which are the same.
1336  wdf_disjoint = true;
1337  unordered_set<string> terms;
1338  for (auto&& q : subqueries) {
1339  if (q.get_type() != Query::LEAF_TERM) {
1340  wdf_disjoint = false;
1341  break;
1342  }
1343  auto qt = static_cast<const QueryTerm*>(q.internal.get());
1344  if (!terms.insert(qt->get_term()).second) {
1345  wdf_disjoint = false;
1346  break;
1347  }
1348  }
1349  }
1350 
1351  // We currently assume wqf is 1 for calculating the synonym's weight
1352  // since conceptually the synonym is one "virtual" term. If we were
1353  // to combine multiple occurrences of the same synonym expansion into
1354  // a single instance with wqf set, we would want to track the wqf.
1355 
1356  // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1357  // SynonymPostList, which supplies the weights.
1358  RETURN(qopt->make_synonym_postlist(pl, factor, wdf_disjoint));
1359 }
1360 
1361 PostList *
1362 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1363 {
1364  LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1365  OrContext ctx(qopt, subqueries.size());
1366  do_or_like(ctx, qopt, factor);
1367  if (factor == 0.0) {
1368  // If we have a factor of 0, we don't care about the weights, so
1369  // we're just like a normal OR query.
1370  RETURN(ctx.postlist());
1371  }
1372 
1373  // We currently assume wqf is 1 for calculating the OP_MAX's weight
1374  // since conceptually the OP_MAX is one "virtual" term. If we were
1375  // to combine multiple occurrences of the same OP_MAX expansion into
1376  // a single instance with wqf set, we would want to track the wqf.
1377  RETURN(ctx.postlist_max());
1378 }
1379 
// Three small accessors.  In order they return: the result of get_op(); the
// number of entries in `subqueries`; and the subquery at index `n` (no
// bounds check is visible here).
// NOTE(review): the declarator lines (listing lines 1380-1381, 1387 and
// 1393) are absent from this extract, so the owning class and method names
// cannot be confirmed from here.
1382 {
1383  return get_op();
1384 }
1385 
1386 size_t
1388 {
1389  return subqueries.size();
1390 }
1391 
1392 const Query
1394 {
1395  return subqueries[n];
1396 }
1397 
// Builds "(" + the subquery descriptions joined by `op` + ")".  When
// `parameter` is non-zero, each separator is followed by the parameter's
// decimal form and a space.
// "Not the first subquery" is detected by `desc` having grown beyond the
// opening parenthesis.
// NOTE(review): the first declarator line (listing line 1399) is absent from
// this extract.
1398 const string
1400  Xapian::termcount parameter) const
1401 {
1402  string desc = "(";
1403  QueryVector::const_iterator i;
1404  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1405  if (desc.size() > 1) {
1406  desc += op;
1407  if (parameter) {
1408  desc += str(parameter);
1409  desc += ' ';
1410  }
1411  }
1412  Assert((*i).internal.get());
1413  // MatchNothing subqueries should have been removed by done(), and we
1414  // shouldn't get called before done() is, since that happens at the
1415  // end of the Xapian::Query constructor.
1416  desc += (*i).internal->get_description();
1417  }
1418  desc += ')';
1419  return desc;
1420 }
1421 
// Finalises the query: a window of 0 is replaced by the number of
// subqueries, then the rest is delegated to QueryAndLike::done().
// NOTE(review): the return type and declarator lines (listing lines
// 1422-1423) are absent from this extract.
1424 {
1425  // If window size not specified, default it.
1426  if (window == 0)
1427  window = subqueries.size();
1428  return QueryAndLike::done();
1429 }
1430 
1431 void
1432 QueryScaleWeight::gather_terms(void * void_terms) const
1433 {
1434  subquery.internal->gather_terms(void_terms);
1435 }
1436 
1437 void QueryTerm::serialise(string & result) const
1438 {
1439  size_t len = term.size();
1440  if (len == 0) {
1441  if (wqf == 1 && pos == 0) {
1442  // Query::MatchAll
1443  result += '\x0f';
1444  } else {
1445  // Weird mutant versions of MatchAll
1446  result += '\x0e';
1447  result += encode_length(wqf);
1448  result += encode_length(pos);
1449  }
1450  } else if (wqf == 1) {
1451  if (pos == 0) {
1452  // Single occurrence free-text term without position set.
1453  if (len >= 16) {
1454  result += static_cast<char>(0x40 | 0x10);
1455  result += encode_length(term.size() - 16);
1456  } else {
1457  result += static_cast<char>(0x40 | 0x10 | len);
1458  }
1459  result += term;
1460  } else {
1461  // Single occurrence free-text term with position set.
1462  if (len >= 16) {
1463  result += static_cast<char>(0x40 | 0x20);
1464  result += encode_length(term.size() - 16);
1465  } else {
1466  result += static_cast<char>(0x40 | 0x20 | len);
1467  }
1468  result += term;
1469  result += encode_length(pos);
1470  }
1471  } else if (wqf > 1 || pos > 0) {
1472  // General case.
1473  if (len >= 16) {
1474  result += static_cast<char>(0x40 | 0x30);
1475  result += encode_length(term.size() - 16);
1476  } else if (len) {
1477  result += static_cast<char>(0x40 | 0x30 | len);
1478  }
1479  result += term;
1480  result += encode_length(wqf);
1481  result += encode_length(pos);
1482  } else {
1483  // Typical boolean term.
1484  AssertEq(wqf, 0);
1485  AssertEq(pos, 0);
1486  if (len >= 16) {
1487  result += static_cast<char>(0x40);
1488  result += encode_length(term.size() - 16);
1489  } else {
1490  result += static_cast<char>(0x40 | len);
1491  }
1492  result += term;
1493  }
1494 }
1495 
1496 void QueryPostingSource::serialise(string & result) const
1497 {
1498  result += static_cast<char>(0x0c);
1499 
1500  const string & n = source->name();
1501  result += encode_length(n.size());
1502  result += n;
1503 
1504  const string & s = source->serialise();
1505  result += encode_length(s.size());
1506  result += s;
1507 }
1508 
1509 void QueryScaleWeight::serialise(string & result) const
1510 {
1511  Assert(subquery.internal.get());
1512  result += '\x0d';
1513  result += serialise_double(scale_factor);
1514  subquery.internal->serialise(result);
1515 }
1516 
// Adds `subquery`, keeping the invariant that once a MatchNothing (null
// internal) subquery has been added it is the only entry and nothing more is
// appended.
// NOTE(review): the declarator line (listing line 1518) is absent from this
// extract; the comments inside refer to "AndLike".
1517 void
1519 {
1520  // If the AndLike is already MatchNothing, do nothing.
1521  if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1522  return;
1523  // If we're adding MatchNothing, discard any previous subqueries.
1524  if (subquery.internal.get() == NULL)
1525  subqueries.clear();
1526  subqueries.push_back(subquery);
1527 }
1528 
// Finalises the query: returns NULL when there are no subqueries, the sole
// subquery's internal pointer when there is exactly one (which may itself be
// NULL, i.e. MatchNothing), and `this` otherwise.
// NOTE(review): the return type and declarator lines (listing lines
// 1529-1530) are absent from this extract.
1531 {
1532  // Empty AndLike gives MatchNothing.
1533  if (subqueries.empty())
1534  return NULL;
1535  // We handle any subquery being MatchNothing in add_subquery() by leaving
1536  // a single MatchNothing subquery, and so this check results in AndLike
1537  // giving MatchNothing.
1538  if (subqueries.size() == 1)
1539  return subqueries[0].internal.get();
1540  return this;
1541 }
1542 
// Builds the postlist by collecting the subqueries into a fresh AndContext
// (via postlist_sub_and_like) and returning that context's postlist.
// NOTE(review): the return type line (listing line 1543) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1544 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1545 {
1546  LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1547  AndContext ctx(qopt, subqueries.size());
1548  postlist_sub_and_like(ctx, qopt, factor);
1549  RETURN(ctx.postlist());
1550 }
1551 
// Forwards `ctx`, `qopt` and `factor` to postlist_sub_and_like() on every
// subquery in order.
// NOTE(review): the declarator line (listing line 1553) is absent from this
// extract.
1552 void
1554 {
1555  QueryVector::const_iterator i;
1556  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1557  // MatchNothing subqueries should have been removed by done().
1558  Assert((*i).internal.get());
1559  (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1560  }
1561 }
1562 
// Appends `subquery` unless it is MatchNothing (null internal), which is
// silently dropped.
// NOTE(review): the declarator line (listing line 1564) is absent from this
// extract.
1563 void
1565 {
1566  // Drop any subqueries which are MatchNothing.
1567  if (subquery.internal.get() != NULL)
1568  subqueries.push_back(subquery);
1569 }
1570 
// Finalises the query: returns NULL when there are no subqueries, the sole
// subquery's internal pointer when there is exactly one, and `this`
// otherwise.
// NOTE(review): the return type and declarator lines (listing lines
// 1571-1572) are absent from this extract; the comment inside refers to
// "OrLike".
1573 {
1574  // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1575  // subqueries which are MatchNothing.
1576  if (subqueries.empty())
1577  return NULL;
1578  if (subqueries.size() == 1)
1579  return subqueries[0].internal.get();
1580  return this;
1581 }
1582 
// Adds `subquery`.  The first subquery is always stored as given.  Later
// (negated) subqueries are dropped if the first one is MatchNothing or if
// they are MatchNothing themselves, and an OP_SCALE_WEIGHT wrapper on them is
// replaced by the query it wraps.
// NOTE(review): the declarator line (listing line 1584) is absent from this
// extract; the comments inside refer to "AndNot".
1583 void
1585 {
1586  if (!subqueries.empty()) {
1587  // We're adding the 2nd or subsequent subquery, so this subquery is
1588  // negated.
1589  if (subqueries[0].internal.get() == NULL) {
1590  // The left side is already MatchNothing so drop any right side.
1591  //
1592  // MatchNothing AND_NOT X == MatchNothing
1593  return;
1594  }
1595  if (subquery.internal.get() == NULL) {
1596  // Drop MatchNothing on the right of AndNot.
1597  //
1598  // X AND_NOT MatchNothing == X
1599  return;
1600  }
1601  if (subquery.get_type() == subquery.OP_SCALE_WEIGHT) {
1602  // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1603  // AndNot as no weight is taken from them.
1604  subqueries.push_back(subquery.get_subquery(0));
1605  // The Query constructor for OP_SCALE_WEIGHT constructor should
1606  // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1607  Assert(subquery.get_subquery(0).internal.get() != NULL);
1608  return;
1609  }
1610  }
1611  subqueries.push_back(subquery);
1612 }
1613 
// Finalises the query: when only one subquery remains, returns its internal
// pointer (which may be NULL, i.e. MatchNothing); otherwise returns `this`.
// NOTE(review): the return type and declarator lines (listing lines
// 1614-1615) are absent from this extract.
1616 {
1617  // Any MatchNothing right subqueries get discarded by add_subquery() - if
1618  // that leaves just the left subquery, return that.
1619  //
1620  // If left subquery is MatchNothing, then add_subquery() discards all right
1621  // subqueries, so this check also gives MatchNothing for this case.
1622  if (subqueries.size() == 1)
1623  return subqueries[0].internal.get();
1624  return this;
1625 }
1626 
// Adds `subquery`.  The first subquery is always stored, even if it is
// MatchNothing; after a MatchNothing first subquery nothing more is added,
// and later MatchNothing subqueries are dropped.
// NOTE(review): the declarator line (listing line 1628) is absent from this
// extract; the comments inside refer to "AndMaybe".
1627 void
1629 {
1630  // If the left side of AndMaybe is already MatchNothing, do nothing.
1631  if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1632  return;
1633  // Drop any 2nd or subsequent subqueries which are MatchNothing.
1634  if (subquery.internal.get() != NULL || subqueries.empty())
1635  subqueries.push_back(subquery);
1636 }
1637 
// Finalises the query: when only one subquery remains, returns its internal
// pointer (which may be NULL, i.e. MatchNothing); otherwise returns `this`.
// NOTE(review): the return type and declarator lines (listing lines
// 1638-1639) are absent from this extract.
1640 {
1641  // Any MatchNothing right subqueries get discarded by add_subquery() - if
1642  // that leaves just the left subquery, return that.
1643  //
1644  // If left subquery is MatchNothing, then add_subquery() discards all right
1645  // subqueries, so this check also gives MatchNothing for this case.
1646  if (subqueries.size() == 1)
1647  return subqueries[0].internal.get();
1648  return this;
1649 }
1650 
// Builds the postlist by collecting the subqueries into a fresh OrContext
// (via do_or_like) and returning that context's postlist.
// NOTE(review): the return type line (listing line 1651) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1652 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1653 {
1654  LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1655  OrContext ctx(qopt, subqueries.size());
1656  do_or_like(ctx, qopt, factor);
1657  RETURN(ctx.postlist());
1658 }
1659 
1660 void
1661 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1662 {
1663  do_or_like(ctx, qopt, factor);
1664 }
1665 
// Builds an AndNotPostList from the first subquery's postlist (left, built
// with `factor`) and the OR-like combination of the remaining subqueries
// (right, built with factor 0.0).
// Both sides are held in AutoPtr until they are released into the new
// AndNotPostList.
// NOTE(review): the return type line (listing line 1666) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1667 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1668 {
1669  LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1670  AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1671  OrContext ctx(qopt, subqueries.size() - 1);
1672  do_or_like(ctx, qopt, 0.0, 0, 1);
1673  AutoPtr<PostList> r(ctx.postlist());
1674  RETURN(new AndNotPostList(l.release(), r.release(),
1675  qopt->matcher, qopt->db_size));
1676 }
1677 
// Adds the first subquery to `ctx` via postlist_sub_and_like(), then feeds
// the remaining subqueries (from index 1, with factor 0.0) into the context
// returned by ctx.get_not_ctx().
// NOTE(review): the first declarator line (listing line 1679) is absent from
// this extract.
1678 void
1680  QueryOptimiser* qopt,
1681  double factor) const
1682 {
1683  subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor);
1684  do_or_like(ctx.get_not_ctx(subqueries.size() - 1), qopt, 0.0, 0, 1);
1685 }
1686 
// Builds the postlist by collecting the subqueries into a fresh XorContext
// (via postlist_sub_xor) and returning that context's postlist.
// NOTE(review): the return type line (listing line 1687) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1688 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1689 {
1690  LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1691  XorContext ctx(qopt, subqueries.size());
1692  postlist_sub_xor(ctx, qopt, factor);
1693  RETURN(ctx.postlist());
1694 }
1695 
1696 void
1697 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1698 {
1699  QueryVector::const_iterator i;
1700  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1701  // MatchNothing subqueries should have been removed by done().
1702  Assert((*i).internal.get());
1703  (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1704  }
1705 }
1706 
// Builds the postlist for this query.  With a factor of 0.0 only the first
// subquery's postlist is returned.  Otherwise an AndMaybePostList combines
// it (left) with the OR-like combination of the remaining subqueries
// (right).
// NOTE(review): the return type line (listing line 1707) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1708 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1709 {
1710  LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1711  // FIXME: Combine and-like side with and-like stuff above.
1712  AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1713  if (factor == 0.0) {
1714  // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1715  RETURN(l.release());
1716  }
1717  OrContext ctx(qopt, subqueries.size() - 1);
1718  do_or_like(ctx, qopt, factor, 0, 1);
1719  AutoPtr<PostList> r(ctx.postlist());
1720  RETURN(new AndMaybePostList(l.release(), r.release(),
1721  qopt->matcher, qopt->db_size));
1722 }
1723 
// Adds the first subquery to `ctx` via postlist_sub_and_like(), then feeds
// the remaining subqueries (from index 1, with the caller's factor) into the
// context returned by ctx.get_maybe_ctx().
// NOTE(review): the first declarator line (listing line 1725) is absent from
// this extract.
1724 void
1726  QueryOptimiser* qopt,
1727  double factor) const
1728 {
1729  subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor);
1730  do_or_like(ctx.get_maybe_ctx(subqueries.size() - 1), qopt, factor, 0, 1);
1731 }
1732 
// Builds the postlist by collecting the subqueries into a fresh AndContext
// and returning that context's postlist.  The call to postlist_sub_and_like
// is explicitly qualified with QueryFilter::.
// NOTE(review): the return type line (listing line 1733) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1734 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1735 {
1736  LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1737  AndContext ctx(qopt, subqueries.size());
1738  QueryFilter::postlist_sub_and_like(ctx, qopt, factor);
1739  RETURN(ctx.postlist());
1740 }
1741 
// Forwards each subquery to postlist_sub_and_like().  Only the first
// subquery receives the caller's `factor`; the local copy is zeroed after
// the first iteration so the rest are built with factor 0.0.
// NOTE(review): the declarator line (listing line 1743) is absent from this
// extract.
1742 void
1744 {
1745  QueryVector::const_iterator i;
1746  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1747  // MatchNothing subqueries should have been removed by done().
1748  Assert((*i).internal.get());
1749  (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1750  // Second and subsequent subqueries are unweighted.
1751  factor = 0.0;
1752  }
1753 }
1754 
// Adds a positional query's subqueries to `ctx`, taking one of three paths:
//  - no positional data in the full database: handled as a plain AND-like
//    query;
//  - no positional data in this sub-database: `ctx` is shrunk to 0 entries
//    and nothing is added;
//  - otherwise: each subquery's postlist is built with qopt->need_positions
//    set (non-term subqueries are wrapped in OrPosPostList), added to `ctx`,
//    and a positional filter for `op`/`window` is recorded on `ctx`.
// qopt->need_positions is restored to its previous value on the last path.
// NOTE(review): the declarator line (listing line 1756) is absent from this
// extract, so the owning class and the origin of `op` cannot be confirmed
// from here.
1755 void
1757 {
1758  if (!qopt->full_db_has_positions()) {
1759  // No positional data anywhere, so just handle as AND.
1760  QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1761  return;
1762  }
1763 
1764  if (!qopt->db.has_positions()) {
1765  // No positions in this subdatabase so this matches nothing, which
1766  // means the whole andcontext matches nothing.
1767  //
1768  // Bailing out here means we don't recurse deeper and that means we
1769  // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1770  // the phrase, but at least one shard will count them, and the matcher
1771  // takes the highest answer (since 1.4.6).
1772  ctx.shrink(0);
1773  return;
1774  }
1775 
1776  bool old_need_positions = qopt->need_positions;
1777  qopt->need_positions = true;
1778 
1779  QueryVector::const_iterator i;
1780  for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1781  // MatchNothing subqueries should have been removed by done().
1782  Assert((*i).internal.get());
1783  bool is_term = ((*i).internal->get_type() == Query::LEAF_TERM);
1784  PostList* pl = (*i).internal->postlist(qopt, factor);
1785  if (!is_term)
1786  pl = new OrPosPostList(pl);
1787  ctx.add_postlist(pl);
1788  }
1789  // Record the positional filter to apply higher up the tree.
1790  ctx.add_pos_filter(op, subqueries.size(), window);
1791 
1792  qopt->need_positions = old_need_positions;
1793 }
1794 
// Two void functions whose declarators and bodies are not visible.
// NOTE(review): listing lines 1796, 1798, 1802 and 1804 are absent from this
// extract, so what these functions are and what they do cannot be determined
// from here; consult the original source before editing.
1795 void
1797 {
1799 }
1800 
1801 void
1803 {
1805 }
1806 
// Builds the postlist by collecting the subqueries into a fresh OrContext
// with `set_size` passed as the elite set size, and returning that context's
// postlist.
// NOTE(review): the return type line (listing line 1807) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1808 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1809 {
1810  LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1811  OrContext ctx(qopt, subqueries.size());
1812  do_or_like(ctx, qopt, factor, set_size);
1813  RETURN(ctx.postlist());
1814 }
1815 
// Adds the subqueries to the caller's `ctx`, passing `set_size` as the elite
// set size.
// NOTE(review): the declarator line (listing line 1817) is absent from this
// extract.
1816 void
1818 {
1819  do_or_like(ctx, qopt, factor, set_size);
1820 }
1821 
// Builds the postlist via do_synonym().  The optimiser's total_subqs counter
// is read beforehand and written back afterwards, incremented by one only
// when `factor` is non-zero, so whatever the subqueries added to the counter
// is discarded.
// NOTE(review): the return type line (listing line 1822) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1823 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1824 {
1825  LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1826  // Save and restore total_subqs so we only add one for the whole
1827  // OP_SYNONYM subquery (or none if we're not weighted).
1828  Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1829  if (factor != 0.0)
1830  ++save_total_subqs;
1831  PostList * pl = do_synonym(qopt, factor);
1832  qopt->set_total_subqs(save_total_subqs);
1833  RETURN(pl);
1834 }
1835 
// Finalises the query: returns NULL when there are no subqueries; with a
// single subquery which is a term, MatchAll or OP_SYNONYM, returns that
// subquery's internal pointer; otherwise returns `this`.
// NOTE(review): the return type and declarator lines (listing lines
// 1836-1837) are absent from this extract.  Listing line 1856, the statement
// which acts on `q` in the OP_WILDCARD branch, is also absent, so that
// branch has no visible effect as shown here.  Check the original source
// before editing.
1838 {
1839  // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1840  // subqueries which are MatchNothing.
1841  if (subqueries.empty())
1842  return NULL;
1843  if (subqueries.size() == 1) {
1844  Query::op sub_type = subqueries[0].get_type();
1845  // Synonym of a single subquery should only be simplified if that
1846  // subquery is a term (or MatchAll), or if it's also OP_SYNONYM. Note
1847  // that MatchNothing subqueries are dropped, so we'd never get here
1848  // with a single MatchNothing subquery.
1849  if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1850  sub_type == Query::OP_SYNONYM) {
1851  return subqueries[0].internal.get();
1852  }
1853  if (sub_type == Query::OP_WILDCARD) {
1854  auto q = static_cast<QueryWildcard*>(subqueries[0].internal.get());
1855  // SYNONYM over WILDCARD X -> WILDCARD SYNONYM for any combiner X.
1857  }
1858  }
1859  return this;
1860 }
1861 
// Builds the postlist via do_max().  The optimiser's total_subqs counter is
// read beforehand and written back afterwards, incremented by one only when
// `factor` is non-zero, so whatever the subqueries added to the counter is
// discarded.
// NOTE(review): the return type line (listing line 1862) is absent from this
// extract; the LOGCALL gives it as PostingIterator::Internal *.
1863 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1864 {
1865  LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1866  // Save and restore total_subqs so we only add one for the whole
1867  // OP_MAX subquery (or none if we're not weighted).
1868  Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1869  if (factor != 0.0)
1870  ++save_total_subqs;
1871  PostList * pl = do_max(qopt, factor);
1872  qopt->set_total_subqs(save_total_subqs);
1873  RETURN(pl);
1874 }
1875 
// A run of one-line accessors, each returning a fixed Xapian::Query operator
// constant (OP_AND, OP_OR, OP_XOR, OP_FILTER, OP_NEAR, OP_PHRASE and OP_MAX
// are visible).
// NOTE(review): every return type and declarator line is absent from this
// extract, and five of the bodies (listing lines 1891, 1903, 1927, 1933 and
// 1945) are missing their return statement.  Which class each accessor
// belongs to, and what the five empty ones return, cannot be determined from
// here.
1878 {
1879  return Xapian::Query::OP_AND;
1880 }
1881 
1884 {
1885  return Xapian::Query::OP_OR;
1886 }
1887 
1890 {
1892 }
1893 
1896 {
1897  return Xapian::Query::OP_XOR;
1898 }
1899 
1902 {
1904 }
1905 
1908 {
1909  return Xapian::Query::OP_FILTER;
1910 }
1911 
1914 {
1915  return Xapian::Query::OP_NEAR;
1916 }
1917 
1920 {
1921  return Xapian::Query::OP_PHRASE;
1922 }
1923 
1926 {
1928 }
1929 
1932 {
1934 }
1935 
1938 {
1939  return Xapian::Query::OP_MAX;
1940 }
1941 
1944 {
1946 }
1947 
// A run of description builders.  Each returns get_description_helper()
// called with that operator's separator string; the NEAR and PHRASE variants
// also pass `window`, and the ELITE_SET variant passes `set_size`.  The
// SYNONYM variant special-cases a single subquery, producing
// "(SYNONYM <subquery description>)".
// NOTE(review): every declarator line is absent from this extract, so the
// owning class of each function cannot be confirmed from here.
1948 string
1950 {
1951  return get_description_helper(" AND ");
1952 }
1953 
1954 string
1956 {
1957  return get_description_helper(" OR ");
1958 }
1959 
1960 string
1962 {
1963  return get_description_helper(" AND_NOT ");
1964 }
1965 
1966 string
1968 {
1969  return get_description_helper(" XOR ");
1970 }
1971 
1972 string
1974 {
1975  return get_description_helper(" AND_MAYBE ");
1976 }
1977 
1978 string
1980 {
1981  return get_description_helper(" FILTER ");
1982 }
1983 
1984 string
1986 {
1987  return get_description_helper(" NEAR ", window);
1988 }
1989 
1990 string
1992 {
1993  return get_description_helper(" PHRASE ", window);
1994 }
1995 
1996 string
1998 {
1999  return get_description_helper(" ELITE_SET ", set_size);
2000 }
2001 
2002 string
2004 {
2005  if (subqueries.size() == 1) {
2006  string d = "(SYNONYM ";
2007  d += subqueries[0].internal->get_description();
2008  d += ")";
2009  return d;
2010  }
2011  return get_description_helper(" SYNONYM ");
2012 }
2013 
2014 string
2016 {
2017  return get_description_helper(" MAX ");
2018 }
2019 
// Two function fragments.  The first has no visible body.  The second
// always throws Xapian::InvalidOperationError with the message
// "Query is invalid".
// NOTE(review): the return type and declarator lines (listing lines
// 2020-2021 and 2026-2027) and the first body (listing line 2023) are absent
// from this extract.
2022 {
2024 }
2025 
2028 {
2029  throw Xapian::InvalidOperationError("Query is invalid");
2030 }
2031 
2032 void
2033 QueryInvalid::serialise(std::string & result) const
2034 {
2035  result += static_cast<char>(0x00);
2036 }
2037 
// Returns the fixed description string "<INVALID>".
// NOTE(review): the declarator line (listing line 2039) is absent from this
// extract, so the owning class cannot be confirmed from here.
2038 string
2040 {
2041  return "<INVALID>";
2042 }
2043 
2044 }
2045 }
Wrapper postlist providing positions for an OR.
Definition: orpospostlist.h:28
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
#define MISC(X)
OrContext(QueryOptimiser *qopt_, size_t reserve)
#define RETURN(A)
Definition: debuglog.h:493
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
#define Assert(COND)
Definition: omassert.h:122
vector< PostList * > pls
std::string get_description() const
N-way OR postlist with wt=max(wt_i).
Definition: maxpostlist.h:32
Xapian::doccount db_size
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Wildcard expansion.
Definition: query.h:255
Xapian::Query::op get_type() const
Abstract base class for postlists.
Definition: postlist.h:37
const Query get_subquery(size_t n) const
Read a top level subquery.
Definition: query.cc:226
XorContext(QueryOptimiser *qopt_, size_t reserve)
N-way XOR postlist.
N-way XOR postlist.
Xapian::Query::op get_op() const
Return docs containing terms forming a particular exact phrase.
virtual Xapian::doccount get_value_freq(Xapian::valueno slot) const =0
Return the frequency of a given value slot.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
length encoded as a string
#define AssertEq(A, B)
Definition: omassert.h:124
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
virtual void postlist_sub_and_like(Xapian::Internal::AndContext &ctx, QueryOptimiser *qopt, double factor) const
Merged postlist: items from one list, weights from both.
A postlist with weights modified by another postlist.
Postlist which matches an exact phrase using positional information.
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_total_subqs(Xapian::termcount n)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Query internals.
Xapian::Query::op get_op() const
virtual termcount get_length() const
Match documents which an odd number of subqueries match.
Definition: query.h:107
#define AssertRel(A, REL, B)
Definition: omassert.h:123
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
A PostList which contains no entries.
Xapian::Query::op get_op() const
std::string get_description() const
A PostList which contains no entries.
Definition: emptypostlist.h:27
op
Query operators.
Definition: query.h:78
Base class for databases.
Definition: database.h:57
virtual const Query get_subquery(size_t n) const
Postlist which matches a phrase using positional information.
Wrapper postlist providing positions for an OR.
void serialise(std::string &result) const
void inc_total_subqs()
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
OrContext & get_not_ctx(size_t reserve)
WildcardError indicates an error expanding a wildcarded query.
Definition: error.h:1013
virtual void postlist_sub_xor(Xapian::Internal::XorContext &ctx, QueryOptimiser *qopt, double factor) const
PostList * do_synonym(QueryOptimiser *qopt, double factor) const
External sources of posting information.
QueryPostingSource(PostingSource *source_)
const std::string get_description_helper(const char *op, Xapian::termcount window=0) const
virtual void postlist_sub_or_like(Xapian::Internal::OrContext &ctx, QueryOptimiser *qopt, double factor) const
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
STL namespace.
Pick the maximum weight of any subquery.
Definition: query.h:249
A postlist comprising two postlists ORed together.
Definition: orpostlist.h:38
Convert types to std::string.
const Xapian::PostingSource * get_posting_source(const std::string &name) const
Get a posting source given a name.
Definition: registry.cc:286
virtual bool has_positions() const =0
Check whether this database contains any positional information.
void serialise(std::string &result) const
PostList * do_max(QueryOptimiser *qopt, double factor) const
AutoPtr< OrContext > not_ctx
virtual PostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
virtual LeafPostList * open_post_list(const string &tname) const =0
Open a posting list.
std::string encode_length(T len)
Encode a length as a variable-length string.
Definition: length.h:36
void gather_terms(void *void_terms) const
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:49
bool operator()(const PostList *a, const PostList *b) const
Order by descending get_termfreq_est().
Xapian::Query::op get_op() const
void select_most_frequent(size_t set_size)
Select the set_size postlists with the highest term frequency.
void serialise(std::string &result) const
void destroy_postlist(PostList *pl)
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
Abstract base class for leaf postlists.
MultiMatch * matcher
#define rare(COND)
Definition: config.h:565
std::string get_description() const
Return docs containing terms forming a particular phrase.
Xapian::Query::op get_type() const
void do_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, Xapian::termcount elite_set_size=0, size_t first=0) const
Xapian::Query API class.
void add_postlist(PostList *pl)
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
Class providing an operator which sorts postlists to select max or terms.
PostList * postlist(PostList *pl, const vector< PostList *> &pls) const
OR of two posting lists.
void serialise(std::string &result) const
virtual void gather_terms(void *void_terms) const
std::string get_description() const
Hierarchy of classes which Xapian can throw as exceptions.
std::string get_description() const
Xapian::Query::op get_op() const
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:72
const Query get_subquery(size_t n) const
void serialise_(std::string &result, Xapian::termcount parameter=0) const
functions to serialise and unserialise a double
Return items which are in A, unless they&#39;re in B.
virtual std::string get_value_upper_bound(Xapian::valueno slot) const =0
Get an upper bound on the values stored in the given value slot.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Postlist which matches terms occurring within a specified window.
Definition: nearpostlist.h:38
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Xapian::Internal::opt_intrusive_ptr< PostingSource > source
Definition: queryinternal.h:71
Limit OP_WILDCARD expansion to the most frequent terms.
Definition: query.h:307
std::string get_description() const
void description_append(std::string &desc, const std::string &s)
Definition: unittest.cc:102
Xapian::termcount get_total_subqs() const
virtual size_t get_num_subqueries() const
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
PostList * make_synonym_postlist(PostList *pl, double factor, bool wdf_disjoint)
Xapian::Query::op get_type() const
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
#define MULTIWAY(X)
Value returned by get_type() for MatchAll or equivalent.
Definition: query.h:276
Scale the weight contributed by a subquery.
Definition: query.h:166
Match only documents where all subqueries match near and in order.
Definition: query.h:152
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
std::string get_description() const
Value returned by get_type() for a PostingSource.
Definition: query.h:269
virtual Xapian::doccount get_termfreq_est() const =0
Get an estimate of the number of documents indexed by this term.
void add_subquery(const Xapian::Query &subquery)
std::string get_description() const
Registry for user subclasses.
Definition: registry.h:47
void postlist_sub_xor(XorContext &ctx, QueryOptimiser *qopt, double factor) const
void select_elite_set(size_t set_size, size_t out_of)
Select the best set_size postlists from the last out_of added.
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
Match only documents where a value slot is >= a given value.
Definition: query.h:223
void gather_terms(void *void_terms) const
Xapian::Query::op get_type() const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor) const
void serialise(std::string &result) const
virtual TermList * open_allterms(const string &prefix) const =0
Open an allterms list.
std::string get_description() const
bool operator()(const PostList *a, const PostList *b)
Return true if and only if a has a strictly greater termweight than b.
N-way AND postlist.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
void add_pos_filter(Query::op op_, size_t n_subqs, Xapian::termcount window)
Xapian::Query::op get_type() const
Return document ids matching a >= test on a specified doc value.
Match only documents where a value slot is within a given range.
Definition: query.h:158
string str(int value)
Convert int to std::string.
Definition: str.cc:90
Xapian::Query::op get_op() const
Match only documents where a value slot is <= a given value.
Definition: query.h:231
std::string get_description() const
void serialise(std::string &result) const
Details passed around while building PostList tree from Query tree.
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor) const
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
Return docs containing terms within a specified window.
Construct an invalid query.
Definition: query.h:263
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_type() const
void serialise(std::string &result) const
LeafPostList * open_lazy_post_list(const std::string &term, Xapian::termcount wqf, double factor)
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
void serialise(std::string &result) const
void add_subquery(const Xapian::Query &subquery)
virtual std::string get_value_lower_bound(Xapian::valueno slot) const =0
Get a lower bound on the values stored in the given value slot.
virtual double get_maxweight() const =0
Return an upper bound on what get_weight() can return.
void shrink(size_t new_size)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
LeafPostList * open_post_list(const std::string &term, Xapian::termcount wqf, double factor)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
void serialise(std::string &result) const
Append a string to an object description, escaping invalid UTF-8.
Comparison functor which orders PostList* by descending get_termfreq_est().
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Match only documents which all subqueries match.
Definition: query.h:84
virtual Query::Internal * done()=0
Xapian::Query::op get_op() const
void serialise(std::string &result) const
std::string serialise_double(double v)
Serialise a double to a string.
void decode_length_and_check(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:112
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
char name[9]
Definition: dbcheck.cc:55
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
const Xapian::Database::Internal & db
OrContext & get_maybe_ctx(size_t reserve)
std::string get_description() const
Match only documents where all subqueries match near each other.
Definition: query.h:140
static Query::Internal * unserialise(const char **p, const char *end, const Registry &reg)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
std::string get_description() const
Xapian::Query::op get_op() const
Xapian::Query::op get_op() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Value returned by get_type() for a term.
Definition: query.h:266
void gather_terms(void *void_terms) const
Return document ids from an external source.
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
QueryScaleWeight(double factor, const Query &subquery_)
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Match documents which at least one subquery matches.
Definition: query.h:92
N-way AND postlist.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
void postlist_windowed(Xapian::Query::op op, AndContext &ctx, QueryOptimiser *qopt, double factor) const
bool full_db_has_positions() const
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
Various handy helpers which std::string really should provide.
Abstract base class for termlists.
PostingSource * release()
Start reference counting this object.
Stop expanding when OP_WILDCARD reaches its expansion limit.
Definition: query.h:297
op get_type() const
Get the type of the top level of the query.
Definition: query.cc:212
void serialise(std::string &result) const
Xapian::Query::op get_op() const
A postlist generated by taking one postlist (the left-hand postlist), and removing any documents which also occur in the other (right-hand) postlist.
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
const Query get_subquery(size_t n) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Return document ids matching a range test on a specified doc value.
std::string get_description() const
Xapian::Query::op get_op() const
PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_, Xapian::termcount window_)
Various assertion macros.
Class representing a query.
Definition: query.h:46
N-way OR postlist with wt=max(wt_i)
Xapian::Query::op get_type() const
Xapian::Query::op get_op() const
list< PosFilter > pos_filters
Xapian::doccount shard_index
virtual void add_subquery(const Xapian::Query &subquery)=0
std::string get_description() const
AndContext(QueryOptimiser *qopt_, size_t reserve)
#define XAPIAN_NOEXCEPT
Definition: attributes.h:39
void decode_length(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:94
Xapian::Query::op get_type() const
Wrapper around standard unique_ptr template.
Xapian::Query::op get_type() const
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
AutoPtr< OrContext > maybe_ctx
void serialise(std::string &result) const
QueryWildcard * change_combiner(Xapian::Query::op new_op)
Change the combining operator.
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
virtual Query::op get_type() const =0
std::string get_description() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const