xapian-core  2.0.0
query.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2011,2012,2013,2015,2016,2017,2018,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "xapian/query.h"
24 #include "queryinternal.h"
25 
26 #include <algorithm>
27 #include <string_view>
28 
29 #include "debuglog.h"
30 #include "omassert.h"
31 #include "vectortermlist.h"
32 
33 #include "xapian/error.h"
34 
35 using namespace std;
36 
37 namespace Xapian {
38 
39 const Query Query::MatchAll(string_view{});
40 
41 const Query Query::MatchNothing;
42 
43 Query::Query(string_view term, Xapian::termcount wqf, Xapian::termpos pos)
44  : internal(new Xapian::Internal::QueryTerm(term, wqf, pos))
45 {
46  LOGCALL_CTOR(API, "Query", term | wqf | pos);
47 }
48 
50  : internal(new Xapian::Internal::QueryPostingSource(source))
51 {
52  LOGCALL_CTOR(API, "Query", source);
53 }
54 
55 Query::Query(double factor, const Xapian::Query & subquery)
56 {
57  LOGCALL_CTOR(API, "Query", factor | subquery);
58 
59  if (!subquery.empty())
60  internal = new Xapian::Internal::QueryScaleWeight(factor, subquery);
61 }
62 
63 Query::Query(op op_, const Xapian::Query & subquery, double factor)
64 {
65  LOGCALL_CTOR(API, "Query", op_ | subquery | factor);
66 
67  if (rare(op_ != OP_SCALE_WEIGHT))
68  throw Xapian::InvalidArgumentError("op must be OP_SCALE_WEIGHT");
69  // If the subquery is MatchNothing then generate Query() which matches
70  // nothing.
71  if (!subquery.internal) return;
72  switch (subquery.internal->get_type()) {
73  case OP_VALUE_RANGE:
74  case OP_VALUE_GE:
75  case OP_VALUE_LE:
76  // These operators always return weight 0, so OP_SCALE_WEIGHT has
77  // no effect on them.
78  internal = subquery.internal;
79  return;
80  default:
81  break;
82  }
83  internal = new Xapian::Internal::QueryScaleWeight(factor, subquery);
84 }
85 
86 Query::Query(op op_, Xapian::valueno slot, std::string_view limit)
87 {
88  LOGCALL_CTOR(API, "Query", op_ | slot | limit);
89 
90  if (op_ == OP_VALUE_GE) {
91  if (limit.empty())
92  internal = new Xapian::Internal::QueryTerm();
93  else
94  internal = new Xapian::Internal::QueryValueGE(slot, limit);
95  } else if (usual(op_ == OP_VALUE_LE)) {
96  internal = new Xapian::Internal::QueryValueLE(slot, limit);
97  } else {
98  throw Xapian::InvalidArgumentError("op must be OP_VALUE_LE or OP_VALUE_GE");
99  }
100 }
101 
103  std::string_view begin, std::string_view end)
104 {
105  LOGCALL_CTOR(API, "Query", op_ | slot | begin | end);
106 
107  if (rare(op_ != OP_VALUE_RANGE))
108  throw Xapian::InvalidArgumentError("op must be OP_VALUE_RANGE");
109  // If begin > end then generate Query() which matches nothing.
110  if (begin.empty()) {
111  internal = new Xapian::Internal::QueryValueLE(slot, end);
112  } else if (usual(begin <= end)) {
113  internal = new Xapian::Internal::QueryValueRange(slot, begin, end);
114  }
115 }
116 
118  std::string_view pattern,
119  Xapian::termcount max_expansion,
120  int flags,
121  op combiner)
122 {
123  LOGCALL_CTOR(API, "Query", op_ | pattern | max_expansion | flags | combiner);
124  if (rare(combiner != OP_SYNONYM && combiner != OP_MAX && combiner != OP_OR))
125  throw Xapian::InvalidArgumentError("combiner must be OP_SYNONYM or "
126  "OP_MAX or OP_OR");
127  if (op_ == OP_EDIT_DISTANCE) {
128  internal = new Xapian::Internal::QueryEditDistance(pattern,
129  max_expansion,
130  flags,
131  combiner);
132  return;
133  }
134  if (rare(op_ != OP_WILDCARD))
135  throw Xapian::InvalidArgumentError("op must be OP_EDIT_DISTANCE or "
136  "OP_WILDCARD");
137 
138  auto just_flags = flags & ~Query::WILDCARD_LIMIT_MASK_;
139  if (pattern.empty()) {
140  if (just_flags == 0) {
141  // Empty pattern with implicit trailing '*' -> MatchAll.
142  internal = new Xapian::Internal::QueryTerm();
143  } else {
144  // Empty pattern with extended wildcards -> MatchNothing.
145  }
146  return;
147  }
148 
149  // Check if pattern consists of one or more '*' and at most one '?' (in any
150  // order) - if so treat it as just MatchAll.
151  bool match_all = false;
152  bool question_marks = false;
153  for (auto&& ch : pattern) {
154  if (ch == '*' && (flags & Query::WILDCARD_PATTERN_MULTI)) {
155  match_all = true;
156  } else if (ch == '?' && !question_marks &&
157  (flags & Query::WILDCARD_PATTERN_SINGLE)) {
158  question_marks = true;
159  } else {
160  match_all = false;
161  break;
162  }
163  }
164  if (match_all) {
165  internal = new Xapian::Internal::QueryTerm();
166  return;
167  }
168 
169  internal = new Xapian::Internal::QueryWildcard(pattern,
170  max_expansion,
171  flags,
172  combiner);
173 }
174 
176  std::string_view pattern,
177  Xapian::termcount max_expansion,
178  int flags,
179  op combiner,
180  unsigned edit_distance,
181  size_t min_prefix_len)
182 {
183  LOGCALL_CTOR(API, "Query", op_ | pattern | max_expansion | flags | combiner | edit_distance | min_prefix_len);
184  if (rare(combiner != OP_SYNONYM && combiner != OP_MAX && combiner != OP_OR))
185  throw Xapian::InvalidArgumentError("combiner must be OP_SYNONYM or "
186  "OP_MAX or OP_OR");
187  if (rare(op_ != OP_EDIT_DISTANCE))
188  throw Xapian::InvalidArgumentError("op must be OP_EDIT_DISTANCE");
189  internal = new Xapian::Internal::QueryEditDistance(pattern,
190  max_expansion,
191  flags,
192  combiner,
193  edit_distance,
194  min_prefix_len);
195 }
196 
197 const TermIterator
199 {
200  if (!internal)
201  return TermIterator();
202 
203  vector<pair<Xapian::termpos, string>> terms;
204  internal->gather_terms(static_cast<void*>(&terms));
205  sort(terms.begin(), terms.end());
206 
207  vector<string> v;
208  const string * old_term = NULL;
209  Xapian::termpos old_pos = 0;
210  for (auto && i : terms) {
211  // Remove duplicates (same term at the same position).
212  if (old_term && old_pos == i.first && *old_term == i.second)
213  continue;
214 
215  v.push_back(i.second);
216  old_pos = i.first;
217  old_term = &(i.second);
218  }
219  return TermIterator(new VectorTermList(v.begin(), v.end()));
220 }
221 
222 const TermIterator
224 {
225  if (!internal)
226  return TermIterator();
227 
228  vector<pair<Xapian::termpos, string>> terms;
229  internal->gather_terms(static_cast<void*>(&terms));
230  sort(terms.begin(), terms.end(), [](
231  const pair<Xapian::termpos, string>& a,
232  const pair<Xapian::termpos, string>& b) {
233  return a.second < b.second;
234  });
235 
236  vector<string> v;
237  const string * old_term = NULL;
238  for (auto && i : terms) {
239  // Remove duplicate term names.
240  if (old_term && *old_term == i.second)
241  continue;
242 
243  v.push_back(i.second);
244  old_term = &(i.second);
245  }
246  return TermIterator(new VectorTermList(v.begin(), v.end()));
247 }
248 
250 Query::get_length() const noexcept
251 {
252  return (internal ? internal->get_length() : 0);
253 }
254 
255 string
257 {
258  string result;
259  if (internal)
260  internal->serialise(result);
261  return result;
262 }
263 
264 const Query
265 Query::unserialise(string_view s, const Registry& reg)
266 {
267  const char * p = s.data();
268  const char * end = p + s.size();
270  AssertEq(p, end);
271  return Query(q);
272 }
273 
275 Query::get_type() const noexcept
276 {
277  if (!internal)
279  return internal->get_type();
280 }
281 
282 size_t
284 {
285  return internal ? internal->get_num_subqueries() : 0;
286 }
287 
288 const Query
289 Query::get_subquery(size_t n) const
290 {
291  return internal->get_subquery(n);
292 }
293 
296 {
297  return internal->get_wqf();
298 }
299 
302 {
303  return internal->get_pos();
304 }
305 
306 string
308 {
309  string desc = "Query(";
310  if (internal)
311  desc += internal->get_description();
312  desc += ")";
313  return desc;
314 }
315 
316 void
317 Query::init(op op_, size_t n_subqueries, Xapian::termcount parameter)
318 {
319  if (parameter > 0 &&
320  op_ != OP_NEAR && op_ != OP_PHRASE && op_ != OP_ELITE_SET)
321  throw InvalidArgumentError("parameter only valid with OP_NEAR, "
322  "OP_PHRASE or OP_ELITE_SET");
323 
324  switch (op_) {
325  case OP_AND:
326  internal = new Xapian::Internal::QueryAnd(n_subqueries);
327  break;
328  case OP_OR:
329  internal = new Xapian::Internal::QueryOr(n_subqueries);
330  break;
331  case OP_AND_NOT:
332  internal = new Xapian::Internal::QueryAndNot(n_subqueries);
333  break;
334  case OP_XOR:
335  internal = new Xapian::Internal::QueryXor(n_subqueries);
336  break;
337  case OP_AND_MAYBE:
338  internal = new Xapian::Internal::QueryAndMaybe(n_subqueries);
339  break;
340  case OP_FILTER:
341  internal = new Xapian::Internal::QueryFilter(n_subqueries);
342  break;
343  case OP_NEAR:
344  internal = new Xapian::Internal::QueryNear(n_subqueries,
345  parameter);
346  break;
347  case OP_PHRASE:
348  internal = new Xapian::Internal::QueryPhrase(n_subqueries,
349  parameter);
350  break;
351  case OP_ELITE_SET:
352  internal = new Xapian::Internal::QueryEliteSet(n_subqueries,
353  parameter);
354  break;
355  case OP_SYNONYM:
356  internal = new Xapian::Internal::QuerySynonym(n_subqueries);
357  break;
358  case OP_MAX:
359  internal = new Xapian::Internal::QueryMax(n_subqueries);
360  break;
361  default:
362  if (op_ == OP_INVALID && n_subqueries == 0) {
363  internal = new Xapian::Internal::QueryInvalid();
364  break;
365  }
366  throw InvalidArgumentError("op not valid with a list of subqueries");
367  }
368 }
369 
370 void
371 Query::add_subquery(bool positional, const Xapian::Query & subquery)
372 {
373  // We could handle this in a type-safe way, but we'd need to at least
374  // declare Xapian::Internal::QueryBranch in the API header, which seems
375  // less desirable than a static_cast<> here.
376  Xapian::Internal::QueryBranch * branch_query =
377  static_cast<Xapian::Internal::QueryBranch*>(internal.get());
378  Assert(branch_query);
379  if (positional) {
380  switch (subquery.get_type()) {
381  case LEAF_TERM:
382  break;
383  case LEAF_POSTING_SOURCE:
384  case LEAF_MATCH_ALL:
385  case LEAF_MATCH_NOTHING:
386  // None of these have positions, so positional operators won't
387  // match. Add MatchNothing as that is has special handling in
388  // AND-like queries to reduce the parent query to MatchNothing,
389  // which is appropriate in this case.
390  branch_query->add_subquery(MatchNothing);
391  return;
392  case OP_OR:
393  // OP_OR is now handled below OP_NEAR and OP_PHRASE.
394  break;
395  default:
396  throw Xapian::UnimplementedError("OP_NEAR and OP_PHRASE only currently support leaf subqueries");
397  }
398  }
399  branch_query->add_subquery(subquery);
400 }
401 
402 void
404 {
405  Xapian::Internal::QueryBranch * branch_query =
406  static_cast<Xapian::Internal::QueryBranch*>(internal.get());
407  if (branch_query)
408  internal = branch_query->done();
409 }
410 
411 }
This class stores a list of terms.
virtual Query::Internal * done()=0
virtual void add_subquery(const Xapian::Query &subquery)=0
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
static Query::Internal * unserialise(const char **p, const char *end, const Registry &reg)
Class representing a query.
Definition: query.h:45
const Query get_subquery(size_t n) const
Read a top level subquery.
Definition: query.cc:289
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
Definition: query.cc:198
const TermIterator get_unique_terms_begin() const
Begin iterator for unique terms in the query object.
Definition: query.cc:223
Xapian::termcount get_leaf_wqf() const
Get the wqf parameter of a leaf node.
Definition: query.cc:295
std::string get_description() const
Return a string describing this object.
Definition: query.cc:307
void init(Query::op op_, size_t n_subqueries, Xapian::termcount window=0)
Definition: query.cc:317
op get_type() const noexcept
Get the type of the top level of the query.
Definition: query.cc:275
~Query()
Destructor.
Definition: query.h:357
size_t get_num_subqueries() const noexcept
Get the number of subqueries of the top level query.
Definition: query.cc:283
static const Query unserialise(std::string_view serialised, const Registry &reg=Registry())
Unserialise a string and return a Query object.
Definition: query.cc:265
op
Query operators.
Definition: query.h:78
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
Definition: query.h:166
@ LEAF_POSTING_SOURCE
Value returned by get_type() for a PostingSource.
Definition: query.h:283
@ OP_MAX
Pick the maximum weight of any subquery.
Definition: query.h:249
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
Definition: query.h:158
@ OP_WILDCARD
Wildcard expansion.
Definition: query.h:255
@ OP_XOR
Match documents which an odd number of subqueries match.
Definition: query.h:107
@ OP_AND_MAYBE
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
@ LEAF_MATCH_ALL
Value returned by get_type() for MatchAll or equivalent.
Definition: query.h:290
@ OP_NEAR
Match only documents where all subqueries match near each other.
Definition: query.h:140
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
@ OP_AND
Match only documents which all subqueries match.
Definition: query.h:84
@ LEAF_MATCH_NOTHING
Value returned by get_type() for MatchNothing or equivalent.
Definition: query.h:296
@ OP_OR
Match documents which at least one subquery matches.
Definition: query.h:92
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
Definition: query.h:152
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
Definition: query.h:231
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
Definition: query.h:99
@ OP_EDIT_DISTANCE
Edit distance expansion.
Definition: query.h:269
@ LEAF_TERM
Value returned by get_type() for a term.
Definition: query.h:280
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
Definition: query.h:223
@ OP_INVALID
Construct an invalid query.
Definition: query.h:277
void add_subquery(bool positional, const Xapian::Query &subquery)
Definition: query.cc:371
void done()
Definition: query.cc:403
std::string serialise() const
Serialise this object into a string.
Definition: query.cc:256
Xapian::termpos get_leaf_pos() const
Get the pos parameter of a leaf node.
Definition: query.cc:301
static const Xapian::Query MatchNothing
A query matching no documents.
Definition: query.h:64
bool empty() const noexcept
Check if this query is Xapian::Query::MatchNothing.
Definition: query.h:661
Xapian::termcount get_length() const noexcept
Return the length of this query object.
Definition: query.cc:250
@ WILDCARD_PATTERN_MULTI
Support * which matches 0 or more characters.
Definition: query.h:330
@ WILDCARD_LIMIT_MASK_
Definition: query.h:324
@ WILDCARD_PATTERN_SINGLE
Support ? which matches a single character.
Definition: query.h:336
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:48
Query() noexcept
Construct a query matching no documents.
Definition: query.h:354
Registry for user subclasses.
Definition: registry.h:47
Class for iterating over a list of terms.
Definition: termiterator.h:41
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
#define usual(COND)
Definition: config.h:608
#define rare(COND)
Definition: config.h:607
string term
PositionList * p
Xapian::termpos pos
Debug logging macros.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:480
Hierarchy of classes which Xapian can throw as exceptions.
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:277
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
Various assertion macros.
#define AssertEq(A, B)
Definition: omassert.h:124
#define Assert(COND)
Definition: omassert.h:122
Xapian::Query API class.
Xapian::Query internals.
A vector-like container of terms which can be iterated.