xapian-core  1.4.19
query.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2011,2012,2013,2015,2016,2017,2018 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include "xapian/query.h"
24 #include "queryinternal.h"
25 
26 #include <algorithm>
27 
28 #include "debuglog.h"
29 #include "omassert.h"
30 #include "vectortermlist.h"
31 
32 #include "xapian/error.h"
33 
34 using namespace std;
35 
36 namespace Xapian {
37 
38 // Extra () are needed to resolve ambiguity with method declaration.
39 const Query Query::MatchAll((string()));
40 
41 const Query Query::MatchNothing;
42 
43 Query::Query(const string & term, Xapian::termcount wqf, Xapian::termpos pos)
44  : internal(new Xapian::Internal::QueryTerm(term, wqf, pos))
45 {
46  LOGCALL_CTOR(API, "Query", term | wqf | pos);
47 }
48 
50  : internal(new Xapian::Internal::QueryPostingSource(source))
51 {
52  LOGCALL_CTOR(API, "Query", source);
53 }
54 
55 Query::Query(double factor, const Xapian::Query & subquery)
56 {
57  LOGCALL_CTOR(API, "Query", factor | subquery);
58 
59  if (!subquery.empty())
60  internal = new Xapian::Internal::QueryScaleWeight(factor, subquery);
61 }
62 
63 Query::Query(op op_, const Xapian::Query & subquery, double factor)
64 {
65  LOGCALL_CTOR(API, "Query", op_ | subquery | factor);
66 
67  if (rare(op_ != OP_SCALE_WEIGHT))
68  throw Xapian::InvalidArgumentError("op must be OP_SCALE_WEIGHT");
69  // If the subquery is MatchNothing then generate Query() which matches
70  // nothing.
71  if (!subquery.internal.get()) return;
72  switch (subquery.internal->get_type()) {
73  case OP_VALUE_RANGE:
74  case OP_VALUE_GE:
75  case OP_VALUE_LE:
76  // These operators always return weight 0, so OP_SCALE_WEIGHT has
77  // no effect on them.
78  internal = subquery.internal;
79  return;
80  default:
81  break;
82  }
83  internal = new Xapian::Internal::QueryScaleWeight(factor, subquery);
84 }
85 
86 Query::Query(op op_, Xapian::valueno slot, const std::string & limit)
87 {
88  LOGCALL_CTOR(API, "Query", op_ | slot | limit);
89 
90  if (op_ == OP_VALUE_GE) {
91  if (limit.empty())
92  internal = new Xapian::Internal::QueryTerm();
93  else
94  internal = new Xapian::Internal::QueryValueGE(slot, limit);
95  } else if (usual(op_ == OP_VALUE_LE)) {
96  internal = new Xapian::Internal::QueryValueLE(slot, limit);
97  } else {
98  throw Xapian::InvalidArgumentError("op must be OP_VALUE_LE or OP_VALUE_GE");
99  }
100 }
101 
103  const std::string & begin, const std::string & end)
104 {
105  LOGCALL_CTOR(API, "Query", op_ | slot | begin | end);
106 
107  if (rare(op_ != OP_VALUE_RANGE))
108  throw Xapian::InvalidArgumentError("op must be OP_VALUE_RANGE");
109  // If begin > end then generate Query() which matches nothing.
110  if (begin.empty()) {
111  internal = new Xapian::Internal::QueryValueLE(slot, end);
112  } else if (usual(begin <= end)) {
113  internal = new Xapian::Internal::QueryValueRange(slot, begin, end);
114  }
115 }
116 
118  const std::string & pattern,
119  Xapian::termcount max_expansion,
120  int max_type,
121  op combiner)
122 {
123  LOGCALL_CTOR(API, "Query", op_ | pattern | max_expansion | max_type | combiner);
124  if (rare(op_ != OP_WILDCARD))
125  throw Xapian::InvalidArgumentError("op must be OP_WILDCARD");
126  if (rare(combiner != OP_SYNONYM && combiner != OP_MAX && combiner != OP_OR))
127  throw Xapian::InvalidArgumentError("combiner must be OP_SYNONYM or OP_MAX or OP_OR");
128  internal = new Xapian::Internal::QueryWildcard(pattern,
129  max_expansion,
130  max_type,
131  combiner);
132 }
133 
134 const TermIterator
136 {
137  if (!internal.get())
138  return TermIterator();
139 
140  vector<pair<Xapian::termpos, string>> terms;
141  internal->gather_terms(static_cast<void*>(&terms));
142  sort(terms.begin(), terms.end());
143 
144  vector<string> v;
145  const string * old_term = NULL;
146  Xapian::termpos old_pos = 0;
147  for (auto && i : terms) {
148  // Remove duplicates (same term at the same position).
149  if (old_term && old_pos == i.first && *old_term == i.second)
150  continue;
151 
152  v.push_back(i.second);
153  old_pos = i.first;
154  old_term = &(i.second);
155  }
156  return TermIterator(new VectorTermList(v.begin(), v.end()));
157 }
158 
159 const TermIterator
161 {
162  if (!internal.get())
163  return TermIterator();
164 
165  vector<pair<Xapian::termpos, string>> terms;
166  internal->gather_terms(static_cast<void*>(&terms));
167  sort(terms.begin(), terms.end(), [](
168  const pair<Xapian::termpos, string>& a,
169  const pair<Xapian::termpos, string>& b) {
170  return a.second < b.second;
171  });
172 
173  vector<string> v;
174  const string * old_term = NULL;
175  for (auto && i : terms) {
176  // Remove duplicate term names.
177  if (old_term && *old_term == i.second)
178  continue;
179 
180  v.push_back(i.second);
181  old_term = &(i.second);
182  }
183  return TermIterator(new VectorTermList(v.begin(), v.end()));
184 }
185 
188 {
189  return (internal.get() ? internal->get_length() : 0);
190 }
191 
192 string
194 {
195  string result;
196  if (internal.get())
197  internal->serialise(result);
198  return result;
199 }
200 
201 const Query
202 Query::unserialise(const string & s, const Registry & reg)
203 {
204  const char * p = s.data();
205  const char * end = p + s.size();
206  Query::Internal * q = Query::Internal::unserialise(&p, end, reg);
207  AssertEq(p, end);
208  return Query(q);
209 }
210 
213 {
214  if (!internal.get())
216  return internal->get_type();
217 }
218 
219 size_t
221 {
222  return internal.get() ? internal->get_num_subqueries() : 0;
223 }
224 
225 const Query
226 Query::get_subquery(size_t n) const
227 {
228  return internal->get_subquery(n);
229 }
230 
231 string
233 {
234  string desc = "Query(";
235  if (internal.get())
236  desc += internal->get_description();
237  desc += ")";
238  return desc;
239 }
240 
241 void
242 Query::init(op op_, size_t n_subqueries, Xapian::termcount parameter)
243 {
244  if (parameter > 0 &&
245  op_ != OP_NEAR && op_ != OP_PHRASE && op_ != OP_ELITE_SET)
246  throw InvalidArgumentError("parameter only valid with OP_NEAR, "
247  "OP_PHRASE or OP_ELITE_SET");
248 
249  switch (op_) {
250  case OP_AND:
251  internal = new Xapian::Internal::QueryAnd(n_subqueries);
252  break;
253  case OP_OR:
254  internal = new Xapian::Internal::QueryOr(n_subqueries);
255  break;
256  case OP_AND_NOT:
257  internal = new Xapian::Internal::QueryAndNot(n_subqueries);
258  break;
259  case OP_XOR:
260  internal = new Xapian::Internal::QueryXor(n_subqueries);
261  break;
262  case OP_AND_MAYBE:
263  internal = new Xapian::Internal::QueryAndMaybe(n_subqueries);
264  break;
265  case OP_FILTER:
266  internal = new Xapian::Internal::QueryFilter(n_subqueries);
267  break;
268  case OP_NEAR:
269  internal = new Xapian::Internal::QueryNear(n_subqueries,
270  parameter);
271  break;
272  case OP_PHRASE:
273  internal = new Xapian::Internal::QueryPhrase(n_subqueries,
274  parameter);
275  break;
276  case OP_ELITE_SET:
277  internal = new Xapian::Internal::QueryEliteSet(n_subqueries,
278  parameter);
279  break;
280  case OP_SYNONYM:
281  internal = new Xapian::Internal::QuerySynonym(n_subqueries);
282  break;
283  case OP_MAX:
284  internal = new Xapian::Internal::QueryMax(n_subqueries);
285  break;
286  default:
287  if (op_ == OP_INVALID && n_subqueries == 0) {
288  internal = new Xapian::Internal::QueryInvalid();
289  break;
290  }
291  throw InvalidArgumentError("op not valid with a list of subqueries");
292  }
293 }
294 
295 void
296 Query::add_subquery(bool positional, const Xapian::Query & subquery)
297 {
298  // We could handle this in a type-safe way, but we'd need to at least
299  // declare Xapian::Internal::QueryBranch in the API header, which seems
300  // less desirable than a static_cast<> here.
301  Xapian::Internal::QueryBranch * branch_query =
302  static_cast<Xapian::Internal::QueryBranch*>(internal.get());
303  Assert(branch_query);
304  if (positional) {
305  switch (subquery.get_type()) {
306  case LEAF_TERM:
307  break;
308  case LEAF_POSTING_SOURCE:
309  case LEAF_MATCH_ALL:
310  case LEAF_MATCH_NOTHING:
311  // None of these have positions, so positional operators won't
312  // match. Add MatchNothing as that is has special handling in
313  // AND-like queries to reduce the parent query to MatchNothing,
314  // which is appropriate in this case.
315  branch_query->add_subquery(MatchNothing);
316  return;
317  case OP_OR:
318  // OP_OR is now handled below OP_NEAR and OP_PHRASE.
319  break;
320  default:
321  throw Xapian::UnimplementedError("OP_NEAR and OP_PHRASE only currently support leaf subqueries");
322  }
323  }
324  branch_query->add_subquery(subquery);
325 }
326 
327 void
329 {
330  Xapian::Internal::QueryBranch * branch_query =
331  static_cast<Xapian::Internal::QueryBranch*>(internal.get());
332  if (branch_query)
333  internal = branch_query->done();
334 }
335 
336 }
Xapian::termcount get_length() const
Return the length of this query object.
Definition: query.cc:187
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
#define Assert(COND)
Definition: omassert.h:122
Wildcard expansion.
Definition: query.h:255
static const Query unserialise(const std::string &serialised, const Registry &reg=Registry())
Unserialise a string and return a Query object.
Definition: query.cc:202
const Query get_subquery(size_t n) const
Read a top level subquery.
Definition: query.cc:226
#define AssertEq(A, B)
Definition: omassert.h:124
Xapian::Query internals.
Match documents which an odd number of subqueries match.
Definition: query.h:107
void init(Query::op op_, size_t n_subqueries, Xapian::termcount window=0)
Definition: query.cc:242
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
Definition: query.cc:135
#define usual(COND)
Definition: config.h:544
op
Query operators.
Definition: query.h:78
STL namespace.
Pick the maximum weight of any subquery.
Definition: query.h:249
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: query.h:49
A vector-like container of terms which can be iterated.
#define rare(COND)
Definition: config.h:543
const TermIterator get_unique_terms_begin() const
Begin iterator for unique terms in the query object.
Definition: query.cc:160
Xapian::Query API class.
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Pick the best N subqueries and combine with OP_OR.
Definition: query.h:215
Value returned by get_type() for MatchAll or equivalent.
Definition: query.h:276
Scale the weight contributed by a subquery.
Definition: query.h:166
Match only documents where all subqueries match near and in order.
Definition: query.h:152
Match the first subquery taking extra weight from other subqueries.
Definition: query.h:118
Value returned by get_type() for a PostingSource.
Definition: query.h:269
Registry for user subclasses.
Definition: registry.h:47
Match like OP_AND but only taking weight from the first subquery.
Definition: query.h:128
Query()
Construct a query matching no documents.
Definition: query.h:319
Match only documents where a value slot is >= a given value.
Definition: query.h:223
Match only documents where a value slot is within a given range.
Definition: query.h:158
Match only documents where a value slot is <= a given value.
Definition: query.h:231
std::string serialise() const
Serialise this object into a string.
Definition: query.cc:193
Construct an invalid query.
Definition: query.h:263
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:478
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
This class stores a list of terms.
Match only documents which all subqueries match.
Definition: query.h:84
virtual Query::Internal * done()=0
void add_subquery(bool positional, const Xapian::Query &subquery)
Definition: query.cc:296
size_t get_num_subqueries() const
Get the number of subqueries of the top level query.
Definition: query.cc:220
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
Match only documents where all subqueries match near each other.
Definition: query.h:140
static Query::Internal * unserialise(const char **p, const char *end, const Registry &reg)
Value returned by get_type() for a term.
Definition: query.h:266
bool empty() const
Check if this query is Xapian::Query::MatchNothing.
Definition: query.h:524
Match documents which the first subquery matches but no others do.
Definition: query.h:99
Match documents which at least one subquery matches.
Definition: query.h:92
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
static const Xapian::Query MatchNothing
A query matching no documents.
Definition: query.h:65
op get_type() const
Get the type of the top level of the query.
Definition: query.cc:212
Various assertion macros.
Class representing a query.
Definition: query.h:46
virtual void add_subquery(const Xapian::Query &subquery)=0
#define XAPIAN_NOEXCEPT
Definition: attributes.h:39
void done()
Definition: query.cc:328
Debug logging macros.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Value returned by get_type() for MatchNothing or equivalent.
Definition: query.h:282