xapian-core  1.4.26
postingsource.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008-2022 Olly Betts
5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
6  * Copyright (C) 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include <config.h>
24 
25 // We need to be able to set deprecated members of ValuePostingSource.
26 #define XAPIAN_DEPRECATED(X) X
27 #include "xapian/postingsource.h"
28 
29 #include "autoptr.h"
30 
31 #include "backends/database.h"
32 #include "backends/document.h"
33 #include "matcher/multimatch.h"
34 
35 #include "xapian/document.h"
36 #include "xapian/error.h"
37 #include "xapian/queryparser.h" // For sortable_unserialise().
38 
39 #include "omassert.h"
40 #include "net/length.h"
41 #include "serialise-double.h"
42 #include "str.h"
43 
44 #include <cfloat>
45 
46 using namespace std;
47 
48 namespace Xapian {
49 
50 PostingSource::~PostingSource() { }
51 
52 void
53 PostingSource::set_maxweight(double max_weight)
54 {
55  if (usual(matcher_)) {
56  MultiMatch * multimatch = static_cast<MultiMatch*>(matcher_);
57  multimatch->recalc_maxweight();
58  }
59  max_weight_ = max_weight;
60 }
61 
62 double
63 PostingSource::get_weight() const
64 {
65  return 0;
66 }
67 
68 void
69 PostingSource::skip_to(Xapian::docid did, double min_wt)
70 {
71  while (!at_end() && get_docid() < did) {
72  next(min_wt);
73  }
74 }
75 
76 bool
77 PostingSource::check(Xapian::docid did, double min_wt)
78 {
79  skip_to(did, min_wt);
80  return true;
81 }
82 
84 PostingSource::clone() const
85 {
86  return NULL;
87 }
88 
89 string
91 {
92  return string();
93 }
94 
95 string
96 PostingSource::serialise() const
97 {
98  throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
99 }
100 
102 PostingSource::unserialise(const string &) const
103 {
104  throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
105 }
106 
108 PostingSource::unserialise_with_registry(const std::string &s,
109  const Registry &) const
110 {
111  return unserialise(s);
112 }
113 
114 string
115 PostingSource::get_description() const
116 {
117  return "Xapian::PostingSource subclass";
118 }
119 
120 
121 ValuePostingSource::ValuePostingSource(Xapian::valueno slot_)
122  : real_slot(slot_),
123  db(real_db),
124  slot(real_slot),
125  value_it(real_value_it),
126  started(real_started),
127  termfreq_min(real_termfreq_min),
128  termfreq_est(real_termfreq_est),
129  termfreq_max(real_termfreq_max)
130 {
131 }
132 
135 {
136  return real_termfreq_min;
137 }
138 
141 {
142  return real_termfreq_est;
143 }
144 
147 {
148  return real_termfreq_max;
149 }
150 
151 void
153 {
154  if (!real_started) {
155  real_started = true;
157  } else {
158  ++real_value_it;
159  }
160 
162 
163  if (min_wt > get_maxweight()) {
165  return;
166  }
167 }
168 
169 void
171 {
172  if (!real_started) {
173  real_started = true;
175 
177  }
178 
179  if (min_wt > get_maxweight()) {
181  return;
182  }
183  real_value_it.skip_to(min_docid);
184 }
185 
186 bool
187 ValuePostingSource::check(Xapian::docid min_docid, double min_wt)
188 {
189  if (!real_started) {
190  real_started = true;
192 
193  if (real_value_it == real_db.valuestream_end(real_slot)) return true;
194  }
195 
196  if (min_wt > get_maxweight()) {
198  return true;
199  }
200  return real_value_it.check(min_docid);
201 }
202 
203 bool
205 {
207 }
208 
211 {
212  return real_value_it.get_docid();
213 }
214 
215 void
217 {
218  real_db = db_;
219  real_started = false;
220  set_maxweight(DBL_MAX);
224 }
225 
226 
228  : ValuePostingSource(slot_)
229 {
230 }
231 
232 double
234 {
235  Assert(!at_end());
236  Assert(get_started());
238 }
239 
242 {
243  return new ValueWeightPostingSource(get_slot());
244 }
245 
246 string
248 {
249  return string("Xapian::ValueWeightPostingSource");
250 }
251 
252 string
254 {
255  return encode_length(get_slot());
256 }
257 
260 {
261  const char * p = s.data();
262  const char * end = p + s.size();
263 
264  Xapian::valueno new_slot;
265  decode_length(&p, end, new_slot);
266  if (p != end) {
267  throw Xapian::NetworkError("Bad serialised ValueWeightPostingSource - junk at end");
268  }
269 
270  return new ValueWeightPostingSource(new_slot);
271 }
272 
273 void
275 {
277 
278  string upper_bound = get_database().get_value_upper_bound(get_slot());
279  if (upper_bound.empty()) {
280  // This should only happen if there are no entries, in which case the
281  // maxweight is 0.
282  set_maxweight(0.0);
283  } else {
284  set_maxweight(sortable_unserialise(upper_bound));
285  }
286 }
287 
288 string
290 {
291  string desc("Xapian::ValueWeightPostingSource(slot=");
292  desc += str(get_slot());
293  desc += ")";
294  return desc;
295 }
296 
297 
299  : ValuePostingSource(slot_),
300  default_weight(0.0),
301  max_weight_in_map(0.0)
302 {
303 }
304 
305 void
306 ValueMapPostingSource::add_mapping(const string & key, double wt)
307 {
308  weight_map[key] = wt;
310 }
311 
312 void
314 {
315  weight_map.clear();
316  max_weight_in_map = 0.0;
317 }
318 
319 void
321 {
322  default_weight = wt;
323 }
324 
325 double
327 {
328  map<string, double>::const_iterator wit = weight_map.find(get_value());
329  if (wit == weight_map.end()) {
330  return default_weight;
331  }
332  return wit->second;
333 }
334 
337 {
338  AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(get_slot()));
339  map<string, double>::const_iterator i;
340  for (i = weight_map.begin(); i != weight_map.end(); ++i) {
341  res->add_mapping(i->first, i->second);
342  }
343  res->set_default_weight(default_weight);
344  return res.release();
345 }
346 
347 string
349 {
350  return string("Xapian::ValueMapPostingSource");
351 }
352 
353 string
355 {
356  string result = encode_length(get_slot());
357  result += serialise_double(default_weight);
358 
359  map<string, double>::const_iterator i;
360  for (i = weight_map.begin(); i != weight_map.end(); ++i) {
361  result.append(encode_length(i->first.size()));
362  result.append(i->first);
363  result.append(serialise_double(i->second));
364  }
365 
366  return result;
367 }
368 
371 {
372  const char * p = s.data();
373  const char * end = p + s.size();
374 
375  Xapian::valueno new_slot;
376  decode_length(&p, end, new_slot);
377  AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(new_slot));
378  res->set_default_weight(unserialise_double(&p, end));
379  while (p != end) {
380  size_t keylen;
381  decode_length_and_check(&p, end, keylen);
382  string key(p, keylen);
383  p += keylen;
384  res->add_mapping(key, unserialise_double(&p, end));
385  }
386  return res.release();
387 }
388 
389 void
391 {
394 }
395 
396 string
398 {
399  string desc("Xapian::ValueMapPostingSource(slot=");
400  desc += str(get_slot());
401  desc += ")";
402  return desc;
403 }
404 
406  : started(false)
407 {
408  // The weight is fixed at wt, so that's the maxweight too. So just store wt
409  // as the maxweight and we can read it from there when we need it.
410  set_maxweight(wt);
411 }
412 
415 {
416  return termfreq;
417 }
418 
421 {
422  return termfreq;
423 }
424 
427 {
428  return termfreq;
429 }
430 
431 double
433 {
434  return get_maxweight();
435 }
436 
437 void
439 {
440  if (!started) {
441  started = true;
442  it = db.postlist_begin(string());
443  } else {
444  ++it;
445  }
446 
447  if (it == db.postlist_end(string())) return;
448 
449  if (check_docid) {
450  it.skip_to(check_docid + 1);
451  check_docid = 0;
452  }
453 
454  if (min_wt > get_maxweight()) {
455  it = db.postlist_end(string());
456  }
457 }
458 
459 void
461 {
462  if (!started) {
463  started = true;
464  it = db.postlist_begin(string());
465 
466  if (it == db.postlist_end(string())) return;
467  }
468 
469  if (check_docid) {
470  if (min_docid < check_docid)
471  min_docid = check_docid + 1;
472  check_docid = 0;
473  }
474 
475  if (min_wt > get_maxweight()) {
476  it = db.postlist_end(string());
477  return;
478  }
479  it.skip_to(min_docid);
480 }
481 
482 bool
484 {
485  // We're guaranteed not to be called if the document doesn't
486  // exist, so just remember the docid passed, and return true.
487  check_docid = min_docid;
488  return true;
489 }
490 
491 bool
493 {
494  if (check_docid != 0) return false;
495  return started && it == db.postlist_end(string());
496 }
497 
500 {
501  if (check_docid != 0) return check_docid;
502  return *it;
503 }
504 
507 {
509 }
510 
511 string
513 {
514  return string("Xapian::FixedWeightPostingSource");
515 }
516 
517 string
519 {
521 }
522 
525 {
526  const char * p = s.data();
527  const char * s_end = p + s.size();
528  double new_wt = unserialise_double(&p, s_end);
529  if (p != s_end) {
530  throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
531  }
532  return new FixedWeightPostingSource(new_wt);
533 }
534 
535 void
537 {
538  db = db_;
539  termfreq = db_.get_doccount();
540  started = false;
541  check_docid = 0;
542 }
543 
544 string
546 {
547  string desc("Xapian::FixedWeightPostingSource(wt=");
548  desc += str(get_maxweight());
549  desc += ")";
550  return desc;
551 }
552 
553 }
FixedWeightPostingSource * unserialise(const std::string &serialised) const
Create object given string serialisation returned by serialise().
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
#define Assert(COND)
Definition: omassert.h:122
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
ValueMapPostingSource * clone() const
Clone the posting source.
length encoded as a string
double max_weight_in_map
The maximum weight in weight_map.
std::string serialise() const
Serialise object parameters into a string.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
This class is used to access a database, or a group of databases.
Definition: database.h:68
ValueMapPostingSource * unserialise(const std::string &serialised) const
Create object given string serialisation returned by serialise().
void set_maxweight(double max_weight)
Specify an upper bound on what get_weight() will return from now on.
class for performing a match
Xapian::PostingIterator it
Iterator over all documents.
std::string get_description() const
Return a string describing this object.
Xapian::docid get_docid() const
Return the current docid.
A posting source which looks up weights in a map using values as the key.
#define usual(COND)
Definition: config.h:576
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
double get_weight() const
Return the weight contribution for the current document.
std::string name() const
Name of the posting source class.
bool at_end() const
Return true if the current position is past the last entry in this list.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
External sources of posting information.
ValueWeightPostingSource(Xapian::valueno slot_)
Construct a ValueWeightPostingSource.
bool & started
Flag indicating if we've started (true if we have).
std::string serialise() const
Serialise object parameters into a string.
STL namespace.
Convert types to std::string.
double get_weight() const
Return the weight contribution for the current document.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
Definition: omdatabase.cc:386
std::string encode_length(T len)
Encode a length as a variable-length string.
Definition: length.h:36
#define false
Definition: header.h:9
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
void clear_mappings()
Clear all mappings.
std::string serialise() const
Serialise object parameters into a string.
Hierarchy of classes which Xapian can throw as exceptions.
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
functions to serialise and unserialise a double
std::string get_description() const
Return a string describing this object.
std::string get_value() const
Read current value.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
ValueIterator valuestream_end(Xapian::valueno) const
Return end iterator corresponding to valuestream_begin().
Definition: database.h:363
ValueMapPostingSource(Xapian::valueno slot_)
Construct a ValueMapPostingSource.
bool get_started() const
Flag indicating if we've started (true if we have).
Xapian::doccount get_termfreq_est() const
An estimate of the number of documents this object can return.
Registry for user subclasses.
Definition: registry.h:47
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Xapian::doccount termfreq
Number of documents in the posting source.
Xapian::doccount real_termfreq_min
A posting source which returns a fixed weight for all documents.
bool at_end() const
Return true if the current position is past the last entry in this list.
void recalc_maxweight()
Called by postlists to indicate that they've rearranged themselves and the maxweight now possible is ...
Definition: multimatch.h:136
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
ValueWeightPostingSource * clone() const
Clone the posting source.
bool started
Flag indicating if we've started (true if we have).
double get_weight() const
Return the weight contribution for the current document.
std::string name() const
Name of the posting source class.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
std::map< std::string, double > weight_map
The value -> weight map.
FixedWeightPostingSource * clone() const
Clone the posting source.
ValueIterator valuestream_begin(Xapian::valueno slot) const
Return an iterator over the value in slot slot for each document.
Definition: omdatabase.cc:450
A posting source which generates weights from a value slot.
double sortable_unserialise(const std::string &serialised)
Convert a string encoded using sortable_serialise back to a floating point number.
bool check(Xapian::docid docid)
Check if the specified docid occurs.
Xapian::ValueIterator real_value_it
A posting source which reads weights from a value slot.
Xapian::doccount get_termfreq_min() const
A lower bound on the number of documents this object can return.
void set_default_weight(double wt)
Set a default weight for document values not in the map.
std::string serialise_double(double v)
Serialise a double to a string.
void decode_length_and_check(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:112
char name[9]
Definition: dbcheck.cc:55
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
void add_mapping(const std::string &key, double wt)
Add a mapping.
FixedWeightPostingSource(double wt)
Construct a FixedWeightPostingSource.
Indicates a problem communicating with a remote database.
Definition: error.h:803
Xapian::docid get_docid() const
Return the current docid.
void skip_to(Xapian::docid did)
Advance the iterator to document did.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian::doccount real_termfreq_est
Xapian::docid get_docid() const
Return the docid at the current position.
Xapian::doccount get_termfreq_max() const
An upper bound on the number of documents this object can return.
Xapian::Database db
The database we're reading documents from.
double get_maxweight() const
Return the currently set upper bound on what get_weight() can return.
std::string get_description() const
Return a string describing this object.
double default_weight
The default weight.
void next(double min_wt)
Advance the current position to the next matching document.
Various assertion macros.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
API for working with documents.
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
std::string name() const
Name of the posting source class.
Xapian::doccount real_termfreq_max
Xapian::docid check_docid
The docid last passed to check() (0 if check() wasn't the last move).
Xapian::Database real_db
void decode_length(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:94
ValueWeightPostingSource * unserialise(const std::string &serialised) const
Create object given string serialisation returned by serialise().
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
Definition: omdatabase.cc:355
Wrapper around standard unique_ptr template.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
parsing a user query string to build a Xapian::Query object
Xapian::valueno get_slot() const
The slot we're reading values from.
void skip_to(Xapian::docid docid_or_slot)
Advance the iterator to document id or value slot docid_or_slot.
Xapian::Database get_database() const
The database we're reading values from.
void next(double min_wt)
Advance the current position to the next matching document.