xapian-core  2.0.0
externalpostlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008-2026 Olly Betts
5  * Copyright 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "externalpostlist.h"
25 
26 #include <xapian/postingsource.h>
27 
28 #include "debuglog.h"
29 #include "estimateop.h"
30 #include "omassert.h"
31 
32 using namespace std;
33 
35  Xapian::PostingSource* source_,
36  EstimateOp* estimate_op,
37  double factor_,
38  bool* max_weight_cached_flag_ptr,
39  Xapian::doccount shard_index)
40  : factor(factor_)
41 {
42  Assert(source_);
43  Xapian::PostingSource* newsource = source_->clone();
44  if (newsource != NULL) {
45  source = newsource->release();
46  } else if (shard_index == 0) {
47  // Allow use of a non-clone-able PostingSource with a non-sharded
48  // Database.
49  source = source_;
50  } else {
51  throw Xapian::InvalidOperationError("PostingSource subclass must "
52  "implement clone() to support use "
53  "with a sharded database");
54  }
55  source->set_max_weight_cached_flag_ptr_(max_weight_cached_flag_ptr);
56  source->reset(db, shard_index);
58  if (estimate_op) {
60  termfreq,
62  }
63 }
64 
67 {
68  LOGCALL(MATCH, Xapian::docid, "ExternalPostList::get_docid", NO_ARGS);
69  Assert(current);
70  RETURN(current);
71 }
72 
73 double
76  Xapian::termcount) const
77 {
78  LOGCALL(MATCH, double, "ExternalPostList::get_weight", NO_ARGS);
79  Assert(source);
80  if (factor == 0.0) RETURN(factor);
82 }
83 
84 double
86 {
87  LOGCALL(MATCH, double, "ExternalPostList::recalc_maxweight", NO_ARGS);
88  // source will be NULL here if we've reached the end.
89  if (!source) RETURN(0.0);
90  if (factor == 0.0) RETURN(0.0);
92 }
93 
96 {
97  return NULL;
98 }
99 
100 PostList *
102  LOGCALL(MATCH, PostList *, "ExternalPostList::update_after_advance", NO_ARGS);
103  Assert(source);
104  if (source->at_end()) {
105  LOGLINE(MATCH, "ExternalPostList now at end");
106  source = NULL;
107  } else {
108  current = source->get_docid();
109  }
110  RETURN(NULL);
111 }
112 
113 PostList *
115 {
116  LOGCALL(MATCH, PostList *, "ExternalPostList::next", w_min);
117  Assert(source);
118  source->next(w_min);
120 }
121 
122 PostList *
124 {
125  LOGCALL(MATCH, PostList *, "ExternalPostList::skip_to", did | w_min);
126  Assert(source);
127  if (did <= current) RETURN(NULL);
128  source->skip_to(did, w_min);
130 }
131 
132 PostList *
133 ExternalPostList::check(Xapian::docid did, double w_min, bool &valid)
134 {
135  LOGCALL(MATCH, PostList *, "ExternalPostList::check", did | w_min | valid);
136  Assert(source);
137  if (did <= current) {
138  valid = true;
139  RETURN(NULL);
140  }
141  valid = source->check(did, w_min);
142  if (source->at_end()) {
143  LOGLINE(MATCH, "ExternalPostList now at end");
144  source = NULL;
145  } else {
146  current = valid ? source->get_docid() : current;
147  }
148  RETURN(NULL);
149 }
150 
151 bool
153 {
154  LOGCALL(MATCH, bool, "ExternalPostList::at_end", NO_ARGS);
155  RETURN(!source);
156 }
157 
160 {
161  return 1;
162 }
163 
164 string
166 {
167  string desc = "ExternalPostList(";
168  if (source) desc += source->get_description();
169  desc += ")";
170  return desc;
171 }
Class for estimating the total number of matching documents.
Definition: estimateop.h:64
void report_termfreqs(Xapian::doccount min_, Xapian::doccount est, Xapian::doccount max_)
Fill in estimates for POSTING_SOURCE.
Definition: estimateop.h:186
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
PostList * update_after_advance()
ExternalPostList(const ExternalPostList &)
Disallow copying.
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
PostList * check(Xapian::docid did, double w_min, bool &valid)
Check if the specified docid occurs in this postlist.
Xapian::docid current
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Xapian::Internal::opt_intrusive_ptr< Xapian::PostingSource > source
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
Xapian::docid get_docid() const
Return the current docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
std::string get_description() const
Return a string description of this object.
An indexed database of documents.
Definition: database.h:75
Abstract base class for postlists.
Definition: postlist.h:40
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
Definition: postlist.h:52
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:271
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
virtual void skip_to(Xapian::docid did, double min_wt)
Advance to the specified docid.
virtual Xapian::doccount get_termfreq_est() const =0
An estimate of the number of documents this object can return.
void set_max_weight_cached_flag_ptr_(bool *flag_ptr)
Definition: postingsource.h:71
virtual void next(double min_wt)=0
Advance the current position to the next matching document.
virtual Xapian::doccount get_termfreq_max() const =0
An upper bound on the number of documents this object can return.
virtual bool check(Xapian::docid did, double min_wt)
Check if the specified docid occurs.
PostingSource * release()
Start reference counting this object.
virtual PostingSource * clone() const
Clone the posting source.
virtual Xapian::doccount get_termfreq_min() const =0
A lower bound on the number of documents this object can return.
virtual void reset(const Database &db, Xapian::doccount shard_index)
Set this PostingSource to the start of the list of postings.
virtual double get_weight() const
Return the weight contribution for the current document.
virtual Xapian::docid get_docid() const =0
Return the current docid.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
virtual std::string get_description() const
Return a string describing this object.
double get_maxweight() const noexcept
Return the currently set upper bound on what get_weight() can return.
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGLINE(a, b)
Definition: debuglog.h:485
Calculated bounds on and estimate of number of matches.
Return document ids from an external source.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
External sources of posting information.