xapian-core  1.4.25
andmaybepostlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2003,2004,2005,2008,2009,2011,2017 Olly Betts
7  * Copyright 2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 #include "andmaybepostlist.h"
27 
28 #include "debuglog.h"
29 #include "multiandpostlist.h"
30 #include "omassert.h"
31 
// AndMaybePostList::process_next_or_skip_to -- name taken from the LOGCALL
// string below, which also logs the arguments w_min and ret.
// NOTE(review): the signature line itself is not visible in this listing.
//
// Common tail of next() and skip_to(): `ret` is whatever advancing the left
// sub-postlist `l` returned.  After the left side has moved, this updates
// lhead and, when the left side has gone past the right side, moves the right
// sub-postlist `r` forward as well.
//
// Returns NULL in every case except when `r` has reached its end; then the
// left sub-postlist is returned and the member `l` is cleared.
// NOTE(review): presumably the caller substitutes the returned postlist for
// this object -- confirm against the callers.
32 PostList *
34 {
35  LOGCALL(MATCH, PostList *, "AndMaybePostList::process_next_or_skip_to", w_min | ret);
// Apply any replacement postlist that advancing l handed back.
36  handle_prune(l, ret);
37  if (l->at_end()) {
38  // once l is over, so is the AND MAYBE
// lhead == 0 is exactly the condition at_end() tests.
39  lhead = 0;
40  RETURN(NULL);
41  }
42 
43  lhead = l->get_docid();
// The right side is already at or beyond the new left position, so it does
// not need to be moved.
44  if (lhead <= rhead) RETURN(NULL);
45 
// `valid` is written by check_handling_prune (it is passed by reference).
46  bool valid;
// The right side is moved towards lhead with the minimum weight reduced by
// lmax, the cached weight bound of the left side.
47  check_handling_prune(r, lhead, w_min - lmax, matcher, valid);
48  if (r->at_end()) {
// Hand the left sub-postlist back and drop our pointer to it.
49  PostList *tmp = l;
50  l = NULL;
51  RETURN(tmp);
52  }
53  if (valid) {
54  rhead = r->get_docid();
55  } else {
// No usable right-hand docid was reported; 0 can never equal a started lhead
// (get_docid() asserts lhead != 0), so the lhead == rhead tests stay false.
56  rhead = 0;
57  }
58  RETURN(NULL);
59 }
60 
// AndMaybePostList::sync_rhs -- name taken from the LOGCALL string below,
// which logs the single argument w_min.
// NOTE(review): the signature line itself is not visible in this listing.
//
// Moves the right sub-postlist `r` towards the current left position lhead
// and records where it ended up in rhead.  The statements are the same as the
// second half of process_next_or_skip_to().
//
// Returns the left sub-postlist (and clears the member `l`) when `r` has
// reached its end; otherwise returns NULL.
61 PostList *
63 {
64  LOGCALL(MATCH, PostList *, "AndMaybePostList::sync_rhs", w_min);
// `valid` is written by check_handling_prune (it is passed by reference).
65  bool valid;
66  check_handling_prune(r, lhead, w_min - lmax, matcher, valid);
67  if (r->at_end()) {
// Hand the left sub-postlist back and drop our pointer to it.
68  PostList *tmp = l;
69  l = NULL;
70  RETURN(tmp);
71  }
72  if (valid) {
73  rhead = r->get_docid();
74  } else {
// No usable right-hand docid was reported; store 0 instead.
75  rhead = 0;
76  }
77  RETURN(NULL);
78 }
79 
// AndMaybePostList::next -- name taken from the LOGCALL string below, which
// logs the single argument w_min.
// NOTE(review): the signature line itself is not visible in this listing.
//
// Advances to the next position.  Two paths:
//  * w_min > lmax: a new MultiAndPostList is built from both sub-postlists,
//    moved past the current heads, and returned.
//  * otherwise: the left sub-postlist is advanced and the result is passed
//    to process_next_or_skip_to(), whose return value is returned.
80 PostList *
82 {
83  LOGCALL(MATCH, PostList *, "AndMaybePostList::next", w_min);
// lmax is the value l->recalc_maxweight() produced (see recalc_maxweight()),
// so when w_min exceeds it the left side alone cannot reach w_min.
84  if (w_min > lmax) {
85  // we can replace the AND MAYBE with an AND
86  PostList *ret;
87  LOGLINE(MATCH, "AND MAYBE -> AND");
88  ret = new MultiAndPostList(l, r, lmax, rmax, matcher, dbsize);
// NOTE(review): presumably the new MultiAndPostList now owns both
// sub-postlists, which is why our pointers are cleared -- confirm.
89  l = r = NULL;
// The "+ 1" targets the first docid after both current heads, so the
// replacement starts beyond the current position.
90  skip_to_handling_prune(ret, std::max(lhead, rhead) + 1, w_min, matcher);
91  RETURN(ret);
92  }
// The left side is advanced with the minimum weight reduced by rmax, the
// cached weight bound of the right side.
93  RETURN(process_next_or_skip_to(w_min, l->next(w_min - rmax)));
94 }
95 
// AndMaybePostList::skip_to -- name taken from the LOGCALL string below,
// which logs the arguments did and w_min.
// NOTE(review): the signature line itself is not visible in this listing.
//
// Skips forward to docid `did`.  Three paths:
//  * w_min > lmax: a new MultiAndPostList is built from both sub-postlists,
//    skipped to the target, and returned.
//  * did <= lhead: nothing to do, returns NULL.
//  * otherwise: the left sub-postlist is skipped to `did` and the result is
//    passed to process_next_or_skip_to(), whose return value is returned.
96 PostList *
98 {
99  LOGCALL(MATCH, PostList *, "AndMaybePostList::skip_to", did | w_min);
100  if (w_min > lmax) {
101  // we can replace the AND MAYBE with an AND
102  PostList *ret;
103  LOGLINE(MATCH, "AND MAYBE -> AND (in skip_to)");
104  ret = new MultiAndPostList(l, r, lmax, rmax, matcher, dbsize);
// Never target a docid before either current head.  Unlike next() there is
// no "+ 1" here: the target may equal the current position.
105  did = std::max(did, std::max(lhead, rhead));
// NOTE(review): presumably the new MultiAndPostList now owns both
// sub-postlists, which is why our pointers are cleared -- confirm.
106  l = r = NULL;
107  skip_to_handling_prune(ret, did, w_min, matcher);
108  RETURN(ret);
109  }
110 
111  // exit if we're already past the skip point (or at it)
112  if (did <= lhead) RETURN(NULL);
113 
// The left side is skipped with the minimum weight reduced by rmax, the
// cached weight bound of the right side.
114  RETURN(process_next_or_skip_to(w_min, l->skip_to(did, w_min - rmax)));
115 }
116 
// AndMaybePostList::get_termfreq_max -- name and the Xapian::doccount result
// type are taken from the LOGCALL below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line and the return statement (listing line
// 122) are not visible in this listing; only the comment describing the
// intended value survives.
119 {
120  LOGCALL(MATCH, Xapian::doccount, "AndMaybePostList::get_termfreq_max", NO_ARGS);
121  // Termfreq is exactly that of left hand branch.
123 }
124 
// AndMaybePostList::get_termfreq_min -- name and the Xapian::doccount result
// type are taken from the LOGCALL below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line and the return statement (listing line
// 130) are not visible in this listing; only the comment describing the
// intended value survives.
127 {
128  LOGCALL(MATCH, Xapian::doccount, "AndMaybePostList::get_termfreq_min", NO_ARGS);
129  // Termfreq is exactly that of left hand branch.
131 }
132 
// AndMaybePostList::get_termfreq_est -- name and the Xapian::doccount result
// type are taken from the LOGCALL below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line and the return statement (listing line
// 138) are not visible in this listing; only the comment describing the
// intended value survives.
135 {
136  LOGCALL(MATCH, Xapian::doccount, "AndMaybePostList::get_termfreq_est", NO_ARGS);
137  // Termfreq is exactly that of left hand branch.
139 }
140 
// AndMaybePostList::get_termfreq_est_using_stats -- name taken from the
// LOGCALL string below.  Const member taking the collection statistics
// `stats` by const reference and producing a TermFreqs.
// NOTE(review): the line carrying the function name (listing line 142) and
// the return statement (listing line 147) are not visible in this listing;
// only the comment describing the intended value survives.
141 TermFreqs
143  const Xapian::Weight::Internal & stats) const
144 {
145  LOGCALL(MATCH, TermFreqs, "AndMaybePostList::get_termfreq_est_using_stats", stats);
146  // Termfreq is exactly that of left hand branch.
148 }
149 
// AndMaybePostList::get_docid -- name and the Xapian::docid result type are
// taken from the LOGCALL below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line is not visible in this listing.
//
// Returns lhead, the docid last read from the left sub-postlist.  lhead is
// asserted to be non-zero, i.e. the list must have been started and must not
// be at its end (at_end() is lhead == 0).
152 {
153  LOGCALL(MATCH, Xapian::docid, "AndMaybePostList::get_docid", NO_ARGS);
154  Assert(lhead != 0); // check we've started
155  RETURN(lhead);
156 }
157 
// AndMaybePostList::get_weight -- name taken from the LOGCALL string below;
// it takes no arguments (NO_ARGS).
// NOTE(review): the signature line is not visible in this listing.
//
// Returns the weight for the current position: the left sub-postlist always
// contributes; the right one contributes too only when it sits on the same
// docid (lhead == rhead).
158 // only called if we are doing a probabilistic AND MAYBE
159 double
161 {
162  LOGCALL(MATCH, double, "AndMaybePostList::get_weight", NO_ARGS);
163  Assert(lhead != 0); // check we've started
164  if (lhead == rhead) RETURN(l->get_weight() + r->get_weight());
165  RETURN(l->get_weight());
166 }
167 
// AndMaybePostList::get_maxweight -- name taken from the LOGCALL string
// below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line is not visible in this listing.
//
// Returns lmax + rmax, the sum of the two cached sub-postlist bounds.  These
// members are refreshed by recalc_maxweight(); nothing is recomputed here.
168 // only called if we are doing a probabilistic operation
169 double
171 {
172  LOGCALL(MATCH, double, "AndMaybePostList::get_maxweight", NO_ARGS);
173  RETURN(lmax + rmax);
174 }
175 
// AndMaybePostList::recalc_maxweight -- name taken from the LOGCALL string
// below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line and the return statement (listing line
// 182) are not visible in this listing.
//
// Refreshes the cached bounds lmax and rmax by asking each sub-postlist to
// recalculate its own maximum weight.
176 double
178 {
179  LOGCALL(MATCH, double, "AndMaybePostList::recalc_maxweight", NO_ARGS);
180  lmax = l->recalc_maxweight();
181  rmax = r->recalc_maxweight();
183 }
184 
// AndMaybePostList::at_end -- name taken from the LOGCALL string below; it
// takes no arguments (NO_ARGS).
// NOTE(review): the signature line is not visible in this listing.
//
// Reports true when lhead is 0.  process_next_or_skip_to() stores 0 in lhead
// once the left sub-postlist has reached its end.
185 bool
187 {
188  LOGCALL(MATCH, bool, "AndMaybePostList::at_end", NO_ARGS);
189  RETURN(lhead == 0);
190 }
191 
// Builds a description of this postlist of the form
// "(<left description> AndMaybe <right description>)" from the descriptions
// of the two sub-postlists.
// NOTE(review): the signature line is not visible in this listing; the
// declaration shown in the page index is
// `std::string get_description() const`.
192 std::string
194 {
195  return "(" + l->get_description() + " AndMaybe " + r->get_description() +
196  ")";
197 }
198 
// AndMaybePostList::get_doclength -- name and the Xapian::termcount result
// type are taken from the LOGCALL below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line and listing line 204 are not visible in
// this listing.
//
// Returns the document length reported by the left sub-postlist, after
// asserting that the list has been started (lhead != 0).
201 {
202  LOGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_doclength", NO_ARGS);
203  Assert(lhead != 0); // check we've started
205  RETURN(l->get_doclength());
206 }
207 
// AndMaybePostList::get_unique_terms -- name and the Xapian::termcount result
// type are taken from the LOGCALL below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line and everything after the Assert (listing
// lines 213-214, including the return statement) are not visible in this
// listing.
210 {
211  LOGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_unique_terms", NO_ARGS);
212  Assert(lhead != 0); // check we've started
215 }
216 
// AndMaybePostList::get_wdf -- name and the Xapian::termcount result type are
// taken from the LOGCALL below; it takes no arguments (NO_ARGS).
// NOTE(review): the signature line is not visible in this listing.
//
// Returns the wdf at the current position: the left sub-postlist's wdf, plus
// the right one's when the right side sits on the same docid
// (lhead == rhead).
219 {
220  LOGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_wdf", NO_ARGS);
221  if (lhead == rhead) RETURN(l->get_wdf() + r->get_wdf());
222  RETURN(l->get_wdf());
223 }
224 
// AndMaybePostList::count_matching_subqs -- name and the Xapian::termcount
// result type are taken from the LOGCALL below; it takes no arguments
// (NO_ARGS).
// NOTE(review): the signature line and the statements following the `if`
// (listing lines 230-231) are not visible in this listing, so only the
// "both sides on the same docid" test survives here.
227 {
228  LOGCALL(MATCH, Xapian::termcount, "AndMaybePostList::count_matching_subqs", NO_ARGS);
229  if (lhead == rhead)
232 }
233 
// Forwards `orposlist` to the left sub-postlist unconditionally, and to the
// right sub-postlist only when it sits on the same docid as the left
// (lhead == rhead).
// NOTE(review): the signature line is not visible in this listing; the
// declaration shown in the page index is
// `void gather_position_lists(OrPositionList *orposlist)`.
234 void
236 {
237  l->gather_position_lists(orposlist);
238  if (lhead == rhead) r->gather_position_lists(orposlist);
239 }
#define RETURN(A)
Definition: debuglog.h:493
#define Assert(COND)
Definition: omassert.h:122
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
MultiMatch * matcher
The object which is using this postlist to perform a match.
Abstract base class for postlists.
Definition: postlist.h:37
#define AssertEq(A, B)
Definition: omassert.h:124
Merged postlist: items from one list, weights from both.
virtual Xapian::docid get_docid() const =0
Return the current docid.
PostList * l
Left sub-postlist.
virtual void gather_position_lists(OrPositionList *orposlist)
Gather PositionList* objects for a subtree.
virtual Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
virtual Internal * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual double recalc_maxweight()=0
Recalculate the upper bound on what get_weight() can return.
Xapian::doccount get_termfreq_min() const
Get a lower bound on the number of documents indexed by this term.
PostList * sync_rhs(double w_min)
Synchronise the RHS to the LHS after construction.
virtual Xapian::termcount get_unique_terms() const =0
Return the number of unique terms in the current document.
virtual Xapian::termcount get_doclength() const
Return the document length of the document the current term comes from.
bool skip_to_handling_prune(PostList *&pl, Xapian::docid did, double w_min, MultiMatch *matcher)
void handle_prune(PostList *&kid, PostList *ret)
Utility method, to call recalc_maxweight() and do the pruning if a next() or skip_to() returns a non-NULL result.
void gather_position_lists(OrPositionList *orposlist)
Gather PositionList* objects for a subtree.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::docid lhead
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
std::string get_description() const
Return a string description of this object.
virtual TermFreqs get_termfreq_est_using_stats(const Xapian::Weight::Internal &stats) const
Get an estimate for the termfreq and reltermfreq, given the stats.
Definition: postlist.cc:36
virtual Xapian::doccount get_termfreq_max() const =0
Get an upper bound on the number of documents indexed by this term.
virtual Xapian::doccount get_termfreq_est() const =0
Get an estimate of the number of documents indexed by this term.
Xapian::termcount get_wdf() const
get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the sub postlists which are at the current document.
virtual Xapian::doccount get_termfreq_min() const =0
Get a lower bound on the number of documents indexed by this term.
Class to hold statistics for a given collection.
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
N-way AND postlist.
Xapian::doccount dbsize
PostList * process_next_or_skip_to(double w_min, PostList *ret)
bool check_handling_prune(PostList *&pl, Xapian::docid did, double w_min, MultiMatch *matcher, bool &valid)
double get_weight() const
Return the weight contribution for the current position.
TermFreqs get_termfreq_est_using_stats(const Xapian::Weight::Internal &stats) const
Get an estimate for the termfreq and reltermfreq, given the stats.
Xapian::doccount get_termfreq_est() const
Get an estimate of the number of documents indexed by this term.
double get_maxweight() const
Return an upper bound on what get_weight() can return.
The frequencies for a term.
virtual Internal * next(double w_min)=0
Advance the current position to the next document in the postlist.
virtual Xapian::termcount get_doclength() const =0
Return the length of current document.
virtual std::string get_description() const =0
Return a string description of this object.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
PostList * r
Right sub-postlist.
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
virtual Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
N-way AND postlist.
Xapian::docid get_docid() const
Return the current docid.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
Various assertion macros.
#define LOGLINE(a, b)
Definition: debuglog.h:494
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
bool at_end() const
Return true if the current position is past the last entry in this list.
virtual Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Definition: postlist.cc:44
virtual double get_weight() const =0
Return the weight contribution for the current position.
Xapian::doccount get_termfreq_max() const
Get an upper bound on the number of documents indexed by this term.
Debug logging macros.
Xapian::docid rhead
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487