xapian-core  1.4.21
mergepostlist.cc
Go to the documentation of this file.
1 /* mergepostlist.cc: merge postlists from different databases
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2006,2008,2009,2011,2015,2016 Olly Betts
6  * Copyright 2007,2009 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 #include "mergepostlist.h"
26 
27 #include "multimatch.h"
28 #include "api/emptypostlist.h"
29 #include "branchpostlist.h"
30 #include "debuglog.h"
31 #include "omassert.h"
32 #include "valuestreamdocument.h"
33 
34 #include "xapian/error.h"
35 
36 // NB don't prune - even with one sublist we still translate docids...
37 
39 {
40  LOGCALL_DTOR(MATCH, "MergePostList");
41  std::vector<PostList *>::const_iterator i;
42  for (i = plists.begin(); i != plists.end(); ++i) {
43  delete *i;
44  }
45 }
46 
47 PostList *
48 MergePostList::next(double w_min)
49 {
50  LOGCALL(MATCH, PostList *, "MergePostList::next", w_min);
51  LOGVALUE(MATCH, current);
52  if (current == -1) current = 0;
53  while (true) {
54  // FIXME: should skip over Remote matchers which aren't ready yet
55  // and come back to them later...
57  if (!plists[current]->at_end()) break;
58  ++current;
59  if (unsigned(current) >= plists.size()) break;
60  vsdoc.new_subdb(current);
62  }
63  LOGVALUE(MATCH, current);
64  RETURN(NULL);
65 }
66 
67 PostList *
69 {
70  LOGCALL(MATCH, PostList *, "MergePostList::skip_to", did | w_min);
71  (void)did;
72  (void)w_min;
73  // MergePostList doesn't return documents in docid order, so skip_to
74  // isn't a meaningful operation.
75  throw Xapian::InvalidOperationError("MergePostList doesn't support skip_to");
76 }
77 
// Body of the function logged as "MergePostList::get_wdf"; the LOGCALL
// declares a Xapian::termcount result and no arguments.
// NOTE(review): the signature line and the statement between the LOGCALL
// and the closing brace are absent from this listing (the embedded
// numbering skips them), so the value returned cannot be confirmed here.
80 {
81  LOGCALL(MATCH, Xapian::termcount, "MergePostList::get_wdf", NO_ARGS);
83 }
84 
87 {
88  LOGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_max", NO_ARGS);
89  // sum of termfreqs for all children
90  Xapian::doccount total = 0;
91  vector<PostList *>::const_iterator i;
92  for (i = plists.begin(); i != plists.end(); ++i) {
93  total += (*i)->get_termfreq_max();
94  }
95  RETURN(total);
96 }
97 
100 {
101  LOGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_min", NO_ARGS);
102  // sum of termfreqs for all children
103  Xapian::doccount total = 0;
104  vector<PostList *>::const_iterator i;
105  for (i = plists.begin(); i != plists.end(); ++i) {
106  total += (*i)->get_termfreq_min();
107  }
108  RETURN(total);
109 }
110 
113 {
114  LOGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_est", NO_ARGS);
115  // sum of termfreqs for all children
116  Xapian::doccount total = 0;
117  vector<PostList *>::const_iterator i;
118  for (i = plists.begin(); i != plists.end(); ++i) {
119  total += (*i)->get_termfreq_est();
120  }
121  RETURN(total);
122 }
123 
126 {
127  LOGCALL(MATCH, Xapian::docid, "MergePostList::get_docid", NO_ARGS);
128  Assert(current != -1);
129  // FIXME: this needs fixing so we can prune plists - see MultiPostlist
130  // for code which does this...
131  RETURN((plists[current]->get_docid() - 1) * plists.size() + current + 1);
132 }
133 
// Body of the function logged as "MergePostList::get_weight" (double
// result, no arguments).  It asserts that `current` is not -1, the value
// next() replaces with 0 before selecting a sub-postlist.
// NOTE(review): the name line and the statement after the Assert are absent
// from this listing (the embedded numbering skips them); presumably the
// missing statement returns the value - confirm against the real file.
134 double
136 {
137  LOGCALL(MATCH, double, "MergePostList::get_weight", NO_ARGS);
138  Assert(current != -1);
140 }
141 
// Body of the function logged as "MergePostList::get_sort_key" (result type
// const string *, no arguments).  It asserts that `current` is not -1, the
// value next() replaces with 0 before selecting a sub-postlist.
// NOTE(review): the name line and the statement after the Assert are absent
// from this listing (the embedded numbering skips them); presumably the
// missing statement returns the value - confirm against the real file.
142 const string *
144 {
145  LOGCALL(MATCH, const string *, "MergePostList::get_sort_key", NO_ARGS);
146  Assert(current != -1);
148 }
149 
// Body of the function logged as "MergePostList::get_collapse_key" (result
// type const string *, no arguments).  It asserts that `current` is not -1,
// the value next() replaces with 0 before selecting a sub-postlist.
// NOTE(review): the name line and the statement after the Assert are absent
// from this listing (the embedded numbering skips them); presumably the
// missing statement returns the value - confirm against the real file.
150 const string *
152 {
153  LOGCALL(MATCH, const string *, "MergePostList::get_collapse_key", NO_ARGS);
154  Assert(current != -1);
156 }
157 
158 double
160 {
161  LOGCALL(MATCH, double, "MergePostList::get_maxweight", NO_ARGS);
162  RETURN(w_max);
163 }
164 
165 double
167 {
168  LOGCALL(MATCH, double, "MergePostList::recalc_maxweight", NO_ARGS);
169  w_max = 0;
170  vector<PostList *>::iterator i;
171  for (i = plists.begin(); i != plists.end(); ++i) {
172  double w = (*i)->recalc_maxweight();
173  if (w > w_max) w_max = w;
174  }
175  RETURN(w_max);
176 }
177 
178 bool
180 {
181  LOGCALL(MATCH, bool, "MergePostList::at_end", NO_ARGS);
182  Assert(current != -1);
183  RETURN(unsigned(current) >= plists.size());
184 }
185 
186 string
188 {
189  string desc = "( Merge ";
190  vector<PostList *>::const_iterator i;
191  for (i = plists.begin(); i != plists.end(); ++i) {
192  desc += (*i)->get_description() + " ";
193  }
194  return desc + ")";
195 }
196 
// Body of the function logged as "MergePostList::get_doclength"
// (Xapian::termcount result, no arguments).  It asserts that `current` is
// not -1, the value next() replaces with 0 before selecting a sub-postlist.
// NOTE(review): the signature line and the statement after the Assert are
// absent from this listing (the embedded numbering skips them); presumably
// the missing statement returns the value - confirm against the real file.
199 {
200  LOGCALL(MATCH, Xapian::termcount, "MergePostList::get_doclength", NO_ARGS);
201  Assert(current != -1);
203 }
204 
// Body of the function logged as "MergePostList::get_unique_terms"
// (Xapian::termcount result, no arguments).  It asserts that `current` is
// not -1, the value next() replaces with 0 before selecting a sub-postlist.
// NOTE(review): the signature line and the statement after the Assert are
// absent from this listing (the embedded numbering skips them); presumably
// the missing statement returns the value - confirm against the real file.
207 {
208  LOGCALL(MATCH, Xapian::termcount, "MergePostList::get_unique_terms", NO_ARGS);
209  Assert(current != -1);
211 }
212 
// Body of the function logged as "MergePostList::count_matching_subqs"
// (Xapian::termcount result, no arguments).
// NOTE(review): the signature line and the statement between the LOGCALL
// and the closing brace are absent from this listing (the embedded
// numbering skips them), so the value returned cannot be confirmed here.
215 {
216  LOGCALL(MATCH, Xapian::termcount, "MergePostList::count_matching_subqs", NO_ARGS);
218 }
#define RETURN(A)
Definition: debuglog.h:482
#define Assert(COND)
Definition: omassert.h:122
Abstract base class for postlists.
Definition: postlist.h:37
string get_description() const
Return a string description of this object.
class for performing a match
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
A PostList which contains no entries.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:479
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
const string * get_sort_key() const
double get_weight() const
Return the weight contribution for the current position.
Hierarchy of classes which Xapian can throw as exceptions.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
virtual base class for branched types of postlist
#define LOGVALUE(a, b)
Definition: debuglog.h:484
merge postlists from different databases
Xapian::doccount get_termfreq_max() const
Get an upper bound on the number of documents indexed by this term.
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
const string * get_collapse_key() const
If the collapse key is already known, return it.
Xapian::doccount get_termfreq_min() const
Get a lower bound on the number of documents indexed by this term.
void recalc_maxweight()
Called by postlists to indicate that they've rearranged themselves and the maxweight now possible is ...
Definition: multimatch.h:136
virtual Xapian::termcount get_unique_terms() const
Return the number of unique terms in the document.
A document which gets its values from a ValueStreamManager.
Xapian::docid get_docid() const
Return the current docid.
Xapian::doccount get_termfreq_est() const
Get an estimate of the number of documents indexed by this term.
bool next_handling_prune(PostList *&pl, double w_min, MultiMatch *matcher)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
ValueStreamDocument & vsdoc
Document proxy used for valuestream caching.
Definition: mergepostlist.h:60
virtual Xapian::termcount get_doclength() const
Return the document length of the document the current term comes from.
double get_maxweight() const
Return an upper bound on what get_weight() can return.
Various assertion macros.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
vector< PostList * > plists
Definition: mergepostlist.h:43
bool at_end() const
Return true if the current position is past the last entry in this list.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
MultiMatch * matcher
The object which is using this postlist to perform a match.
Definition: mergepostlist.h:52