xapian-core  2.0.0
maxpostlist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2010,2011,2012,2013,2014,2017 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "maxpostlist.h"
25 
26 #include "debuglog.h"
27 #include "omassert.h"
28 
29 using namespace std;
30 
32 {
33  if (plist) {
34  for (size_t i = 0; i < n_kids; ++i) {
35  delete plist[i];
36  }
37  delete [] plist;
38  }
39 }
40 
43 {
44  return did;
45 }
46 
47 double
49  Xapian::termcount unique_terms,
50  Xapian::termcount wdfdocmax) const
51 {
52  Assert(did);
53  double res = 0.0;
54  for (size_t i = 0; i < n_kids; ++i) {
55  if (plist[i]->get_docid() == did)
56  res = max(res, plist[i]->get_weight(doclen,
57  unique_terms,
58  wdfdocmax));
59  }
60  return res;
61 }
62 
63 bool
65 {
66  return (did == 0);
67 }
68 
69 double
71 {
72  double result = plist[0]->recalc_maxweight();
73  for (size_t i = 1; i < n_kids; ++i) {
74  result = max(result, plist[i]->recalc_maxweight());
75  }
76  return result;
77 }
78 
79 PostList *
80 MaxPostList::next(double w_min)
81 {
82  Xapian::docid old_did = did;
83  did = 0;
84  for (size_t i = 0; i < n_kids; UNSIGNED_OVERFLOW_OK(++i)) {
85  Xapian::docid cur_did = 0;
86  if (old_did != 0)
87  cur_did = plist[i]->get_docid();
88  if (cur_did <= old_did) {
89  PostList * res;
90  if (old_did == 0 || cur_did == old_did) {
91  res = plist[i]->next(w_min);
92  } else {
93  res = plist[i]->skip_to(old_did + 1, w_min);
94  }
95  if (res) {
96  delete plist[i];
97  plist[i] = res;
98  }
99 
100  if (plist[i]->at_end()) {
101  // erase_sublist(i) shuffles down i+1, etc down one index, so
102  // the next sublist to deal with is also at index i, unless
103  // this was the last index. We deal with this by decrementing
104  // i here and it'll be incremented by the loop, but this may
105  // underflow (which is OK because i is an unsigned type).
106  erase_sublist(UNSIGNED_OVERFLOW_OK(i--));
107  continue;
108  }
109 
110  if (res)
111  matcher->force_recalc();
112 
113  cur_did = plist[i]->get_docid();
114  }
115 
116  if (did == 0 || cur_did < did) {
117  did = cur_did;
118  }
119  }
120 
121  if (n_kids == 1) {
122  n_kids = 0;
123  return plist[0];
124  }
125 
126  return NULL;
127 }
128 
129 PostList *
130 MaxPostList::skip_to(Xapian::docid did_min, double w_min)
131 {
132  Xapian::docid old_did = did;
133  did = 0;
134  for (size_t i = 0; i < n_kids; ++i) {
135  Xapian::docid cur_did = 0;
136  if (old_did != 0)
137  cur_did = plist[i]->get_docid();
138  if (cur_did < did_min) {
139  PostList * res = plist[i]->skip_to(did_min, w_min);
140  if (res) {
141  delete plist[i];
142  plist[i] = res;
143  }
144 
145  if (plist[i]->at_end()) {
146  erase_sublist(i--);
147  continue;
148  }
149 
150  if (res)
151  matcher->force_recalc();
152 
153  cur_did = plist[i]->get_docid();
154  }
155 
156  if (did == 0 || cur_did < did) {
157  did = cur_did;
158  }
159  }
160 
161  if (n_kids == 1) {
162  n_kids = 0;
163  return plist[0];
164  }
165 
166  return NULL;
167 }
168 
169 void
171 {
172  plist[0]->get_docid_range(first, last);
173  for (size_t i = 1; i != n_kids; ++i) {
174  Xapian::docid f = 1, l = Xapian::docid(-1);
175  plist[i]->get_docid_range(f, l);
176  first = min(first, f);
177  last = max(last, l);
178  }
179 }
180 
181 string
183 {
184  string desc("(");
185  desc += plist[0]->get_description();
186  for (size_t i = 1; i < n_kids; ++i) {
187  desc += " MAX ";
188  desc += plist[i]->get_description();
189  }
190  desc += ')';
191  return desc;
192 }
193 
196 {
197  Xapian::termcount totwdf = 0;
198  for (size_t i = 0; i < n_kids; ++i) {
199  if (plist[i]->get_docid() == did)
200  totwdf += plist[i]->get_wdf();
201  }
202  return totwdf;
203 }
204 
207 {
208  return 1;
209 }
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
Definition: maxpostlist.cc:170
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Definition: maxpostlist.cc:206
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Definition: maxpostlist.cc:70
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
Definition: maxpostlist.cc:48
bool at_end() const
Return true if the current position is past the last entry in this list.
Definition: maxpostlist.cc:64
Xapian::termcount get_wdf() const
get_wdf() for MaxPostlist returns the sum of the wdfs of the sub postlists which match the current do...
Definition: maxpostlist.cc:195
Xapian::docid get_docid() const
Return the current docid.
Definition: maxpostlist.cc:42
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
Definition: maxpostlist.cc:130
std::string get_description() const
Return a string description of this object.
Definition: maxpostlist.cc:182
Abstract base class for postlists.
Definition: postlist.h:40
virtual PostList * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
virtual Xapian::docid get_docid() const =0
Return the current docid.
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
Debug logging macros.
N-way OR postlist with wt=max(wt_i)
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122