xapian-core  2.0.0
xorpostlist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2010,2011,2012,2016,2017 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "xorpostlist.h"
25 
26 #include "debuglog.h"
27 #include "omassert.h"
28 
29 #include <algorithm>
30 
31 using namespace std;
32 
34 {
35  if (plist) {
36  for (size_t i = 0; i < n_kids; ++i) {
37  delete plist[i];
38  }
39  delete [] plist;
40  }
41 }
42 
45 {
46  return did;
47 }
48 
49 double
51  Xapian::termcount unique_terms,
52  Xapian::termcount wdfdocmax) const
53 {
54  Assert(did);
55  double result = 0;
56  for (size_t i = 0; i < n_kids; ++i) {
57  if (plist[i]->get_docid() == did)
58  result += plist[i]->get_weight(doclen, unique_terms, wdfdocmax);
59  }
60  return result;
61 }
62 
63 bool
65 {
66  return (did == 0);
67 }
68 
69 double
71 {
72  LOGCALL(MATCH, double, "XorPostList::recalc_maxweight", NO_ARGS);
73  double max_total = plist[0]->recalc_maxweight();
74  double min_max = max_total;
75  for (size_t i = 1; i < n_kids; ++i) {
76  double new_max = plist[i]->recalc_maxweight();
77  if (new_max < min_max)
78  min_max = new_max;
79  max_total += new_max;
80  }
81  if ((n_kids & 1) == 0) {
82  // If n_kids is even, we omit the child with the smallest maxweight.
83  max_total -= min_max;
84  }
85  RETURN(max_total);
86 }
87 
88 PostList *
89 XorPostList::next(double w_min)
90 {
91  LOGCALL(MATCH, PostList*, "XorPostList::next", w_min);
92  Xapian::docid old_did = did;
93  did = 0;
94  size_t matching_count = 0;
95  for (size_t i = 0; i < n_kids; UNSIGNED_OVERFLOW_OK(++i)) {
96  if (old_did == 0 || plist[i]->get_docid() <= old_did) {
97  // FIXME: calculate the min weight required here...
98  PostList * res = plist[i]->next(0);
99  if (res) {
100  delete plist[i];
101  plist[i] = res;
102  matcher->force_recalc();
103  }
104 
105  if (plist[i]->at_end()) {
106  // erase_sublist(i) shuffles down i+1, etc down one index, so
107  // the next sublist to deal with is also at index i, unless
108  // this was the last index. We deal with this by decrementing
109  // i here and it'll be incremented by the loop, but this may
110  // underflow (which is OK because i is an unsigned type).
111  erase_sublist(UNSIGNED_OVERFLOW_OK(i--));
112  continue;
113  }
114  }
115 
116  Xapian::docid new_did = plist[i]->get_docid();
117  if (did == 0 || new_did < did) {
118  did = new_did;
119  matching_count = 1;
120  } else if (new_did == did) {
121  ++matching_count;
122  }
123  }
124 
125  if (n_kids == 1) {
126  n_kids = 0;
127  RETURN(plist[0]);
128  }
129 
130  // We've reached the end of all posting lists.
131  if (did == 0)
132  RETURN(NULL);
133 
134  // An odd number of sub-postlists match this docid, so the XOR matches.
135  if (matching_count & 1)
136  RETURN(NULL);
137 
138  // An even number of sub-postlists match this docid, so advance again.
139  RETURN(next(w_min));
140 }
141 
142 PostList *
143 XorPostList::skip_to(Xapian::docid did_min, double w_min)
144 {
145  LOGCALL(MATCH, PostList*, "XorPostList::skip_to", did_min | w_min);
146  Xapian::docid old_did = did;
147  did = 0;
148  size_t matching_count = 0;
149  for (size_t i = 0; i < n_kids; ++i) {
150  if (old_did == 0 || plist[i]->get_docid() < did_min) {
151  // FIXME: calculate the min weight required here...
152  PostList * res = plist[i]->skip_to(did_min, 0);
153  if (res) {
154  delete plist[i];
155  plist[i] = res;
156  matcher->force_recalc();
157  }
158 
159  if (plist[i]->at_end()) {
160  erase_sublist(i--);
161  continue;
162  }
163  }
164 
165  Xapian::docid new_did = plist[i]->get_docid();
166  if (did == 0 || new_did < did) {
167  did = new_did;
168  matching_count = 1;
169  } else if (new_did == did) {
170  ++matching_count;
171  }
172  }
173 
174  if (n_kids == 1) {
175  AssertEq(matching_count, 1);
176  n_kids = 0;
177  RETURN(plist[0]);
178  }
179 
180  // We've reached the end of all posting lists.
181  if (did == 0)
182  RETURN(NULL);
183 
184  // An odd number of sub-postlists match this docid, so the XOR matches.
185  if (matching_count & 1)
186  RETURN(NULL);
187 
188  // An even number of sub-postlists match this docid, so call next.
189  RETURN(next(w_min));
190 }
191 
192 void
194 {
195  plist[0]->get_docid_range(first, last);
196  for (size_t i = 1; i != n_kids; ++i) {
197  Xapian::docid f = 1, l = Xapian::docid(-1);
198  plist[i]->get_docid_range(f, l);
199  first = min(first, f);
200  last = max(last, l);
201  }
202 }
203 
204 string
206 {
207  string desc("(");
208  desc += plist[0]->get_description();
209  for (size_t i = 1; i < n_kids; ++i) {
210  desc += " XOR ";
211  desc += plist[i]->get_description();
212  }
213  desc += ')';
214  return desc;
215 }
216 
219 {
220  Xapian::termcount totwdf = 0;
221  for (size_t i = 0; i < n_kids; ++i) {
222  if (plist[i]->get_docid() == did)
223  totwdf += plist[i]->get_wdf();
224  }
225  return totwdf;
226 }
227 
230 {
231  Xapian::termcount total = 0;
232  for (size_t i = 0; i < n_kids; ++i) {
233  if (plist[i]->get_docid() == did)
234  total += plist[i]->count_matching_subqs();
235  }
236  return total;
237 }
Abstract base class for postlists.
Definition: postlist.h:40
virtual PostList * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
Definition: xorpostlist.cc:193
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
Definition: xorpostlist.cc:50
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Definition: xorpostlist.cc:229
std::string get_description() const
Return a string description of this object.
Definition: xorpostlist.cc:205
Xapian::docid get_docid() const
Return the current docid.
Definition: xorpostlist.cc:44
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Definition: xorpostlist.cc:70
bool at_end() const
Return true if the current position is past the last entry in this list.
Definition: xorpostlist.cc:64
Xapian::termcount get_wdf() const
Get the within-document frequency.
Definition: xorpostlist.cc:218
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
Definition: xorpostlist.cc:143
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
#define AssertEq(A, B)
Definition: omassert.h:124
#define Assert(COND)
Definition: omassert.h:122
N-way XOR postlist.