xapian-core  2.0.0
boolorpostlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 2017,2018 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "boolorpostlist.h"
24 
25 #include "heap.h"
26 #include "postlisttree.h"
27 
28 #include <algorithm>
29 #include <functional>
30 
31 using namespace std;
32 
34 {
35  if (plist) {
36  for (size_t i = 0; i < n_kids; ++i) {
37  delete plist[i].pl;
38  }
39  delete [] plist;
40  }
41 }
42 
45 {
46  Assert(did != 0);
47  return did;
48 }
49 
50 double
53  Xapian::termcount) const
54 {
55  return 0;
56 }
57 
58 double
60 {
61  return 0;
62 }
63 
64 PostList*
66 {
67  while (plist[0].did == did) {
68  PostList* res = plist[0].pl->next(0);
69  if (res) {
70  delete plist[0].pl;
71  plist[0].pl = res;
72  }
73 
74  if (plist[0].pl->at_end()) {
75  if (n_kids == 1) {
76  // We've reached the end of all posting lists - prune
77  // returning an at_end postlist.
78  n_kids = 0;
79  return plist[0].pl;
80  }
81  Heap::pop(plist, plist + n_kids, std::greater<PostListAndDocID>());
82  delete plist[--n_kids].pl;
83  continue;
84  }
85  plist[0].did = plist[0].pl->get_docid();
86  Heap::replace(plist, plist + n_kids, std::greater<PostListAndDocID>());
87  }
88 
89  if (n_kids == 1) {
90  n_kids = 0;
91  return plist[0].pl;
92  }
93 
94  did = plist[0].did;
95 
96  return NULL;
97 }
98 
99 PostList*
101 {
102  if (rare(did_min <= did)) return NULL;
103  did = Xapian::docid(-1);
104  size_t j = 0;
105  for (size_t i = 0; i < n_kids; ++i) {
106  if (plist[i].did < did_min) {
107  PostList * res = plist[i].pl->skip_to(did_min, 0);
108  if (res) {
109  delete plist[i].pl;
110  plist[j].pl = res;
111  } else {
112  plist[j].pl = plist[i].pl;
113  }
114 
115  if (plist[j].pl->at_end()) {
116  if (j == 0 && i == n_kids - 1) {
117  // We've reached the end of all posting lists - prune
118  // returning an at_end postlist.
119  n_kids = 0;
120  return plist[j].pl;
121  }
122  delete plist[j].pl;
123  continue;
124  }
125  plist[j].did = plist[j].pl->get_docid();
126  } else if (j != i) {
127  plist[j] = plist[i];
128  }
129 
130  did = min(did, plist[j].did);
131 
132  ++j;
133  }
134 
135  Assert(j != 0);
136  n_kids = j;
137  if (n_kids == 1) {
138  n_kids = 0;
139  return plist[0].pl;
140  }
141 
142  // Restore the heap invariant.
143  Heap::make(plist, plist + n_kids, std::greater<PostListAndDocID>());
144 
145  return NULL;
146 }
147 
148 bool
150 {
151  // We never need to return true here - if all but one child reaches
152  // at_end(), we prune to leave just that child. If all children reach
153  // at_end() together, we prune to leave one of them which will then
154  // indicate at_end() for us.
155  return false;
156 }
157 
158 void
160 {
161  plist[0].pl->get_docid_range(first, last);
162  for (size_t i = 1; i != n_kids; ++i) {
163  Xapian::docid f = 1, l = Xapian::docid(-1);
164  plist[i].pl->get_docid_range(f, l);
165  first = min(first, f);
166  last = max(last, l);
167  }
168 }
169 
170 std::string
172 {
173  string desc = "BoolOrPostList(";
174  desc += plist[0].pl->get_description();
175  for (size_t i = 1; i < n_kids; ++i) {
176  desc += ", ";
177  desc += plist[i].pl->get_description();
178  }
179  desc += ')';
180  return desc;
181 }
182 
185 {
186  return for_all_matches([](PostList* pl) {
187  return pl->get_wdf();
188  });
189 }
190 
193 {
194  return for_all_matches([](PostList* pl) {
195  return pl->count_matching_subqs();
196  });
197 }
198 
199 void
201 {
202  for_all_matches([&orposlist](PostList* pl) {
203  pl->gather_position_lists(orposlist);
204  return 0;
205  });
206 }
PostList class implementing unweighted Query::OP_OR.
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
std::string get_description() const
Return a string description of this object.
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
void gather_position_lists(OrPositionList *orposlist)
Gather PositionList* objects for a subtree.
Xapian::docid get_docid() const
Return the current docid.
Abstract base class for postlists.
Definition: postlist.h:40
virtual PostList * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
virtual Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Definition: postlist.cc:34
virtual Xapian::docid get_docid() const =0
Return the current docid.
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
virtual void gather_position_lists(OrPositionList *orposlist)
Gather PositionList* objects for a subtree.
Definition: postlist.cc:66
virtual Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Definition: postlist.cc:59
#define rare(COND)
Definition: config.h:607
C++ STL heap implementation with extensions.
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:213
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:230
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:259
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define Assert(COND)
Definition: omassert.h:122
Class for managing a tree of PostList objects.