xapian-core  2.0.0
boolorpostlist.h
Go to the documentation of this file.
1 
4 /* Copyright 2017,2018 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_BOOLORPOSTLIST_H
22 #define XAPIAN_INCLUDED_BOOLORPOSTLIST_H
23 
24 #include "backends/postlist.h"
25 
27 class BoolOrPostList : public PostList {
29  void operator=(const BoolOrPostList&) = delete;
30 
32  BoolOrPostList(const BoolOrPostList&) = delete;
33 
36 
38  size_t n_kids;
39 
42 
44 
45  PostListAndDocID() : pl(nullptr) { }
46 
47  PostListAndDocID(PostList* pl_) : pl(pl_) { }
48 
49  bool operator>(const PostListAndDocID& o) const {
50  return did > o.did;
51  }
52  };
53 
55  PostListAndDocID* plist = nullptr;
56 
92  template<typename F>
94  for_all_matches(F func) const
95  {
96  size_t i = 0;
97  Xapian::termcount result = 0;
98  AssertEq(plist[0].did, did);
99  while (true) {
100  result += func(plist[i].pl);
101  // Children of i are (2 * i + 1) and (2 * i + 2).
102  size_t j = 2 * i + 2;
103  if (j < n_kids && plist[j].did == did) {
104  // Down right.
105  i = j;
106  continue;
107  }
108  --j;
109  if (j < n_kids && plist[j].did == did) {
110  // Down left.
111  i = j;
112  continue;
113  }
114  try_left:
115  if (i < 2) break;
116  if ((i & 1) == 0 && plist[i - 1].did == did) {
117  // Left.
118  --i;
119  continue;
120  }
121  // Up.
122  //
123  // We can ascend back up to our parent, plus any sequence of
124  // contiguous left branches up from our parent, with a neat
125  // bit-twiddling trick if we have __builtin_ffs() available.
126 #if HAVE_DECL___BUILTIN_FFS
127  ++i;
128  i >>= __builtin_ffs(i & ~1) - 1;
129  --i;
130 #else
131  // Fall-back to just ascending one level at a time, which is
132  // simple and probably similarly efficient to a portable
133  // fallback ffs() implementation which doesn't make use of
134  // specialised machine code instructions, since the distance
135  // we can ascend will be usually be short.
136  i = (i - 1) / 2;
137 #endif
138  goto try_left;
139  }
140  return result;
141  }
142 
143  public:
147  template<class RandomItor>
148  BoolOrPostList(RandomItor pl_begin, RandomItor pl_end,
149  Xapian::doccount db_size)
150  : n_kids(pl_end - pl_begin)
151  {
153  // This initialises all entries to have did 0, so all entries are
154  // equal, which is a valid heap.
155  std::copy(pl_begin, pl_end, plist);
156 
157  // We shortcut an empty shard and avoid creating a postlist tree for it.
158  Assert(db_size);
159  Assert(n_kids != 0);
160  // We calculate the estimate assuming independence. The simplest
161  // way to calculate this seems to be a series of (n_kids - 1) pairwise
162  // calculations, which gives the same answer regardless of the order.
163  double scale = 1.0 / db_size;
164  double P_est = plist[0].pl->get_termfreq() * scale;
165  for (size_t i = 1; i < n_kids; ++i) {
166  double P_i = plist[i].pl->get_termfreq() * scale;
167  P_est += P_i - P_est * P_i;
168  }
169  termfreq = static_cast<Xapian::doccount>(P_est * db_size + 0.5);
170  }
171 
172  ~BoolOrPostList();
173 
174  Xapian::docid get_docid() const;
175 
176  double get_weight(Xapian::termcount doclen,
177  Xapian::termcount unique_terms,
178  Xapian::termcount wdfdocmax) const;
179 
180  bool at_end() const;
181 
182  double recalc_maxweight();
183 
184  PostList* next(double w_min);
185 
186  PostList* skip_to(Xapian::docid did, double w_min);
187 
188  void get_docid_range(Xapian::docid& first, Xapian::docid& last) const;
189 
190  std::string get_description() const;
191 
192  Xapian::termcount get_wdf() const;
193 
195 
196  void gather_position_lists(OrPositionList* orposlist);
197 };
198 
199 #endif // XAPIAN_INCLUDED_BOOLORPOSTLIST_H
PostList class implementing unweighted Query::OP_OR.
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
BoolOrPostList(RandomItor pl_begin, RandomItor pl_end, Xapian::doccount db_size)
Construct from 2 random-access iterators to a container of PostList*, a pointer to the matcher,...
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
size_t n_kids
The number of sub-postlists.
bool at_end() const
Return true if the current position is past the last entry in this list.
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
void operator=(const BoolOrPostList &)=delete
Don't allow assignment.
Xapian::termcount for_all_matches(F func) const
Helper to apply operation to all postlists matching current docid.
PostListAndDocID * plist
Array of pointers to sub-postlists.
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Xapian::docid did
The current docid, or zero if we haven't started or are at_end.
std::string get_description() const
Return a string description of this object.
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
void gather_position_lists(OrPositionList *orposlist)
Gather PositionList* objects for a subtree.
BoolOrPostList(const BoolOrPostList &)=delete
Don't allow copying.
Xapian::docid get_docid() const
Return the current docid.
Abstract base class for postlists.
Definition: postlist.h:40
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
Definition: postlist.h:67
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
Definition: postlist.h:52
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define AssertEq(A, B)
Definition: omassert.h:124
#define Assert(COND)
Definition: omassert.h:122
Abstract base class for postlists.
bool operator>(const PostListAndDocID &o) const