xapian-core  2.0.0
andpostlist.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2011,2017,2021 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_ANDPOSTLIST_H
23 #define XAPIAN_INCLUDED_ANDPOSTLIST_H
24 
25 #include "backends/postlist.h"
26 #include "omassert.h"
27 #include "postlisttree.h"
28 
29 #include <algorithm>
30 
32 class AndPostList : public PostList {
37  bool operator()(const PostList *a, const PostList *b) const {
38  return a->get_termfreq() < b->get_termfreq();
39  }
40  };
41 
43  AndPostList& operator=(const AndPostList&) = delete;
44 
46  AndPostList(const AndPostList&) = delete;
47 
50 
52  size_t n_kids;
53 
55  PostList** plist = nullptr;
56 
58  double* max_wt = nullptr;
59 
61  double max_total = 0.0;
62 
65 
67  double new_min(double w_min, size_t n) {
68  return w_min - (max_total - max_wt[n]);
69  }
70 
72  void next_helper(size_t n, double w_min) {
73  PostList * res = plist[n]->next(new_min(w_min, n));
74  if (res) {
75  delete plist[n];
76  plist[n] = res;
77  if (max_wt[n] > 0)
79  }
80  }
81 
83  void skip_to_helper(size_t n, Xapian::docid did_min, double w_min) {
84  PostList * res = plist[n]->skip_to(did_min, new_min(w_min, n));
85  if (res) {
86  delete plist[n];
87  plist[n] = res;
88  if (max_wt[n] > 0)
90  }
91  }
92 
94  void check_helper(size_t n, Xapian::docid did_min, double w_min,
95  bool &valid) {
96  PostList * res = plist[n]->check(did_min, new_min(w_min, n), valid);
97  if (res) {
98  delete plist[n];
99  plist[n] = res;
100  if (max_wt[n] > 0)
102  }
103  }
104 
110 
112  PostList * find_next_match(double w_min);
113 
114  public:
118  template<class RandomItor>
119  AndPostList(RandomItor pl_begin, RandomItor pl_end, PostListTree* matcher_)
120  : n_kids(pl_end - pl_begin), matcher(matcher_)
121  {
123 
124  // Copy the postlists in ascending termfreq order, since it will
125  // be more efficient to look at the shorter lists first, and skip
126  // the longer lists based on those.
127  std::partial_sort_copy(pl_begin, pl_end, plist, plist + n_kids,
129 
130  Xapian::docid first = 1, last = Xapian::docid(-1);
131  AndPostList::get_docid_range(first, last);
132  if (rare(last < first)) {
133  // This probably always gets handled at a higher level.
134  termfreq = 0;
135  return;
136  }
137 
138  // We calculate the estimate assuming independence.
139  double r = (last - first + 1);
140  for (size_t i = 0; i < n_kids; ++i) {
141  auto est = plist[i]->get_termfreq();
142  first = 1;
143  last = Xapian::docid(-1);
144  plist[i]->get_docid_range(first, last);
145  // If this wasn't true then AndPostList::get_docid_range() should
146  // have returned last < first which is handled above.
147  Assert(last >= first);
148  r *= double(est) / (last - first + 1);
149  }
150  termfreq = static_cast<Xapian::doccount>(r + 0.5);
151  }
152 
155  double lmax, double rmax,
156  PostListTree* matcher_, Xapian::doccount termfreq_)
157  : n_kids(2), max_total(lmax + rmax), matcher(matcher_)
158  {
159  // Just copy the estimate from the PostList which decayed.
160  termfreq = termfreq_;
161 
162  // Even if we're the decay product of an OrPostList, we may want to
163  // swap here, as the subqueries may also have decayed and so their
164  // estimated termfreqs may have changed.
165  if (l->get_termfreq() < r->get_termfreq()) {
166  std::swap(l, r);
167  std::swap(lmax, rmax);
168  }
170  // Put the least frequent postlist first.
171  plist[0] = r;
172  plist[1] = l;
173  max_wt[0] = rmax;
174  max_wt[1] = lmax;
175  }
176 
177  ~AndPostList();
178 
179  Xapian::docid get_docid() const;
180 
181  double get_weight(Xapian::termcount doclen,
182  Xapian::termcount unique_terms,
183  Xapian::termcount wdfdocmax) const;
184 
185  bool at_end() const;
186 
187  double recalc_maxweight();
188 
189  PostList* next(double w_min);
190 
191  PostList* skip_to(Xapian::docid, double w_min);
192 
193  void get_docid_range(Xapian::docid& first, Xapian::docid& last) const;
194 
195  std::string get_description() const;
196 
205  Xapian::termcount get_wdf() const;
206 
208 
209  void gather_position_lists(OrPositionList* orposlist);
210 };
211 
212 #endif // XAPIAN_INCLUDED_ANDPOSTLIST_H
N-way AND postlist.
Definition: andpostlist.h:32
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Definition: andpostlist.cc:172
double * max_wt
Array of maximum weights for the sub-postlists.
Definition: andpostlist.h:58
void skip_to_helper(size_t n, Xapian::docid did_min, double w_min)
Call skip_to on a sub-postlist n, and handle any pruning.
Definition: andpostlist.h:83
PostList ** plist
Array of pointers to sub-postlists.
Definition: andpostlist.h:55
size_t n_kids
The number of sub-postlists.
Definition: andpostlist.h:52
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
Definition: andpostlist.cc:62
void check_helper(size_t n, Xapian::docid did_min, double w_min, bool &valid)
Call check on a sub-postlist n, and handle any pruning.
Definition: andpostlist.h:94
PostList * find_next_match(double w_min)
Advance the sublists to the next match.
Definition: andpostlist.cc:93
AndPostList(const AndPostList &)=delete
Don't allow copying.
Xapian::docid get_docid() const
Return the current docid.
Definition: andpostlist.cc:56
AndPostList(RandomItor pl_begin, RandomItor pl_end, PostListTree *matcher_)
Construct from 2 random-access iterators to a container of PostList* and a pointer to the matcher.
Definition: andpostlist.h:119
Xapian::docid did
The current docid, or zero if we haven't started or are at_end.
Definition: andpostlist.h:49
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
Definition: andpostlist.cc:129
double max_total
Total maximum weight (== sum of max_wt values).
Definition: andpostlist.h:61
AndPostList(PostList *l, PostList *r, double lmax, double rmax, PostListTree *matcher_, Xapian::doccount termfreq_)
Construct as the decay product of an OrPostList or AndMaybePostList.
Definition: andpostlist.h:154
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
Definition: andpostlist.cc:136
void next_helper(size_t n, double w_min)
Call next on a sub-postlist n, and handle any pruning.
Definition: andpostlist.h:72
void gather_position_lists(OrPositionList *orposlist)
Gather PositionList* objects for a subtree.
Definition: andpostlist.cc:182
PostListTree * matcher
Pointer to the matcher object, so we can report pruning.
Definition: andpostlist.h:64
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Definition: andpostlist.cc:81
Xapian::termcount get_wdf() const
Get the within-document frequency.
Definition: andpostlist.cc:162
AndPostList & operator=(const AndPostList &)=delete
Don't allow assignment.
bool at_end() const
Return true if the current position is past the last entry in this list.
Definition: andpostlist.cc:75
void allocate_plist_and_max_wt()
Allocate plist and max_wt arrays of n_kids each.
Definition: andpostlist.cc:32
double new_min(double w_min, size_t n)
Calculate the new minimum weight for sub-postlist n.
Definition: andpostlist.h:67
std::string get_description() const
Return a string description of this object.
Definition: andpostlist.cc:149
void force_recalc()
Definition: postlisttree.h:125
Abstract base class for postlists.
Definition: postlist.h:40
virtual PostList * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
Definition: postlist.h:67
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
virtual PostList * check(Xapian::docid did, double w_min, bool &valid)
Check if the specified docid occurs in this postlist.
Definition: postlist.cc:52
virtual void get_docid_range(docid &first, docid &last) const
Get the bounds on the range of docids this PostList can return.
Definition: postlist.cc:72
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
Definition: postlist.h:52
#define rare(COND)
Definition: config.h:607
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
Abstract base class for postlists.
Class for managing a tree of PostList objects.
Comparison functor which orders PostList* by ascending get_termfreq().
Definition: andpostlist.h:35
bool operator()(const PostList *a, const PostList *b) const
Order by ascending get_termfreq().
Definition: andpostlist.h:37