xapian-core  1.4.27
multiandpostlist.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2011,2017,2021 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
23 #define XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
24 
25 #include "multimatch.h"
26 #include "omassert.h"
27 #include "api/postlist.h"
28 
29 #include <algorithm>
30 
32 class MultiAndPostList : public PostList {
37  bool operator()(const PostList *a, const PostList *b) const {
38  return a->get_termfreq_est() < b->get_termfreq_est();
39  }
40  };
41 
43  void operator=(const MultiAndPostList &);
44 
47 
50 
52  size_t n_kids;
53 
56 
58  double * max_wt;
59 
61  double max_total;
62 
65 
68 
70  double new_min(double w_min, size_t n) {
71  return w_min - (max_total - max_wt[n]);
72  }
73 
75  void next_helper(size_t n, double w_min) {
76  PostList * res = plist[n]->next(new_min(w_min, n));
77  if (res) {
78  delete plist[n];
79  plist[n] = res;
80  if (max_wt[n] > 0)
81  matcher->recalc_maxweight();
82  }
83  }
84 
86  void skip_to_helper(size_t n, Xapian::docid did_min, double w_min) {
87  PostList * res = plist[n]->skip_to(did_min, new_min(w_min, n));
88  if (res) {
89  delete plist[n];
90  plist[n] = res;
91  if (max_wt[n] > 0)
92  matcher->recalc_maxweight();
93  }
94  }
95 
97  void check_helper(size_t n, Xapian::docid did_min, double w_min,
98  bool &valid) {
99  PostList * res = plist[n]->check(did_min, new_min(w_min, n), valid);
100  if (res) {
101  delete plist[n];
102  plist[n] = res;
103  if (max_wt[n] > 0)
104  matcher->recalc_maxweight();
105  }
106  }
107 
113 
115  PostList * find_next_match(double w_min);
116 
117  public:
121  template<class RandomItor>
122  MultiAndPostList(RandomItor pl_begin, RandomItor pl_end,
123  MultiMatch * matcher_, Xapian::doccount db_size_)
124  : did(0), n_kids(pl_end - pl_begin), plist(NULL), max_wt(NULL),
125  max_total(0), db_size(db_size_), matcher(matcher_)
126  {
128 
129  // Copy the postlists in ascending termfreq order, since it will
130  // be more efficient to look at the shorter lists first, and skip
131  // the longer lists based on those.
132  std::partial_sort_copy(pl_begin, pl_end, plist, plist + n_kids,
134  }
135 
138  double lmax, double rmax,
139  MultiMatch * matcher_, Xapian::doccount db_size_)
140  : did(0), n_kids(2), plist(NULL), max_wt(NULL),
141  max_total(lmax + rmax), db_size(db_size_), matcher(matcher_)
142  {
143  // Even if we're the decay product of an OrPostList, we may want to
144  // swap here, as the subqueries may also have decayed and so their
145  // estimated termfreqs may have changed.
146  if (l->get_termfreq_est() < r->get_termfreq_est()) {
147  std::swap(l, r);
148  std::swap(lmax, rmax);
149  }
151  // Put the least frequent postlist first.
152  plist[0] = r;
153  plist[1] = l;
154  max_wt[0] = rmax;
155  max_wt[1] = lmax;
156  }
157 
159 
161 
163 
165 
167  const Xapian::Weight::Internal & stats) const;
168 
169  double get_maxweight() const;
170 
171  Xapian::docid get_docid() const;
172 
174 
176 
177  double get_weight() const;
178 
179  bool at_end() const;
180 
181  double recalc_maxweight();
182 
183  PostList* next(double w_min);
184 
185  PostList* skip_to(Xapian::docid, double w_min);
186 
187  std::string get_description() const;
188 
196  Xapian::termcount get_wdf() const;
197 
199 
200  void gather_position_lists(OrPositionList* orposlist);
201 };
202 
203 #endif // XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
size_t n_kids
The number of sub-postlists.
double * max_wt
Array of maximum weights for the sub-postlists.
void allocate_plist_and_max_wt()
Allocate plist and max_wt arrays of n_kids each.
Abstract base class for postlists.
Definition: postlist.h:37
double max_total
Total maximum weight (== sum of max_wt values).
Comparison functor which orders PostList* by ascending get_termfreq_est().
class for performing a match
void gather_position_lists(OrPositionList *orposlist)
Gather PositionList* objects for a subtree.
Xapian::doccount get_termfreq_min() const
Get a lower bound on the number of documents indexed by this term.
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
void next_helper(size_t n, double w_min)
Call next on a sub-postlist n, and handle any pruning.
Xapian::termcount get_doclength() const
Return the length of current document.
MultiAndPostList(RandomItor pl_begin, RandomItor pl_end, MultiMatch *matcher_, Xapian::doccount db_size_)
Construct from 2 random-access iterators to a container of PostList*, a pointer to the matcher...
void skip_to_helper(size_t n, Xapian::docid did_min, double w_min)
Call skip_to on a sub-postlist n, and handle any pruning.
std::string get_description() const
Return a string description of this object.
virtual Internal * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
void operator=(const MultiAndPostList &)
Don&#39;t allow assignment.
Xapian::docid get_docid() const
Return the current docid.
Abstract base class for postlists.
Xapian::doccount db_size
The number of documents in the database.
TermFreqs get_termfreq_est_using_stats(const Xapian::Weight::Internal &stats) const
Get an estimate for the termfreq and reltermfreq, given the stats.
PostList * find_next_match(double w_min)
Advance the sublists to the next match.
virtual Internal * check(Xapian::docid did, double w_min, bool &valid)
Check if the specified docid occurs in this postlist.
double get_maxweight() const
Return an upper bound on what get_weight() can return.
Xapian::termcount get_wdf() const
get_wdf() for MultiAndPostlists returns the sum of the wdfs of the sub postlists. ...
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:72
Xapian::docid did
The current docid, or zero if we haven&#39;t started or are at_end.
MultiAndPostList(PostList *l, PostList *r, double lmax, double rmax, MultiMatch *matcher_, Xapian::doccount db_size_)
Construct as the decay product of an OrPostList or AndMaybePostList.
virtual Xapian::doccount get_termfreq_est() const =0
Get an estimate of the number of documents indexed by this term.
PostList ** plist
Array of pointers to sub-postlists.
void check_helper(size_t n, Xapian::docid did_min, double w_min, bool &valid)
Call check on a sub-postlist n, and handle any pruning.
Class to hold statistics for a given collection.
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
void recalc_maxweight()
Called by postlists to indicate that they&#39;ve rearranged themselves and the maxweight now possible is ...
Definition: multimatch.h:136
double new_min(double w_min, size_t n)
Calculate the new minimum weight for sub-postlist n.
The frequencies for a term.
virtual Internal * next(double w_min)=0
Advance the current position to the next document in the postlist.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::doccount get_termfreq_max() const
Get an upper bound on the number of documents indexed by this term.
N-way AND postlist.
MultiMatch * matcher
Pointer to the matcher object, so we can report pruning.
bool operator()(const PostList *a, const PostList *b) const
Order by ascending get_termfreq_est().
Various assertion macros.
double get_weight() const
Return the weight contribution for the current position.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Xapian::doccount get_termfreq_est() const
Get an estimate of the number of documents indexed by this term.
MultiAndPostList(const MultiAndPostList &)
Don&#39;t allow copying.