xapian-core  2.0.0
selectpostlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 2017-2026 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "selectpostlist.h"
24 
25 #include "estimateop.h"
26 #include "omassert.h"
27 #include "postlisttree.h"
28 
29 bool
30 SelectPostList::vet(double w_min)
31 {
32  if (pl->at_end()) {
33  delete pl;
34  pl = NULL;
35  return true;
36  }
37 
38  // We assume that test_doc() is expensive compared to calculating the
39  // weight.
40  if (w_min <= 0.0) {
41  cached_weight = -HUGE_VAL;
42  } else {
43  Xapian::termcount doclen = 0;
44  Xapian::termcount unique_terms = 0;
45  Xapian::termcount wdfdocmax = 0;
46  pltree->get_doc_stats(pl->get_docid(), doclen, unique_terms, wdfdocmax);
47  cached_weight = pl->get_weight(doclen, unique_terms, wdfdocmax);
48  if (cached_weight < w_min)
49  return false;
50  }
51  return test_doc();
52 }
53 
55 {
56  if (estimate_op && (accepted || rejected)) {
57  // Only call report_ratio() if there are counts. During the building
58  // of the PostList tree we sometimes need to delete PostList objects
59  // and their associated EstimateOp and it's hard to arrange that they
60  // are always deleted in the correct order.
62  }
63 }
64 
65 double
67  Xapian::termcount unique_terms,
68  Xapian::termcount wdfdocmax) const
69 {
70  if (cached_weight >= 0)
71  return cached_weight;
72  return pl->get_weight(doclen, unique_terms, wdfdocmax);
73 }
74 
75 bool
77 {
78  return pl == NULL;
79 }
80 
81 PostList*
82 SelectPostList::next(double w_min)
83 {
84  do {
85  PostList* result = pl->next(w_min);
86  if (result) {
87  delete pl;
88  pl = result;
89  }
90  } while (!vet(w_min));
91  return NULL;
92 }
93 
94 PostList*
96 {
97  if (did > pl->get_docid()) {
98  PostList* result = pl->skip_to(did, w_min);
99  if (result) {
100  delete pl;
101  pl = result;
102  }
103  if (!vet(w_min)) {
104  // Advance to the next match.
105  return SelectPostList::next(w_min);
106  }
107  }
108  return NULL;
109 }
110 
111 PostList*
112 SelectPostList::check(Xapian::docid did, double w_min, bool& valid)
113 {
114  PostList* result = pl->check(did, w_min, valid);
115  if (result) {
116  delete pl;
117  pl = result;
118  }
119  if (valid) {
120  // For check() we can simply indicate !valid if the vetting fails.
121  valid = vet(w_min);
122  }
123  return NULL;
124 }
void report_ratio(Xapian::doccount accepted, Xapian::doccount rejected)
Definition: estimateop.h:147
void get_doc_stats(Xapian::docid shard_did, Xapian::termcount &doclen, Xapian::termcount &unique_terms, Xapian::termcount &wdfdocmax) const
Definition: postlisttree.h:186
PostList * check(Xapian::docid did, double w_min, bool &valid)
Check if the specified docid occurs in this postlist.
bool vet(double w_min)
Check if the current document is suitable.
Xapian::doccount accepted
Number of times test_doc() returned true.
PostListTree * pltree
Xapian::doccount rejected
Number of times test_doc() returned false.
virtual bool test_doc()=0
Check if the current document should be selected.
double cached_weight
Used to avoid calculating the weight twice for a given document.
EstimateOp * estimate_op
Object to report accepted/rejected counts to.
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
bool at_end() const
Return true if the current position is past the last entry in this list.
Abstract base class for postlists.
Definition: postlist.h:40
virtual PostList * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
virtual Xapian::docid get_docid() const =0
Return the current docid.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
virtual PostList * check(Xapian::docid did, double w_min, bool &valid)
Check if the specified docid occurs in this postlist.
Definition: postlist.cc:52
virtual double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const =0
Return the weight contribution for the current position.
Calculated bounds on and estimate of number of matches.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
Class for managing a tree of PostList objects.
Base class for classes which filter another PostList.