xapian-core  2.0.0
postlisttree.h
Go to the documentation of this file.
1 
4 /* Copyright 2017,2019 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_POSTLISTTREE_H
22 #define XAPIAN_INCLUDED_POSTLISTTREE_H
23 
24 #include "backends/multi.h"
25 #include "backends/postlist.h"
26 #include "valuestreamdocument.h"
27 
28 class PostListTree {
29  PostList* pl = NULL;
30 
31  bool use_cached_max_weight = false;
32 
34 
36 
38 
39  double max_weight;
40 
43 
50  PostList** shard_pls = NULL;
51 
54 
61 
63 
65 
66  public:
68  Xapian::Database& db_,
69  const Xapian::Weight& wtscheme)
70  : need_doclength(wtscheme.get_sumpart_needs_doclength_()),
71  need_unique_terms(wtscheme.get_sumpart_needs_uniqueterms_()),
72  need_wdfdocmax(wtscheme.get_sumpart_needs_wdfdocmax_()),
73  vsdoc(vsdoc_),
74  db(db_) {}
75 
78  for (Xapian::doccount i = 0; i != n_shards; ++i)
79  delete shard_pls[i];
80  n_shards = 0;
81  shard_pls = nullptr;
82  }
83 
86  }
87 
93 
    // Install the per-shard postlists and position on the first usable shard.
    //
    // pls       array of postlist pointers, stored in shard_pls; entries may
    //           be NULL.
    // n_shards_ number of entries, stored in n_shards.
    //
    // NOTE(review): listing lines 99, 101 and 108 are not present in this
    // extract, so the body of the final `if` and any statements on the other
    // two lines are not shown here - consult the original header.
94  void set_postlists(PostList** pls, Xapian::doccount n_shards_) {
95  shard_pls = pls;
96  n_shards = n_shards_;
    // Advance current_shard past shards whose postlist is NULL.
97  while (shard_pls[current_shard] == NULL) {
98  ++current_shard;
100  }
    // Start from the top-level database internals...
102  shard_db = db.internal.get();
103  if (n_shards > 1) {
    // ...and with more than one shard, treat them as a MultiDatabase and
    // select the sub-database for the current shard.
104  auto multidb = static_cast<const MultiDatabase*>(shard_db);
105  shard_db = multidb->shards[current_shard];
106  }
    // NOTE(review): the statement guarded by this condition (listing line
    // 108) is missing from this extract.
107  if (current_shard > 0)
109  }
110 
111  double recalc_maxweight() {
112  if (!use_cached_max_weight) {
113  use_cached_max_weight = true;
114  double w = 0.0;
115  // Start at the current shard.
116  for (Xapian::doccount i = current_shard; i != n_shards; ++i) {
117  if (shard_pls[i])
118  w = std::max(w, shard_pls[i]->recalc_maxweight());
119  }
120  max_weight = w;
121  }
122  return max_weight;
123  }
124 
125  void force_recalc() {
126  use_cached_max_weight = false;
127  }
128 
131  }
132 
134  return shard_db->get_doclength(shard_did);
135  }
136 
137  double get_weight() const {
138  Xapian::termcount doclen = 0, unique_terms = 0, wdfdocmax = 0;
139  get_doc_stats(pl->get_docid(), doclen, unique_terms, wdfdocmax);
140  return pl->get_weight(doclen, unique_terms, wdfdocmax);
141  }
142 
    // Advance to the next document, moving on to later shards as needed.
    //
    // w_min  minimum weight of interest; when positive it is compared
    //        against recalc_maxweight() to stop early.
    //
    // Returns false if we're done (no shard left, or w_min can no longer be
    // reached); true if pl is positioned on an entry.
    //
    // NOTE(review): listing lines 175 and 181 are not present in this
    // extract, so the shard-switching code shown below is incomplete -
    // consult the original header.
144  bool next(double w_min) {
145  if (w_min > 0.0 && recalc_maxweight() < w_min) {
146  // We can't now achieve w_min so we're done.
147  return false;
148  }
149 
150  while (true) {
151  PostList* result = pl->next(w_min);
    // A non-NULL result is a replacement postlist: the old one is deleted
    // and the replacement is recorded for the current shard.
152  if (rare(result)) {
153  delete pl;
154  shard_pls[current_shard] = pl = result;
155  if (usual(!pl->at_end())) {
156  if (w_min > 0.0) {
    // The postlist changed, so the cached maximum weight is discarded
    // and recomputed before checking against w_min again.
157  use_cached_max_weight = false;
158  if (recalc_maxweight() < w_min) {
159  // We can't now achieve w_min so we're done.
160  return false;
161  }
162  }
163  return true;
164  }
165  } else {
166  if (usual(!pl->at_end())) {
167  return true;
168  }
169  }
170 
    // The current shard's postlist is at its end: step to the next shard
    // with a non-NULL postlist, or finish if there is none.
171  do {
172  if (++current_shard == n_shards)
173  return false;
174  } while (shard_pls[current_shard] == NULL);
    // Re-select the database internals for the new shard.
176  shard_db = db.internal.get();
177  if (n_shards > 1) {
178  auto multidb = static_cast<const MultiDatabase*>(shard_db);
179  shard_db = multidb->shards[current_shard];
180  }
    // The set of remaining shards changed, so the cached maximum weight is
    // invalidated.
182  use_cached_max_weight = false;
183  }
184  }
185 
    // Fetch the per-document statistics the weighting scheme needs.
    //
    // shard_did     document id passed to the shard_db lookups.
    // doclen        set from shard_db->get_doclength() if need_doclength.
    // unique_terms  set from shard_db->get_unique_terms() if
    //               need_unique_terms.
    // wdfdocmax     set from shard_db->get_wdfdocmax() if need_wdfdocmax.
    //
    // Outputs whose need_* flag is false are left untouched.
    //
    // NOTE(review): listing line 192 is not present in this extract; the
    // extra closing brace at listing line 199 presumably matches a block
    // opened on that line - confirm against the original header.
186  void get_doc_stats(Xapian::docid shard_did,
187  Xapian::termcount& doclen,
188  Xapian::termcount& unique_terms,
189  Xapian::termcount& wdfdocmax) const {
190  // Fetching the document length and number of unique terms is work we
191  // can avoid if the weighting scheme doesn't use them.
193  if (need_doclength)
194  doclen = shard_db->get_doclength(shard_did);
195  if (need_unique_terms)
196  unique_terms = shard_db->get_unique_terms(shard_did);
197  if (need_wdfdocmax)
198  wdfdocmax = shard_db->get_wdfdocmax(shard_did);
199  }
200  }
201 
203  return pl->count_matching_subqs();
204  }
205 
206  std::string get_description() const {
207  std::string desc = "PostListTree(";
208  for (Xapian::doccount i = 0; i != n_shards; ++i) {
209  if (i == current_shard)
210  desc += '*';
211  if (shard_pls[i]) {
212  desc += shard_pls[i]->get_description();
213  desc += ',';
214  } else {
215  desc += "NULL,";
216  }
217  }
218  desc.back() = ')';
219  return desc;
220  }
221 };
222 
223 #endif // XAPIAN_INCLUDED_POSTLISTTREE_H
Sharded database backend.
Xapian::Database::Internal * shard_db
Definition: postlisttree.h:64
Xapian::doccount n_shards
The number of shards.
Definition: postlisttree.h:53
Xapian::Database & db
Definition: postlisttree.h:62
bool need_wdfdocmax
Definition: postlisttree.h:37
PostList * pl
Definition: postlisttree.h:29
bool need_doclength
Definition: postlisttree.h:33
PostList ** shard_pls
The postlists for the shards.
Definition: postlisttree.h:50
void set_postlists(PostList **pls, Xapian::doccount n_shards_)
Definition: postlisttree.h:94
bool need_unique_terms
Definition: postlisttree.h:35
Xapian::termcount get_doclength(Xapian::docid shard_did) const
Definition: postlisttree.h:133
void force_recalc()
Definition: postlisttree.h:125
Xapian::docid get_docid() const
Definition: postlisttree.h:129
bool * get_max_weight_cached_flag_ptr()
Return pointer to flag to set to false to invalidate cached max weight.
Definition: postlisttree.h:92
ValueStreamDocument & vsdoc
Document proxy used for valuestream caching.
Definition: postlisttree.h:60
double recalc_maxweight()
Definition: postlisttree.h:111
double max_weight
Definition: postlisttree.h:39
Xapian::termcount count_matching_subqs() const
Definition: postlisttree.h:202
void get_doc_stats(Xapian::docid shard_did, Xapian::termcount &doclen, Xapian::termcount &unique_terms, Xapian::termcount &wdfdocmax) const
Definition: postlisttree.h:186
std::string get_description() const
Definition: postlisttree.h:206
bool next(double w_min)
Return false if we're done.
Definition: postlisttree.h:144
double get_weight() const
Definition: postlisttree.h:137
Xapian::doccount current_shard
The current shard.
Definition: postlisttree.h:42
PostListTree(ValueStreamDocument &vsdoc_, Xapian::Database &db_, const Xapian::Weight &wtscheme)
Definition: postlisttree.h:67
void delete_postlists()
Delete all the PostList objects.
Definition: postlisttree.h:77
bool use_cached_max_weight
Definition: postlisttree.h:31
A document which gets its values from a ValueStreamManager.
void new_shard(Xapian::doccount n)
Virtual base class for Database internals.
virtual termcount get_wdfdocmax(docid did) const =0
Get the max wdf in document.
virtual termcount get_doclength(docid did) const =0
virtual termcount get_unique_terms(docid did) const =0
Get the number of unique terms in document.
An indexed database of documents.
Definition: database.h:75
Xapian::Internal::intrusive_ptr_nonnull< Internal > internal
Definition: database.h:95
Abstract base class for postlists.
Definition: postlist.h:40
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
virtual Xapian::docid get_docid() const =0
Return the current docid.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
virtual std::string get_description() const =0
Return a string description of this object.
virtual double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const =0
Return the weight contribution for the current position.
virtual Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Definition: postlist.cc:59
Abstract base class for weighting schemes.
Definition: weight.h:38
#define usual(COND)
Definition: config.h:608
#define rare(COND)
Definition: config.h:607
Multi-database support functions.
Xapian::docid unshard(Xapian::docid shard_did, Xapian::doccount shard, Xapian::doccount n_shards)
Convert shard number and shard docid to docid in multi-db.
Definition: multi.h:64
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define Assert(COND)
Definition: omassert.h:122
Abstract base class for postlists.
A document which gets its values from a ValueStreamManager.