xapian-core  2.0.0
multi_postlist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2011,2017,2018,2020 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, see
16  * <https://www.gnu.org/licenses/>.
17  */
18 
19 #include <config.h>
20 
21 #include "multi_postlist.h"
22 
23 #include <xapian/database.h>
24 
25 #include "backends/multi.h"
26 #include "backends/postlist.h"
27 #include "heap.h"
28 #include "omassert.h"
29 
30 #include <algorithm>
31 #include <functional>
32 
33 using namespace std;
34 
36 {
37  while (n_shards)
38  delete postlists[--n_shards];
39  delete [] postlists;
40  delete [] docids;
41 }
42 
45 {
46  return docids[0];
47 }
48 
51 {
52  return postlists[shard_number(docids[0], n_shards)]->get_wdf();
53 }
54 
55 double
58  Xapian::termcount) const
59 {
60  // MultiPostList is only used by PostingIterator which should never call
61  // this method.
62  Assert(false);
63  return 0;
64 }
65 
66 bool
68 {
69  return docids_size == 0;
70 }
71 
72 double
74 {
75  // MultiPostList is only used by PostingIterator which should never call
76  // this method.
77  Assert(false);
78  return 0;
79 }
80 
83 {
84  return postlists[shard_number(docids[0], n_shards)]->open_position_list();
85 }
86 
87 PostList*
88 MultiPostList::next(double w_min)
89 {
90  if (docids_size == 0) {
91  // Make a heap of the mapped docids so that the smallest is at the top
92  // of the heap.
93  Xapian::doccount j = 0;
94  for (Xapian::doccount i = 0; i != n_shards; ++i) {
95  PostList* pl = postlists[i];
96  if (!pl) continue;
97  pl->next(w_min);
98  if (pl->at_end()) {
99  delete pl;
100  postlists[i] = NULL;
101  } else {
102  docids[j++] = unshard(pl->get_docid(), i, n_shards);
103  }
104  }
105  docids_size = j;
106  Heap::make(docids, docids + docids_size,
107  std::greater<Xapian::docid>());
108  } else {
109  Xapian::docid old_did = docids[0];
110  Xapian::doccount shard = shard_number(old_did, n_shards);
111  PostList* pl = postlists[shard];
112  pl->next(w_min);
113  if (pl->at_end()) {
114  Heap::pop(docids, docids + docids_size,
115  std::greater<Xapian::docid>());
116  delete pl;
117  postlists[shard] = NULL;
118  --docids_size;
119  } else {
120  docids[0] = unshard(pl->get_docid(), shard, n_shards);
121  Heap::replace(docids, docids + docids_size,
122  std::greater<Xapian::docid>());
123  }
124  }
125 
126  return NULL;
127 }
128 
129 PostList*
131 {
132  Xapian::doccount j = 0;
133  if (docids_size == 0) {
134  // Make a heap of the mapped docids so that the smallest is at the top
135  // of the heap.
136  Xapian::docid shard_did = shard_docid(did, n_shards);
137  Xapian::doccount shard = shard_number(did, n_shards);
138  for (Xapian::doccount i = 0; i != n_shards; ++i) {
139  PostList* pl = postlists[i];
140  if (!pl) continue;
141  pl->skip_to(shard_did + (i < shard), w_min);
142  if (pl->at_end()) {
143  delete pl;
144  postlists[i] = NULL;
145  } else {
146  docids[j++] = unshard(pl->get_docid(), i, n_shards);
147  }
148  }
149  } else {
150  if (did <= docids[0])
151  return NULL;
152  // For a skip by < n_shards docids, pop/push may be more efficient than
153  // rebuilding the heap. For now, always just rebuild the heap unless
154  // we're just skipping the next docid, in which case do next() instead.
155  if (did == docids[0] + 1)
156  return MultiPostList::next(w_min);
157  Xapian::docid shard_did = shard_docid(did, n_shards);
158  Xapian::doccount shard = shard_number(did, n_shards);
159  for (Xapian::doccount i = 0; i != docids_size; ++i) {
160  Xapian::docid old_did = docids[i];
161  if (old_did < did) {
162  Xapian::doccount old_shard = shard_number(old_did, n_shards);
163  PostList* pl = postlists[old_shard];
164  pl->skip_to(shard_did + (old_shard < shard), w_min);
165  if (pl->at_end()) {
166  delete pl;
167  postlists[old_shard] = NULL;
168  } else {
169  docids[j++] = unshard(pl->get_docid(), old_shard, n_shards);
170  }
171  } else {
172  docids[j++] = old_did;
173  }
174  }
175  }
176  docids_size = j;
177  Heap::make(docids, docids + docids_size, std::greater<Xapian::docid>());
178 
179  return NULL;
180 }
181 
182 std::string
184 {
185  string desc = "MultiPostList(";
186  for (Xapian::doccount i = 0; i != n_shards; ++i) {
187  if (postlists[i]) {
188  desc += postlists[i]->get_description();
189  desc += ',';
190  } else {
191  desc += "NULL,";
192  }
193  }
194  desc.back() = ')';
195  return desc;
196 }
bool at_end() const
Return true if the current position is past the last entry in this list.
std::string get_description() const
Return a string description of this object.
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
~MultiPostList()
Destructor.
PositionList * open_position_list() const
Read the position list for the term in the current document and.
Xapian::docid get_docid() const
Return the current docid.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
Abstract base class for postlists.
Definition: postlist.h:40
virtual PostList * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
virtual Xapian::docid get_docid() const =0
Return the current docid.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
An indexed database of documents.
C++ STL heap implementation with extensions.
Multi-database support functions.
Xapian::doccount shard_number(Xapian::docid did, Xapian::doccount n_shards)
Convert docid in the multi-db to shard number.
Definition: multi.h:49
Xapian::docid shard_docid(Xapian::docid did, Xapian::doccount n_shards)
Convert docid in the multi-db to the docid in the shard.
Definition: multi.h:35
Xapian::docid unshard(Xapian::docid shard_did, Xapian::doccount shard, Xapian::doccount n_shards)
Convert shard number and shard docid to docid in multi-db.
Definition: multi.h:64
Class for merging PostList objects from subdatabases.
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:213
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:230
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:259
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
Abstract base class for postlists.