xapian-core  2.0.0
multi_alltermslist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2011,2017,2018,2020,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "multi_alltermslist.h"
24 
25 #include <xapian/database.h>
26 
28 #include "heap.h"
29 #include "omassert.h"
30 
31 using namespace std;
32 
36  bool operator()(const TermList *a, const TermList *b) const {
37  return a->get_termname() > b->get_termname();
38  }
39 };
40 
41 MultiAllTermsList::MultiAllTermsList(size_t count_, TermList** termlists_)
42  : count(count_), termlists(termlists_)
43 {
44 }
45 
47 {
48  while (count)
49  delete termlists[--count];
50  delete [] termlists;
51 }
52 
55 {
56  // This should never get called.
57  Assert(false);
58  return 0;
59 }
60 
63 {
64  if (current_termfreq == 0 && count != 0) {
65  while (true) {
66  TermList * tl = termlists[0];
67  if (tl->get_termname() != current_term)
68  break;
70  if (tl->next()) {
71  // Out of entries.
74  delete tl;
75  if (--count == 0)
76  break;
77  } else {
80  }
81  }
82  }
83  return current_termfreq;
84 }
85 
86 TermList *
88 {
89  if (current_term.empty()) {
90  // Make termlists into a heap so that the one (or one of the ones) with
91  // earliest sorting term is at the top of the heap.
92  size_t j = 0;
93  for (size_t i = 0; i != count; ++i) {
94  if (termlists[i]->next() == NULL) {
95  if (i != j)
96  swap(termlists[i], termlists[j]);
97  ++j;
98  }
99  }
100  while (count > j)
101  delete termlists[--count];
104  } else if (current_termfreq == 0 && count != 0) {
105  // Skip over current_term if we haven't already.
106  while (true) {
107  TermList* tl = termlists[0];
108  if (tl->get_termname() != current_term)
109  break;
110  if (tl->next()) {
111  // Out of entries.
114  delete tl;
115  if (--count == 0)
116  break;
117  } else {
120  }
121  }
122  }
123 
124  current_termfreq = 0;
125 
126  if (count <= 1) {
127  if (count == 0) {
128  return this;
129  }
130  count = 0;
131  // Prune.
132  return termlists[0];
133  }
134 
136  return NULL;
137 }
138 
139 TermList*
141 {
142  // Assume the skip is likely to be a long distance, and rebuild the heap
143  // from scratch. FIXME: It would be useful to profile this against an
144  // approach more like that next() uses if this ever gets heavy use.
145  size_t j = 0;
146  for (size_t i = 0; i != count; ++i) {
147  if (termlists[i]->skip_to(term) == NULL) {
148  if (i != j)
149  swap(termlists[i], termlists[j]);
150  ++j;
151  }
152  }
153  while (count > j)
154  delete termlists[--count];
155 
156  current_termfreq = 0;
157 
158  if (count <= 1) {
159  if (count == 0) {
160  return this;
161  }
162  count = 0;
163  // Prune.
164  return termlists[0];
165  }
166 
168 
170  return NULL;
171 }
size_t count
Number of TermList* entries in termlists.
MultiAllTermsList(const MultiAllTermsList &)
Don't allow copying.
Xapian::doccount current_termfreq
Current termfreq (or 0 if not yet calculated).
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
TermList * next()
Advance the current position to the next term in the termlist.
~MultiAllTermsList()
Destructor.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
TermList ** termlists
Sub-termlists which we use as a heap.
TermList * skip_to(std::string_view term)
Skip forward to the specified term.
Abstract base class for termlists.
Definition: termlist.h:42
std::string current_term
The current term.
Definition: termlist.h:54
virtual Xapian::doccount get_termfreq() const =0
Return the term frequency for the term at the current position.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
const std::string & get_termname() const
Return the termname at the current position.
Definition: termlist.h:69
An indexed database of documents.
string term
Virtual base class for Database internals.
C++ STL heap implementation with extensions.
Class for merging AllTermsList objects from subdatabases.
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:213
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:230
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:259
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
Comparison functor which orders TermList* by ascending term name.
bool operator()(const TermList *a, const TermList *b) const
Order by ascending term name.