xapian-core  1.4.27
multi_alltermslist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2011 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include "multi_alltermslist.h"
24 
25 #include <xapian/error.h>
26 
27 #include "omassert.h"
28 
29 #include <algorithm>
30 
31 using namespace std;
33 
37  bool operator()(const TermList *a, const TermList *b) const {
38  return a->get_termname() > b->get_termname();
39  }
40 };
41 
/** Functor which deletes the pointer it is passed.
 *
 *  Intended for use with for_each() over a container of owning raw
 *  pointers.  Passing a null pointer is harmless, since deleting a null
 *  pointer has no effect.
 */
template<class CLASS> struct delete_ptr {
    /// Delete the object pointed to by @a p.
    void operator()(CLASS *p) const { delete p; }
};
45 
47  const string & prefix)
48 {
49  // The 0 and 1 cases should be handled by our caller.
50  AssertRel(dbs.size(), >=, 2);
51  termlists.reserve(dbs.size());
52  try {
53  vector<intrusive_ptr<Xapian::Database::Internal> >::const_iterator i;
54  for (i = dbs.begin(); i != dbs.end(); ++i) {
55  termlists.push_back((*i)->open_allterms(prefix));
56  }
57  } catch (...) {
58  for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
59  throw;
60  }
61 }
62 
64 {
65  for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
66 }
67 
68 string
70 {
71  return current_term;
72 }
73 
76 {
77  if (termlists.empty()) return 0;
78  vector<TermList *>::const_iterator i = termlists.begin();
79  Xapian::doccount total_tf = (*i)->get_termfreq();
80  while (++i != termlists.end()) {
81  if ((*i)->get_termname() == current_term)
82  total_tf += (*i)->get_termfreq();
83  }
84  return total_tf;
85 }
86 
87 TermList *
89 {
90  if (current_term.empty()) {
91  // Make termlists into a heap so that the one (or one of the ones) with
92  // earliest sorting term is at the top of the heap.
93  vector<TermList*>::iterator i = termlists.begin();
94  while (i != termlists.end()) {
95  (*i)->next();
96  if ((*i)->at_end()) {
97  delete *i;
98  i = termlists.erase(i);
99  } else {
100  ++i;
101  }
102  }
103  make_heap(termlists.begin(), termlists.end(),
105  } else {
106  // Advance to the next termname.
107  do {
108  TermList * tl = termlists.front();
109  pop_heap(termlists.begin(), termlists.end(),
111  tl->next();
112  if (tl->at_end()) {
113  delete tl;
114  termlists.pop_back();
115  } else {
116  termlists.back() = tl;
117  push_heap(termlists.begin(), termlists.end(),
119  }
120  } while (!termlists.empty() &&
121  termlists.front()->get_termname() == current_term);
122  }
123 
124  if (termlists.size() <= 1) {
125  if (termlists.empty()) return NULL;
126  TermList * tl = termlists[0];
127  termlists.clear();
128  return tl;
129  }
130 
131  current_term = termlists.front()->get_termname();
132  return NULL;
133 }
134 
135 TermList *
136 MultiAllTermsList::skip_to(const std::string &term)
137 {
138  // Assume the skip is likely to be a long distance, and rebuild the heap
139  // from scratch. FIXME: It would be useful to profile this against an
140  // approach more like that next() uses if this ever gets heavy use.
141  vector<TermList*>::iterator i = termlists.begin();
142  while (i != termlists.end()) {
143  (*i)->skip_to(term);
144  if ((*i)->at_end()) {
145  delete *i;
146  i = termlists.erase(i);
147  } else {
148  ++i;
149  }
150  }
151 
152  if (termlists.size() <= 1) {
153  if (termlists.empty()) return NULL;
154  TermList * tl = termlists[0];
155  termlists.clear();
156  return tl;
157  }
158 
159  make_heap(termlists.begin(), termlists.end(), CompareTermListsByTerm());
160 
161  current_term = termlists.front()->get_termname();
162  return NULL;
163 }
164 
165 bool
167 {
168  return termlists.empty();
169 }
~MultiAllTermsList()
Destructor.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
virtual Internal * skip_to(const std::string &term)=0
Skip forward to the specified term.
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
bool operator()(const TermList *a, const TermList *b) const
Order by ascending term name.
Hierarchy of classes which Xapian can throw as exceptions.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Class for merging AllTermsList objects from subdatabases.
Comparison functor which orders TermList* by ascending term name.
bool at_end() const
Return true if the current position is past the last term in this list.
virtual bool at_end() const =0
Return true if the current position is past the last term in this list.
virtual std::string get_termname() const =0
Return the termname at the current position.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
std::string get_termname() const
Return the termname at the current position.
Various assertion macros.
TermList * next()
Advance the current position to the next term in the termlist.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
TermList * skip_to(const std::string &term)
Skip forward to the specified term.
MultiAllTermsList(const MultiAllTermsList &)
Don't allow copying.
void operator()(CLASS *p) const