00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include "multialltermslist.h"
00024
00025 #include <xapian/error.h>
00026
00027 #include "omassert.h"
00028
00029 #include <algorithm>
00030
00031 using namespace std;
00032
00034 struct CompareTermListsByTerm {
00036 bool operator()(const TermList *a, const TermList *b) {
00037 return a->get_termname() > b->get_termname();
00038 }
00039 };
00040
00041 template<class CLASS> struct delete_ptr {
00042 void operator()(CLASS *p) { delete p; }
00043 };
00044
00045 MultiAllTermsList::MultiAllTermsList(const vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> > & dbs,
00046 const string & prefix)
00047 {
00048
00049 AssertRel(dbs.size(), >=, 2);
00050 termlists.reserve(dbs.size());
00051 try {
00052 vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> >::const_iterator i;
00053 for (i = dbs.begin(); i != dbs.end(); ++i) {
00054 termlists.push_back((*i)->open_allterms(prefix));
00055 }
00056 } catch (...) {
00057 for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00058 throw;
00059 }
00060 }
00061
00062 MultiAllTermsList::~MultiAllTermsList()
00063 {
00064 for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00065 }
00066
00067 string
00068 MultiAllTermsList::get_termname() const
00069 {
00070 return current_term;
00071 }
00072
00073 Xapian::doccount
00074 MultiAllTermsList::get_termfreq() const
00075 {
00076 if (termlists.empty()) return 0;
00077 vector<TermList *>::const_iterator i = termlists.begin();
00078 Xapian::doccount total_tf = (*i)->get_termfreq();
00079 while (++i != termlists.end()) {
00080 if ((*i)->get_termname() == current_term)
00081 total_tf += (*i)->get_termfreq();
00082 }
00083 return total_tf;
00084 }
00085
00086 Xapian::termcount
00087 MultiAllTermsList::get_collection_freq() const
00088 {
00089 if (termlists.empty()) return 0;
00090 vector<TermList *>::const_iterator i = termlists.begin();
00091 Xapian::termcount total_cf = (*i)->get_collection_freq();
00092 while (++i != termlists.end()) {
00093 if ((*i)->get_termname() == current_term)
00094 total_cf += (*i)->get_collection_freq();
00095 }
00096 return total_cf;
00097 }
00098
00099 TermList *
00100 MultiAllTermsList::next()
00101 {
00102 if (current_term.empty()) {
00103
00104
00105 vector<TermList*>::iterator i = termlists.begin();
00106 while (i != termlists.end()) {
00107 (*i)->next();
00108 if ((*i)->at_end()) {
00109 delete *i;
00110 i = termlists.erase(i);
00111 } else {
00112 ++i;
00113 }
00114 }
00115 make_heap(termlists.begin(), termlists.end(),
00116 CompareTermListsByTerm());
00117 } else {
00118
00119 do {
00120 TermList * tl = termlists.front();
00121 pop_heap(termlists.begin(), termlists.end(),
00122 CompareTermListsByTerm());
00123 tl->next();
00124 if (tl->at_end()) {
00125 delete tl;
00126 termlists.pop_back();
00127 } else {
00128 termlists.back() = tl;
00129 push_heap(termlists.begin(), termlists.end(),
00130 CompareTermListsByTerm());
00131 }
00132 } while (!termlists.empty() &&
00133 termlists.front()->get_termname() == current_term);
00134 }
00135
00136 if (termlists.size() <= 1) {
00137 if (termlists.empty()) return NULL;
00138 TermList * tl = termlists[0];
00139 termlists.clear();
00140 return tl;
00141 }
00142
00143 current_term = termlists.front()->get_termname();
00144 return NULL;
00145 }
00146
00147 TermList *
00148 MultiAllTermsList::skip_to(const std::string &term)
00149 {
00150
00151
00152
00153 vector<TermList*>::iterator i = termlists.begin();
00154 while (i != termlists.end()) {
00155 (*i)->skip_to(term);
00156 if ((*i)->at_end()) {
00157 delete *i;
00158 i = termlists.erase(i);
00159 } else {
00160 ++i;
00161 }
00162 }
00163
00164 if (termlists.size() <= 1) {
00165 if (termlists.empty()) return NULL;
00166 TermList * tl = termlists[0];
00167 termlists.clear();
00168 return tl;
00169 }
00170
00171 make_heap(termlists.begin(), termlists.end(), CompareTermListsByTerm());
00172
00173 current_term = termlists.front()->get_termname();
00174 return NULL;
00175 }
00176
00177 bool
00178 MultiAllTermsList::at_end() const
00179 {
00180 return termlists.empty();
00181 }