00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "esetinternal.h"
00025
00026 #include "xapian/enquire.h"
00027 #include "xapian/expanddecider.h"
00028 #include "database.h"
00029 #include "debuglog.h"
00030 #include "omenquireinternal.h"
00031 #include "expandweight.h"
00032 #include "omassert.h"
00033 #include "ortermlist.h"
00034 #include "str.h"
00035 #include "termlist.h"
00036
00037 #include "autoptr.h"
00038 #include <set>
00039 #include <string>
00040 #include <vector>
00041
00042 using namespace std;
00043
00044 namespace Xapian {
00045
00046 string
00047 Internal::ExpandTerm::get_description() const
00048 {
00049 string desc("ExpandTerm(");
00050 desc += str(wt);
00051 desc += ", ";
00052 desc += term;
00053 desc += ')';
00054 return desc;
00055 }
00056
00057 template<class CLASS> struct delete_ptr {
00058 void operator()(CLASS *p) { delete p; }
00059 };
00060
00061 struct CompareTermListSizeAscending {
00062 bool operator()(const TermList *a, const TermList *b) {
00063 return a->get_approx_size() > b->get_approx_size();
00064 }
00065 };
00066
00070 static TermList *
00071 build_termlist_tree(const Xapian::Database &db, const RSet & rset)
00072 {
00073 Assert(!rset.empty());
00074
00075 const set<Xapian::docid> & docids = rset.internal->get_items();
00076
00077 vector<TermList*> termlists;
00078 termlists.reserve(docids.size());
00079
00080 try {
00081 const size_t multiplier = db.internal.size();
00082 set<Xapian::docid>::const_iterator i;
00083 for (i = docids.begin(); i != docids.end(); ++i) {
00084 Xapian::docid realdid = (*i - 1) / multiplier + 1;
00085 Xapian::doccount dbnumber = (*i - 1) % multiplier;
00086
00087
00088
00089 termlists.push_back(0);
00090 termlists.back() = db.internal[dbnumber]->open_term_list(realdid);
00091 }
00092
00093 Assert(!termlists.empty());
00094 if (termlists.size() == 1) return termlists[0];
00095
00096
00097
00098 make_heap(termlists.begin(), termlists.end(),
00099 CompareTermListSizeAscending());
00100
00101
00102
00103
00104
00105
00106
00107 while (true) {
00108 AssertRel(termlists.size(), >=, 2);
00109
00110
00111
00112
00113
00114
00115 TermList * r = termlists.front();
00116 pop_heap(termlists.begin(), termlists.end(),
00117 CompareTermListSizeAscending());
00118 termlists.pop_back();
00119 TermList * l = termlists.front();
00120
00121 TermList * pl = new OrTermList(l, r);
00122
00123 if (termlists.size() == 1) return pl;
00124
00125 pop_heap(termlists.begin(), termlists.end(),
00126 CompareTermListSizeAscending());
00127 termlists.back() = pl;
00128 push_heap(termlists.begin(), termlists.end(),
00129 CompareTermListSizeAscending());
00130 }
00131 } catch (...) {
00132 for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00133 throw;
00134 }
00135 }
00136
00137 void
00138 ESet::Internal::expand(Xapian::termcount max_esize,
00139 const Xapian::Database & db,
00140 const RSet & rset,
00141 const Xapian::ExpandDecider * edecider,
00142 const Xapian::Internal::ExpandWeight & eweight,
00143 Xapian::weight min_wt)
00144 {
00145 LOGCALL_VOID(EXPAND, "ESet::Internal::expand", max_esize | db | rset | edecider | eweight);
00146
00147 Assert(max_esize);
00148 Assert(!rset.empty());
00149
00150
00151 Assert(ebound == 0);
00152 Assert(items.empty());
00153
00154 AutoPtr<TermList> tree(build_termlist_tree(db, rset));
00155 Assert(tree.get());
00156
00157 bool is_heap = false;
00158 while (true) {
00159
00160 TermList * new_root = tree->next();
00161 if (new_root) {
00162 LOGLINE(EXPAND, "Replacing the root of the termlist tree");
00163 tree.reset(new_root);
00164 }
00165
00166 if (tree->at_end()) break;
00167
00168 string term = tree->get_termname();
00169
00170
00171 if (edecider && !(*edecider)(term)) continue;
00172
00173 ++ebound;
00174
00175 Xapian::weight wt = eweight.get_weight(tree.get(), term);
00176
00177
00178 if (wt <= min_wt) continue;
00179
00180 items.push_back(Xapian::Internal::ExpandTerm(wt, term));
00181
00182
00183
00184 if (items.size() > max_esize) {
00185 if (rare(!is_heap)) {
00186 is_heap = true;
00187 make_heap(items.begin(), items.end());
00188 } else {
00189 push_heap(items.begin(), items.end());
00190 }
00191 pop_heap(items.begin(), items.end());
00192 items.pop_back();
00193 min_wt = items.front().wt;
00194 }
00195 }
00196
00197
00198 if (is_heap) {
00199 sort_heap(items.begin(), items.end());
00200 } else {
00201 sort(items.begin(), items.end());
00202 }
00203 }
00204
00205 string
00206 ESet::Internal::get_description() const
00207 {
00208 string desc("ESet::Internal(ebound=");
00209 desc += str(ebound);
00210
00211 vector<Xapian::Internal::ExpandTerm>::const_iterator i;
00212 for (i = items.begin(); i != items.end(); ++i) {
00213 desc += ", ";
00214 desc += i->get_description();
00215 }
00216 desc += ')';
00217
00218 return desc;
00219 }
00220
00221 }