backends/multi/multi_alltermslist.cc

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007,2008,2009 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #include <config.h>
00022 
00023 #include "multialltermslist.h"
00024 
00025 #include <xapian/error.h>
00026 
00027 #include "omassert.h"
00028 
00029 #include <algorithm>
00030 
00031 using namespace std;
00032 
00034 struct CompareTermListsByTerm {
00036     bool operator()(const TermList *a, const TermList *b) {
00037         return a->get_termname() > b->get_termname();
00038     }
00039 };
00040 
00041 template<class CLASS> struct delete_ptr {
00042     void operator()(CLASS *p) { delete p; }
00043 };
00044 
00045 MultiAllTermsList::MultiAllTermsList(const vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> > & dbs,
00046                                      const string & prefix)
00047 {
00048     // The 0 and 1 cases should be handled by our caller.
00049     AssertRel(dbs.size(), >=, 2);
00050     termlists.reserve(dbs.size());
00051     try {
00052         vector<Xapian::Internal::RefCntPtr<Xapian::Database::Internal> >::const_iterator i;
00053         for (i = dbs.begin(); i != dbs.end(); ++i) {
00054             termlists.push_back((*i)->open_allterms(prefix));
00055         }
00056     } catch (...) {
00057         for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00058         throw;
00059     }
00060 }
00061 
00062 MultiAllTermsList::~MultiAllTermsList()
00063 {
00064     for_each(termlists.begin(), termlists.end(), delete_ptr<TermList>());
00065 }
00066 
00067 string
00068 MultiAllTermsList::get_termname() const
00069 {
00070     return current_term;
00071 }
00072 
00073 Xapian::doccount
00074 MultiAllTermsList::get_termfreq() const
00075 {
00076     if (termlists.empty()) return 0;
00077     vector<TermList *>::const_iterator i = termlists.begin();
00078     Xapian::doccount total_tf = (*i)->get_termfreq();
00079     while (++i != termlists.end()) {
00080         if ((*i)->get_termname() == current_term)
00081             total_tf += (*i)->get_termfreq();
00082     }
00083     return total_tf;
00084 }
00085 
00086 Xapian::termcount
00087 MultiAllTermsList::get_collection_freq() const
00088 {
00089     if (termlists.empty()) return 0;
00090     vector<TermList *>::const_iterator i = termlists.begin();
00091     Xapian::termcount total_cf = (*i)->get_collection_freq();
00092     while (++i != termlists.end()) {
00093         if ((*i)->get_termname() == current_term)
00094             total_cf += (*i)->get_collection_freq();
00095     }
00096     return total_cf;
00097 }
00098 
00099 TermList *
00100 MultiAllTermsList::next()
00101 {
00102     if (current_term.empty()) {
00103         // Make termlists into a heap so that the one (or one of the ones) with
00104         // earliest sorting term is at the top of the heap.
00105         vector<TermList*>::iterator i = termlists.begin();
00106         while (i != termlists.end()) {
00107             (*i)->next();
00108             if ((*i)->at_end()) {
00109                 delete *i;
00110                 i = termlists.erase(i);
00111             } else {
00112                 ++i;
00113             }
00114         }
00115         make_heap(termlists.begin(), termlists.end(),
00116                   CompareTermListsByTerm());
00117     } else {
00118         // Advance to the next termname.
00119         do {
00120             TermList * tl = termlists.front();
00121             pop_heap(termlists.begin(), termlists.end(),
00122                      CompareTermListsByTerm());
00123             tl->next();
00124             if (tl->at_end()) {
00125                 delete tl;
00126                 termlists.pop_back();
00127             } else {
00128                 termlists.back() = tl;
00129                 push_heap(termlists.begin(), termlists.end(),
00130                           CompareTermListsByTerm());
00131             }
00132         } while (!termlists.empty() &&
00133                  termlists.front()->get_termname() == current_term);
00134     }
00135 
00136     if (termlists.size() <= 1) {
00137         if (termlists.empty()) return NULL;
00138         TermList * tl = termlists[0];
00139         termlists.clear();
00140         return tl;
00141     }
00142 
00143     current_term = termlists.front()->get_termname();
00144     return NULL;
00145 }
00146 
00147 TermList *
00148 MultiAllTermsList::skip_to(const std::string &term)
00149 {
00150     // Assume the skip is likely to be a long distance, and rebuild the heap
00151     // from scratch.  FIXME: It would be useful to profile this against an
00152     // approach more like that next() uses if this ever gets heavy use.
00153     vector<TermList*>::iterator i = termlists.begin();
00154     while (i != termlists.end()) {
00155         (*i)->skip_to(term);
00156         if ((*i)->at_end()) {
00157             delete *i;
00158             i = termlists.erase(i);
00159         } else {
00160             ++i;
00161         }
00162     }
00163 
00164     if (termlists.size() <= 1) {
00165         if (termlists.empty()) return NULL;
00166         TermList * tl = termlists[0];
00167         termlists.clear();
00168         return tl;
00169     }
00170 
00171     make_heap(termlists.begin(), termlists.end(), CompareTermListsByTerm());
00172     
00173     current_term = termlists.front()->get_termname();
00174     return NULL;
00175 }
00176 
00177 bool
00178 MultiAllTermsList::at_end() const
00179 {
00180     return termlists.empty();
00181 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.