matcher/mergepostlist.cc

Go to the documentation of this file.
00001 /* mergepostlist.cc: MERGE of two posting lists
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2002,2003,2004,2006 Olly Betts
00006  * Copyright 2007 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 #include "multimatch.h"
00026 #include "emptypostlist.h"
00027 #include "mergepostlist.h"
00028 #include "branchpostlist.h"
00029 #include "omassert.h"
00030 #include "omdebug.h"
00031 #include <xapian/errorhandler.h>
00032 
00033 // NB don't prune - even with one sublist we still translate docids...
00034 
00035 MergePostList::MergePostList(std::vector<PostList *> plists_,
00036                              MultiMatch *matcher_,
00037                              Xapian::ErrorHandler * errorhandler_)
00038         : plists(plists_), current(-1), matcher(matcher_),
00039           errorhandler(errorhandler_)
00040 {
00041     DEBUGCALL(MATCH, void, "MergePostList::MergePostList", "std::vector<PostList *>");
00042 }
00043 
00044 MergePostList::~MergePostList()
00045 {
00046     DEBUGCALL(MATCH, void, "MergePostList::~MergePostList", "");
00047     std::vector<PostList *>::const_iterator i;
00048     for (i = plists.begin(); i != plists.end(); i++) {
00049         delete *i;
00050     }
00051 }
00052 
00053 PostList *
00054 MergePostList::next(Xapian::weight w_min)
00055 {
00056     DEBUGCALL(MATCH, PostList *, "MergePostList::next", w_min);
00057     DEBUGLINE(MATCH, "current = " << current);
00058     if (current == -1) current = 0;
00059     while (true) {
00060         // FIXME: should skip over Remote matchers which aren't ready yet
00061         // and come back to them later...
00062         try {
00063             next_handling_prune(plists[current], w_min, matcher);
00064             if (!plists[current]->at_end()) break;
00065             current++;
00066         } catch (Xapian::Error & e) {
00067             if (errorhandler) {
00068                 DEBUGLINE(EXCEPTION, "Calling error handler in MergePostList::next().");
00069                 (*errorhandler)(e);
00070                 // Continue match without this sub-postlist.
00071                 delete plists[current];
00072                 plists[current] = new EmptyPostList;
00073             } else {
00074                 throw;
00075             }
00076         }
00077         if (unsigned(current) >= plists.size()) break;
00078         if (matcher) matcher->recalc_maxweight();
00079     }
00080     DEBUGLINE(MATCH, "current = " << current);
00081     RETURN(NULL);
00082 }
00083 
00084 PostList *
00085 MergePostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00086 {
00087     DEBUGCALL(MATCH, PostList *, "MergePostList::skip_to", did << ", " << w_min);
00088     (void)did;
00089     (void)w_min;
00090     // MergePostList doesn't return documents in docid order, so skip_to
00091     // isn't a meaningful operation.
00092     throw Xapian::InvalidOperationError("MergePostList doesn't support skip_to");
00093 }
00094 
00095 Xapian::termcount
00096 MergePostList::get_wdf() const
00097 {
00098     DEBUGCALL(MATCH, Xapian::termcount, "MergePostList::get_wdf", "");
00099     RETURN(plists[current]->get_wdf());
00100 }
00101 
00102 Xapian::doccount
00103 MergePostList::get_termfreq_max() const
00104 {
00105     DEBUGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_max", "");
00106     // sum of termfreqs for all children
00107     Xapian::doccount total = 0;
00108     vector<PostList *>::const_iterator i;
00109     for (i = plists.begin(); i != plists.end(); i++) {
00110         total += (*i)->get_termfreq_max();
00111     }
00112     return total;
00113 }
00114 
00115 Xapian::doccount
00116 MergePostList::get_termfreq_min() const
00117 {
00118     DEBUGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_min", "");
00119     // sum of termfreqs for all children
00120     Xapian::doccount total = 0;
00121     vector<PostList *>::const_iterator i;
00122     for (i = plists.begin(); i != plists.end(); i++) {
00123         total += (*i)->get_termfreq_min();
00124     }
00125     return total;
00126 }
00127 
00128 Xapian::doccount
00129 MergePostList::get_termfreq_est() const
00130 {
00131     DEBUGCALL(MATCH, Xapian::doccount, "MergePostList::get_termfreq_est", "");
00132     // sum of termfreqs for all children
00133     Xapian::doccount total = 0;
00134     vector<PostList *>::const_iterator i;
00135     for (i = plists.begin(); i != plists.end(); i++) {
00136         total += (*i)->get_termfreq_est();
00137     }
00138     return total;
00139 }
00140 
00141 Xapian::docid
00142 MergePostList::get_docid() const
00143 {
00144     DEBUGCALL(MATCH, Xapian::docid, "MergePostList::get_docid", "");
00145     Assert(current != -1);
00146     // FIXME: this needs fixing so we can prune plists - see MultiPostlist
00147     // for code which does this...
00148     RETURN((plists[current]->get_docid() - 1) * plists.size() + current + 1);
00149 }
00150 
00151 Xapian::weight
00152 MergePostList::get_weight() const
00153 {
00154     DEBUGCALL(MATCH, Xapian::weight, "MergePostList::get_weight", "");
00155     Assert(current != -1);
00156     return plists[current]->get_weight();
00157 }
00158 
00159 const string *
00160 MergePostList::get_collapse_key() const
00161 {
00162     DEBUGCALL(MATCH, string *, "MergePostList::get_collapse_key", "");
00163     Assert(current != -1);
00164     return plists[current]->get_collapse_key();
00165 }
00166 
00167 Xapian::weight
00168 MergePostList::get_maxweight() const
00169 {
00170     DEBUGCALL(MATCH, Xapian::weight, "MergePostList::get_maxweight", "");
00171     return w_max;
00172 }
00173 
00174 Xapian::weight
00175 MergePostList::recalc_maxweight()
00176 {
00177     DEBUGCALL(MATCH, Xapian::weight, "MergePostList::recalc_maxweight", "");
00178     w_max = 0;
00179     vector<PostList *>::iterator i;
00180     for (i = plists.begin(); i != plists.end(); i++) {
00181         try {
00182             Xapian::weight w = (*i)->recalc_maxweight();
00183             if (w > w_max) w_max = w;
00184         } catch (Xapian::Error & e) {
00185             if (errorhandler) {
00186                 DEBUGLINE(EXCEPTION, "Calling error handler in MergePostList::recalc_maxweight().");
00187                 (*errorhandler)(e);
00188 
00189                 if (current == i - plists.begin()) {
00190                     // Fatal error
00191                     throw;
00192                 }
00193                 // Continue match without this sub-postlist.
00194                 delete (*i);
00195                 *i = new EmptyPostList;
00196             } else {
00197                 throw;
00198             }
00199         }
00200     }
00201     return w_max;
00202 }
00203 
00204 bool
00205 MergePostList::at_end() const
00206 {
00207     DEBUGCALL(MATCH, bool, "MergePostList::at_end", "");
00208     Assert(current != -1);
00209     return unsigned(current) >= plists.size();    
00210 }
00211 
00212 string
00213 MergePostList::get_description() const
00214 {
00215     string desc = "( Merge ";
00216     vector<PostList *>::const_iterator i;
00217     for (i = plists.begin(); i != plists.end(); i++) {
00218         desc += (*i)->get_description() + " ";
00219     }
00220     return desc + ")";
00221 }
00222 
00223 Xapian::doclength
00224 MergePostList::get_doclength() const
00225 {
00226     DEBUGCALL(MATCH, Xapian::doclength, "MergePostList::get_doclength", "");
00227     Assert(current != -1);
00228     return plists[current]->get_doclength();
00229 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.