00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025
00026 #include "localmatch.h"
00027
00028 #include "autoptr.h"
00029 #include "extraweightpostlist.h"
00030 #include "leafpostlist.h"
00031 #include "omdebug.h"
00032 #include "omqueryinternal.h"
00033 #include "queryoptimiser.h"
00034 #include "scaleweight.h"
00035 #include "weightinternal.h"
00036 #include "stats.h"
00037
00038 #include <cfloat>
00039 #include <cmath>
00040 #include <map>
00041
00042 LocalSubMatch::LocalSubMatch(const Xapian::Database::Internal *db_,
00043 const Xapian::Query::Internal * query, Xapian::termcount qlen_,
00044 const Xapian::RSet & omrset,
00045 const Xapian::Weight *wt_factory_)
00046 : orig_query(*query), qlen(qlen_), db(db_),
00047 rset(db, omrset), wt_factory(wt_factory_), term_info(NULL)
00048 {
00049 DEBUGCALL(MATCH, void, "LocalSubMatch::LocalSubMatch",
00050 db << ", " << query << ", " << qlen_ << ", " << omrset << ", " <<
00051 ", [wt_factory]");
00052 }
00053
00054 bool
00055 LocalSubMatch::prepare_match(bool , Stats & total_stats)
00056 {
00057 DEBUGCALL(MATCH, bool, "LocalSubMatch::prepare_match", "/*nowait*/");
00058 Stats my_stats;
00059
00060
00061 my_stats.collection_size = db->get_doccount();
00062 my_stats.average_length = db->get_avlength();
00063
00064
00065 Xapian::TermIterator titer = orig_query.get_terms();
00066 Xapian::TermIterator terms_end(NULL);
00067 for ( ; titer != terms_end; ++titer) {
00068 Assert(!(*titer).empty());
00069 my_stats.set_termfreq(*titer, db->get_termfreq(*titer));
00070 rset.will_want_reltermfreq(*titer);
00071 }
00072 rset.contribute_stats(my_stats);
00073
00074
00075 total_stats += my_stats;
00076 RETURN(true);
00077 }
00078
00079 void
00080 LocalSubMatch::start_match(Xapian::doccount, Xapian::doccount,
00081 Xapian::doccount, const Stats & total_stats)
00082 {
00083
00084 stats = &total_stats;
00085 }
00086
00087 PostList *
00088 LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher,
00089 map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts)
00090 {
00091 DEBUGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist_and_term_info",
00092 matcher << ", [termfreqandwts]");
00093 term_info = termfreqandwts;
00094
00095
00096
00097
00098 QueryOptimiser opt(*db, *this, matcher);
00099 PostList * pl = opt.optimise_query(&orig_query);
00100
00101
00102
00103 AutoPtr<Xapian::Weight> extra_wt;
00104
00105 extra_wt = wt_factory->create(stats->create_weight_internal(), qlen, 1, "");
00106 if (extra_wt->get_maxextra() != 0.0) {
00107 pl = new ExtraWeightPostList(pl, extra_wt.release(), matcher);
00108 }
00109
00110 RETURN(pl);
00111 }
00112
00113 PostList *
00114 LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query,
00115 double factor)
00116 {
00117 DEBUGCALL(MATCH, PostList *, "LocalSubMatch::postlist_from_op_leaf_query",
00118 query << ", " << factor);
00119 Assert(query);
00120 AssertEq(query->op, Xapian::Query::Internal::OP_LEAF);
00121 Assert(query->subqs.empty());
00122 bool boolean = (factor == 0.0);
00123 AutoPtr<Xapian::Weight> wt;
00124 if (!boolean) {
00125
00126
00127
00128
00129 wt = wt_factory->create(stats->create_weight_internal(query->tname),
00130 qlen, query->wqf, query->tname);
00131 if (fabs(factor - 1.0) > DBL_EPSILON) {
00132 wt = new ScaleWeight(wt.release(), factor);
00133 }
00134 }
00135
00136 if (term_info) {
00137 map<string, Xapian::MSet::Internal::TermFreqAndWeight>::iterator i;
00138 i = term_info->find(query->tname);
00139 if (i == term_info->end()) {
00140 Xapian::doccount tf = stats->get_termfreq(query->tname);
00141 Xapian::weight weight = boolean ? 0 : wt->get_maxpart();
00142 Xapian::MSet::Internal::TermFreqAndWeight info(tf, weight);
00143 DEBUGLINE(MATCH, "Setting term_info[" << query->tname << "] "
00144 "to (" << tf << ", " << weight << ")");
00145 term_info->insert(make_pair(query->tname, info));
00146 } else if (!boolean) {
00147 i->second.termweight += wt->get_maxpart();
00148 AssertEq(stats->get_termfreq(query->tname), i->second.termfreq);
00149 DEBUGLINE(MATCH, "Increasing term_info[" << query->tname << "] "
00150 "to (" << i->second.termfreq << ", " <<
00151 i->second.termweight << ")");
00152 }
00153 }
00154
00155 LeafPostList * pl = db->open_post_list(query->tname);
00156
00157
00158 if (!boolean) pl->set_termweight(wt.release());
00159 RETURN(pl);
00160 }