00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "expandweight.h"
00025
00026 #include "debuglog.h"
00027 #include "omassert.h"
00028 #include "termlist.h"
00029
00030 #include <cmath>
00031
00032 using namespace std;
00033
00034 namespace Xapian {
00035 namespace Internal {
00036
00037 Xapian::weight
00038 ExpandWeight::get_weight(TermList * merger, const string & term) const
00039 {
00040 LOGCALL(MATCH, Xapian::weight, "ExpandWeight::get_weight", merger | term);
00041
00042
00043 ExpandStats stats(avlen, expand_k);
00044 merger->accumulate_stats(stats);
00045
00046 double termfreq = stats.termfreq;
00047 double rtermfreq = stats.rtermfreq;
00048
00049 LOGVALUE(EXPAND, rsize);
00050 LOGVALUE(EXPAND, rtermfreq);
00051
00052 LOGVALUE(EXPAND, dbsize);
00053 LOGVALUE(EXPAND, stats.dbsize);
00054 if (stats.dbsize == dbsize) {
00055
00056
00057
00058
00059 AssertEqParanoid(termfreq, db.get_termfreq(term));
00060 } else {
00061 AssertRel(stats.dbsize,<,dbsize);
00062
00063
00064
00065 if (use_exact_termfreq) {
00066 LOGLINE(EXPAND, "Had to request exact termfreq");
00067 termfreq = db.get_termfreq(term);
00068 } else {
00069
00070
00071 termfreq *= double(dbsize) / double(stats.dbsize);
00072 LOGLINE(EXPAND, "termfreq is approx " << stats.termfreq << " * " <<
00073 dbsize << " / " << stats.dbsize << " = " <<
00074 termfreq);
00075 LOGVALUE(EXPAND, db.get_termfreq(term));
00076 if (termfreq < rtermfreq) {
00077
00078
00079 LOGLINE(EXPAND, "termfreq must be at least rtermfreq");
00080 termfreq = rtermfreq;
00081 } else {
00082
00083
00084
00085
00086
00087
00088
00089 double termfreq_upper_bound = dbsize - (rsize - rtermfreq);
00090 if (termfreq > termfreq_upper_bound) {
00091 LOGLINE(EXPAND, "termfreq can't be more than "
00092 "dbsize - (rsize + rtermfreq)");
00093 termfreq = termfreq_upper_bound;
00094 }
00095 }
00096 }
00097 }
00098 LOGVALUE(EXPAND, termfreq);
00099
00100 double reldocs_without_term = rsize - rtermfreq;
00101 double num, denom;
00102 num = (rtermfreq + 0.5) * (dbsize - termfreq - reldocs_without_term + 0.5);
00103 AssertRel(num,>,0);
00104 denom = (termfreq - rtermfreq + 0.5) * (reldocs_without_term + 0.5);
00105 AssertRel(denom,>,0);
00106
00107 Xapian::weight tw = log(num / denom);
00108 LOGVALUE(EXPAND, tw);
00109 LOGVALUE(EXPAND, stats.multiplier);
00110 RETURN(stats.multiplier * tw);
00111 }
00112
00113 }
00114 }