00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include "xapian/weight.h"
00024
00025 #include "debuglog.h"
00026 #include "omassert.h"
00027 #include "serialise-double.h"
00028
00029 #include "xapian/error.h"
00030
00031 #include <cmath>
00032
00033 using namespace std;
00034
00035 namespace Xapian {
00036
00037 TradWeight *
00038 TradWeight::clone() const
00039 {
00040 return new TradWeight(param_k);
00041 }
00042
00043 void
00044 TradWeight::init(double factor)
00045 {
00046 Xapian::doccount tf = get_termfreq();
00047
00048 Xapian::weight tw = 0;
00049 if (get_rset_size() != 0) {
00050 Xapian::doccount reltermfreq = get_reltermfreq();
00051
00052
00053
00054 AssertRel(reltermfreq,<=,tf);
00055
00056
00057
00058 AssertRel(reltermfreq,<=,get_rset_size());
00059
00060 Xapian::doccount reldocs_not_indexed = get_rset_size() - reltermfreq;
00061
00062
00063
00064 AssertRel(reldocs_not_indexed,<=,get_collection_size() - tf);
00065
00066 Xapian::doccount Q = get_collection_size() - reldocs_not_indexed;
00067
00068 Xapian::doccount nonreldocs_indexed = tf - reltermfreq;
00069 double numerator = (reltermfreq + 0.5) * (Q - tf + 0.5);
00070 double denom = (reldocs_not_indexed + 0.5) * (nonreldocs_indexed + 0.5);
00071 tw = numerator / denom;
00072 } else {
00073 tw = (get_collection_size() - tf + 0.5) / (tf + 0.5);
00074 }
00075
00076 AssertRel(tw,>,0);
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093 #if 0
00094 if (rare(tw <= 1.0)) {
00095 termweight = 0;
00096 } else {
00097 termweight = log(tw) * factor;
00098 }
00099 #else
00100 if (tw < 2) tw = tw * 0.5 + 1;
00101 termweight = log(tw) * factor;
00102 #endif
00103
00104 LOGVALUE(WTCALC, termweight);
00105
00106 if (param_k == 0) {
00107
00108 len_factor = 0;
00109 } else {
00110 len_factor = get_average_length();
00111
00112
00113 if (len_factor != 0) len_factor = param_k / len_factor;
00114 }
00115
00116 LOGVALUE(WTCALC, len_factor);
00117 }
00118
00119 string
00120 TradWeight::name() const
00121 {
00122 return "Xapian::TradWeight";
00123 }
00124
00125 string
00126 TradWeight::serialise() const
00127 {
00128 return serialise_double(param_k);
00129 }
00130
00131 TradWeight *
00132 TradWeight::unserialise(const string & s) const
00133 {
00134 const char *ptr = s.data();
00135 const char *end = ptr + s.size();
00136 double k = unserialise_double(&ptr, end);
00137 if (rare(ptr != end))
00138 throw Xapian::NetworkError("Extra data in TradWeight::unserialise()");
00139 return new TradWeight(k);
00140 }
00141
00142 Xapian::weight
00143 TradWeight::get_sumpart(Xapian::termcount wdf, Xapian::termcount len) const
00144 {
00145 double wdf_double(wdf);
00146 return termweight * (wdf_double / (len * len_factor + wdf_double));
00147 }
00148
00149 Xapian::weight
00150 TradWeight::get_maxpart() const
00151 {
00152
00153 double wdf_max(max(get_wdf_upper_bound(), Xapian::termcount(1)));
00154 Xapian::termcount doclen_lb = get_doclength_lower_bound();
00155 return termweight * (wdf_max / (doclen_lb * len_factor + wdf_max));
00156 }
00157
00158 Xapian::weight
00159 TradWeight::get_sumextra(Xapian::termcount) const
00160 {
00161 return 0;
00162 }
00163
00164 Xapian::weight
00165 TradWeight::get_maxextra() const
00166 {
00167 return 0;
00168 }
00169
00170 }