00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef XAPIAN_INCLUDED_WEIGHT_H
00023 #define XAPIAN_INCLUDED_WEIGHT_H
00024
00025 #include <string>
00026
00027 #include <xapian/types.h>
00028 #include <xapian/visibility.h>
00029
00030 namespace Xapian {
00031
00033 class XAPIAN_VISIBILITY_DEFAULT Weight {
00034 protected:
00036 typedef enum {
00037 COLLECTION_SIZE = 1,
00038 RSET_SIZE = 2,
00039 AVERAGE_LENGTH = 4,
00040 TERMFREQ = 8,
00041 RELTERMFREQ = 16,
00042 QUERY_LENGTH = 32,
00043 WQF = 64,
00044 WDF = 128,
00045 DOC_LENGTH = 256,
00046 DOC_LENGTH_MIN = 512,
00047 DOC_LENGTH_MAX = 1024,
00048 WDF_MAX = 2048
00049 } stat_flags;
00050
00060 void need_stat(stat_flags flag) {
00061 stats_needed = stat_flags(stats_needed | flag);
00062 }
00063
00073 virtual void init(double factor) = 0;
00074
00075 private:
00077 void operator=(const Weight &);
00078
00080 stat_flags stats_needed;
00081
00083 Xapian::doccount collection_size_;
00084
00086 Xapian::doccount rset_size_;
00087
00089 Xapian::doclength average_length_;
00090
00092 Xapian::doccount termfreq_;
00093
00095 Xapian::doccount reltermfreq_;
00096
00098 Xapian::termcount query_length_;
00099
00101 Xapian::termcount wqf_;
00102
00104 Xapian::termcount doclength_lower_bound_;
00105
00107 Xapian::termcount doclength_upper_bound_;
00108
00110 Xapian::termcount wdf_upper_bound_;
00111
00112 public:
00113 class Internal;
00114
00116 virtual ~Weight();
00117
00134 virtual Weight * clone() const = 0;
00135
00149 virtual std::string name() const;
00150
00156 virtual std::string serialise() const;
00157
00175 virtual Weight * unserialise(const std::string & s) const;
00176
00185 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00186 Xapian::termcount doclen) const = 0;
00187
00193 virtual Xapian::weight get_maxpart() const = 0;
00194
00202 virtual Xapian::weight get_sumextra(Xapian::termcount doclen) const = 0;
00203
00210 virtual Xapian::weight get_maxextra() const = 0;
00211
00221 void init_(const Internal & stats, Xapian::termcount query_len_,
00222 const std::string & term, Xapian::termcount wqf_,
00223 double factor);
00224
00234 void init_(const Internal & stats, Xapian::termcount query_len_,
00235 double factor, Xapian::doccount termfreq,
00236 Xapian::doccount reltermfreq);
00237
00244 void init_(const Internal & stats, Xapian::termcount query_len_);
00245
00252 bool get_sumpart_needs_doclength_() const {
00253 return stats_needed & DOC_LENGTH;
00254 }
00255
00261 bool get_sumpart_needs_wdf_() const {
00262 return stats_needed & WDF;
00263 }
00264
00265 protected:
00271 Weight(const Weight &);
00272
00274 Weight() : stats_needed() { }
00275
00277 Xapian::doccount get_collection_size() const { return collection_size_; }
00278
00280 Xapian::doccount get_rset_size() const { return rset_size_; }
00281
00283 Xapian::doclength get_average_length() const { return average_length_; }
00284
00286 Xapian::doccount get_termfreq() const { return termfreq_; }
00287
00289 Xapian::doccount get_reltermfreq() const { return reltermfreq_; }
00290
00292 Xapian::termcount get_query_length() const { return query_length_; }
00293
00295 Xapian::termcount get_wqf() const { return wqf_; }
00296
00301 Xapian::termcount get_doclength_upper_bound() const {
00302 return doclength_upper_bound_;
00303 }
00304
00311 Xapian::termcount get_doclength_lower_bound() const {
00312 return doclength_lower_bound_;
00313 }
00314
00319 Xapian::termcount get_wdf_upper_bound() const {
00320 return wdf_upper_bound_;
00321 }
00322 };
00323
00328 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
00329 BoolWeight * clone() const;
00330
00331 void init(double factor);
00332
00333 public:
00335 BoolWeight() { }
00336
00337 std::string name() const;
00338
00339 std::string serialise() const;
00340 BoolWeight * unserialise(const std::string & s) const;
00341
00342 Xapian::weight get_sumpart(Xapian::termcount wdf,
00343 Xapian::termcount doclen) const;
00344 Xapian::weight get_maxpart() const;
00345
00346 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00347 Xapian::weight get_maxextra() const;
00348 };
00349
00351 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
00353 mutable Xapian::doclength len_factor;
00354
00356 mutable Xapian::weight termweight;
00357
00359 double param_k1, param_k2, param_k3, param_b;
00360
00362 Xapian::doclength param_min_normlen;
00363
00364 BM25Weight * clone() const;
00365
00366 void init(double factor);
00367
00368 public:
00396 BM25Weight(double k1, double k2, double k3, double b, double min_normlen)
00397 : param_k1(k1), param_k2(k2), param_k3(k3), param_b(b),
00398 param_min_normlen(min_normlen)
00399 {
00400 if (param_k1 < 0) param_k1 = 0;
00401 if (param_k2 < 0) param_k2 = 0;
00402 if (param_k3 < 0) param_k3 = 0;
00403 if (param_b < 0) {
00404 param_b = 0;
00405 } else if (param_b > 1) {
00406 param_b = 1;
00407 }
00408 need_stat(COLLECTION_SIZE);
00409 need_stat(RSET_SIZE);
00410 need_stat(TERMFREQ);
00411 need_stat(RELTERMFREQ);
00412 need_stat(WDF);
00413 need_stat(WDF_MAX);
00414 if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
00415 need_stat(DOC_LENGTH_MIN);
00416 need_stat(AVERAGE_LENGTH);
00417 }
00418 if (param_k1 != 0 && param_b != 0) need_stat(DOC_LENGTH);
00419 if (param_k2 != 0) need_stat(QUERY_LENGTH);
00420 if (param_k3 != 0) need_stat(WQF);
00421 }
00422
00423 BM25Weight()
00424 : param_k1(1), param_k2(0), param_k3(1), param_b(0.5),
00425 param_min_normlen(0.5)
00426 {
00427 need_stat(COLLECTION_SIZE);
00428 need_stat(RSET_SIZE);
00429 need_stat(TERMFREQ);
00430 need_stat(RELTERMFREQ);
00431 need_stat(WDF);
00432 need_stat(WDF_MAX);
00433 need_stat(DOC_LENGTH_MIN);
00434 need_stat(AVERAGE_LENGTH);
00435 need_stat(DOC_LENGTH);
00436 need_stat(WQF);
00437 }
00438
00439 std::string name() const;
00440
00441 std::string serialise() const;
00442 BM25Weight * unserialise(const std::string & s) const;
00443
00444 Xapian::weight get_sumpart(Xapian::termcount wdf,
00445 Xapian::termcount doclen) const;
00446 Xapian::weight get_maxpart() const;
00447
00448 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00449 Xapian::weight get_maxextra() const;
00450 };
00451
00461 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
00463 mutable Xapian::doclength len_factor;
00464
00466 mutable Xapian::weight termweight;
00467
00469 double param_k;
00470
00471 TradWeight * clone() const;
00472
00473 void init(double factor);
00474
00475 public:
00483 explicit TradWeight(double k = 1.0) : param_k(k) {
00484 if (param_k < 0) param_k = 0;
00485 if (param_k != 0.0) {
00486 need_stat(AVERAGE_LENGTH);
00487 need_stat(DOC_LENGTH);
00488 }
00489 need_stat(COLLECTION_SIZE);
00490 need_stat(RSET_SIZE);
00491 need_stat(TERMFREQ);
00492 need_stat(RELTERMFREQ);
00493 need_stat(DOC_LENGTH_MIN);
00494 need_stat(WDF);
00495 need_stat(WDF_MAX);
00496 }
00497
00498 std::string name() const;
00499
00500 std::string serialise() const;
00501 TradWeight * unserialise(const std::string & s) const;
00502
00503 Xapian::weight get_sumpart(Xapian::termcount wdf,
00504 Xapian::termcount doclen) const;
00505 Xapian::weight get_maxpart() const;
00506
00507 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00508 Xapian::weight get_maxextra() const;
00509 };
00510
00511 }
00512
00513 #endif // XAPIAN_INCLUDED_WEIGHT_H