00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef XAPIAN_INCLUDED_WEIGHT_H
00023 #define XAPIAN_INCLUDED_WEIGHT_H
00024
00025 #include <string>
00026
00027 #include <xapian/types.h>
00028 #include <xapian/visibility.h>
00029
00030 namespace Xapian {
00031
00033 class XAPIAN_VISIBILITY_DEFAULT Weight {
00034 protected:
00036 typedef enum {
00037 COLLECTION_SIZE = 1,
00038 RSET_SIZE = 2,
00039 AVERAGE_LENGTH = 4,
00040 TERMFREQ = 8,
00041 RELTERMFREQ = 16,
00042 QUERY_LENGTH = 32,
00043 WQF = 64,
00044 WDF = 128,
00045 DOC_LENGTH = 256,
00046 DOC_LENGTH_MIN = 512,
00047 DOC_LENGTH_MAX = 1024,
00048 WDF_MAX = 2048
00049 } stat_flags;
00050
00060 void need_stat(stat_flags flag) {
00061 stats_needed = stat_flags(stats_needed | flag);
00062 }
00063
00068 virtual void init(double factor) = 0;
00069
00070 private:
00072 void operator=(const Weight &);
00073
00075 stat_flags stats_needed;
00076
00078 Xapian::doccount collection_size_;
00079
00081 Xapian::doccount rset_size_;
00082
00084 Xapian::doclength average_length_;
00085
00087 Xapian::doccount termfreq_;
00088
00090 Xapian::doccount reltermfreq_;
00091
00093 Xapian::termcount query_length_;
00094
00096 Xapian::termcount wqf_;
00097
00099 Xapian::termcount doclength_lower_bound_;
00100
00102 Xapian::termcount doclength_upper_bound_;
00103
00105 Xapian::termcount wdf_upper_bound_;
00106
00107 public:
00108 class Internal;
00109
00111 virtual ~Weight();
00112
00129 virtual Weight * clone() const = 0;
00130
00144 virtual std::string name() const;
00145
00151 virtual std::string serialise() const;
00152
00170 virtual Weight * unserialise(const std::string & s) const;
00171
00180 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00181 Xapian::termcount doclen) const = 0;
00182
00188 virtual Xapian::weight get_maxpart() const = 0;
00189
00197 virtual Xapian::weight get_sumextra(Xapian::termcount doclen) const = 0;
00198
00205 virtual Xapian::weight get_maxextra() const = 0;
00206
00216 void init_(const Internal & stats, Xapian::termcount query_len_,
00217 const std::string & term, Xapian::termcount wqf_,
00218 double factor);
00219
00229 void init_(const Internal & stats, Xapian::termcount query_len_,
00230 double factor, Xapian::doccount termfreq,
00231 Xapian::doccount reltermfreq);
00232
00239 void init_(const Internal & stats, Xapian::termcount query_len_);
00240
00247 bool get_sumpart_needs_doclength_() const {
00248 return stats_needed & DOC_LENGTH;
00249 }
00250
00256 bool get_sumpart_needs_wdf_() const {
00257 return stats_needed & WDF;
00258 }
00259
00260 protected:
00266 Weight(const Weight &);
00267
00269 Weight() : stats_needed() { }
00270
00272 Xapian::doccount get_collection_size() const { return collection_size_; }
00273
00275 Xapian::doccount get_rset_size() const { return rset_size_; }
00276
00278 Xapian::doclength get_average_length() const { return average_length_; }
00279
00281 Xapian::doccount get_termfreq() const { return termfreq_; }
00282
00284 Xapian::doccount get_reltermfreq() const { return reltermfreq_; }
00285
00287 Xapian::termcount get_query_length() const { return query_length_; }
00288
00290 Xapian::termcount get_wqf() const { return wqf_; }
00291
00296 Xapian::termcount get_doclength_upper_bound() const {
00297 return doclength_upper_bound_;
00298 }
00299
00304 Xapian::termcount get_doclength_lower_bound() const {
00305 return doclength_lower_bound_;
00306 }
00307
00312 Xapian::termcount get_wdf_upper_bound() const {
00313 return wdf_upper_bound_;
00314 }
00315 };
00316
00321 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
00322 BoolWeight * clone() const;
00323
00324 void init(double factor);
00325
00326 public:
00328 BoolWeight() { }
00329
00330 std::string name() const;
00331
00332 std::string serialise() const;
00333 BoolWeight * unserialise(const std::string & s) const;
00334
00335 Xapian::weight get_sumpart(Xapian::termcount wdf,
00336 Xapian::termcount doclen) const;
00337 Xapian::weight get_maxpart() const;
00338
00339 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00340 Xapian::weight get_maxextra() const;
00341 };
00342
00344 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
00346 mutable Xapian::doclength len_factor;
00347
00349 mutable Xapian::weight termweight;
00350
00352 double param_k1, param_k2, param_k3, param_b;
00353
00355 Xapian::doclength param_min_normlen;
00356
00357 BM25Weight * clone() const;
00358
00359 void init(double factor);
00360
00361 public:
00389 BM25Weight(double k1, double k2, double k3, double b, double min_normlen)
00390 : param_k1(k1), param_k2(k2), param_k3(k3), param_b(b),
00391 param_min_normlen(min_normlen)
00392 {
00393 if (param_k1 < 0) param_k1 = 0;
00394 if (param_k2 < 0) param_k2 = 0;
00395 if (param_k3 < 0) param_k3 = 0;
00396 if (param_b < 0) {
00397 param_b = 0;
00398 } else if (param_b > 1) {
00399 param_b = 1;
00400 }
00401 need_stat(COLLECTION_SIZE);
00402 need_stat(RSET_SIZE);
00403 need_stat(TERMFREQ);
00404 need_stat(RELTERMFREQ);
00405 need_stat(WDF);
00406 need_stat(WDF_MAX);
00407 need_stat(WDF);
00408 if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
00409 need_stat(DOC_LENGTH_MIN);
00410 need_stat(AVERAGE_LENGTH);
00411 }
00412 if (param_k1 != 0 && param_b != 0) need_stat(DOC_LENGTH);
00413 if (param_k2 != 0) need_stat(QUERY_LENGTH);
00414 if (param_k3 != 0) need_stat(WQF);
00415 }
00416
00417 BM25Weight()
00418 : param_k1(1), param_k2(0), param_k3(1), param_b(0.5),
00419 param_min_normlen(0.5)
00420 {
00421 need_stat(COLLECTION_SIZE);
00422 need_stat(RSET_SIZE);
00423 need_stat(TERMFREQ);
00424 need_stat(RELTERMFREQ);
00425 need_stat(WDF);
00426 need_stat(WDF_MAX);
00427 need_stat(WDF);
00428 need_stat(DOC_LENGTH_MIN);
00429 need_stat(AVERAGE_LENGTH);
00430 need_stat(DOC_LENGTH);
00431 need_stat(WQF);
00432 }
00433
00434 std::string name() const;
00435
00436 std::string serialise() const;
00437 BM25Weight * unserialise(const std::string & s) const;
00438
00439 Xapian::weight get_sumpart(Xapian::termcount wdf,
00440 Xapian::termcount doclen) const;
00441 Xapian::weight get_maxpart() const;
00442
00443 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00444 Xapian::weight get_maxextra() const;
00445 };
00446
00456 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
00458 mutable Xapian::doclength len_factor;
00459
00461 mutable Xapian::weight termweight;
00462
00464 double param_k;
00465
00466 TradWeight * clone() const;
00467
00468 void init(double factor);
00469
00470 public:
00478 explicit TradWeight(double k = 1.0) : param_k(k) {
00479 if (param_k < 0) param_k = 0;
00480 if (param_k != 0.0) {
00481 need_stat(AVERAGE_LENGTH);
00482 need_stat(DOC_LENGTH);
00483 }
00484 need_stat(COLLECTION_SIZE);
00485 need_stat(RSET_SIZE);
00486 need_stat(TERMFREQ);
00487 need_stat(RELTERMFREQ);
00488 need_stat(DOC_LENGTH_MIN);
00489 need_stat(WDF);
00490 need_stat(WDF_MAX);
00491 need_stat(WDF);
00492 }
00493
00494 std::string name() const;
00495
00496 std::string serialise() const;
00497 TradWeight * unserialise(const std::string & s) const;
00498
00499 Xapian::weight get_sumpart(Xapian::termcount wdf,
00500 Xapian::termcount doclen) const;
00501 Xapian::weight get_maxpart() const;
00502
00503 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00504 Xapian::weight get_maxextra() const;
00505 };
00506
00507 }
00508
00509 #endif // XAPIAN_INCLUDED_WEIGHT_H