00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include "xapian/postingsource.h"
00026
00027 #include "autoptr.h"
00028
00029 #include "database.h"
00030 #include "document.h"
00031 #include "multimatch.h"
00032
00033 #include "xapian/document.h"
00034 #include "xapian/error.h"
00035 #include "xapian/queryparser.h"
00036
00037 #include "omassert.h"
00038 #include "serialise.h"
00039 #include "serialise-double.h"
00040 #include "str.h"
00041
00042 #include <cfloat>
00043
00044 using namespace std;
00045
00046 namespace Xapian {
00047
00048 PostingSource::~PostingSource() { }
00049
00050 void
00051 PostingSource::set_maxweight(Xapian::weight max_weight)
00052 {
00053 if (usual(matcher_)) {
00054 MultiMatch * multimatch = static_cast<MultiMatch*>(matcher_);
00055 multimatch->recalc_maxweight();
00056 }
00057 max_weight_ = max_weight;
00058 }
00059
00060 Xapian::weight
00061 PostingSource::get_weight() const
00062 {
00063 return 0;
00064 }
00065
00066 void
00067 PostingSource::skip_to(Xapian::docid did, Xapian::weight min_wt)
00068 {
00069 while (!at_end() && get_docid() < did) {
00070 next(min_wt);
00071 }
00072 }
00073
00074 bool
00075 PostingSource::check(Xapian::docid did, Xapian::weight min_wt)
00076 {
00077 skip_to(did, min_wt);
00078 return true;
00079 }
00080
00081 PostingSource *
00082 PostingSource::clone() const
00083 {
00084 return NULL;
00085 }
00086
00087 string
00088 PostingSource::name() const
00089 {
00090 return string();
00091 }
00092
00093 string
00094 PostingSource::serialise() const
00095 {
00096 throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
00097 }
00098
00099 PostingSource *
00100 PostingSource::unserialise(const string &) const
00101 {
00102 throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
00103 }
00104
00105 string
00106 PostingSource::get_description() const
00107 {
00108 return "Xapian::PostingSource subclass";
00109 }
00110
00111
00112 ValuePostingSource::ValuePostingSource(Xapian::valueno slot_)
00113 : slot(slot_)
00114 {
00115 }
00116
00117 Xapian::doccount
00118 ValuePostingSource::get_termfreq_min() const
00119 {
00120 return termfreq_min;
00121 }
00122
00123 Xapian::doccount
00124 ValuePostingSource::get_termfreq_est() const
00125 {
00126 return termfreq_est;
00127 }
00128
00129 Xapian::doccount
00130 ValuePostingSource::get_termfreq_max() const
00131 {
00132 return termfreq_max;
00133 }
00134
00135 void
00136 ValuePostingSource::next(Xapian::weight min_wt)
00137 {
00138 if (!started) {
00139 started = true;
00140 value_it = db.valuestream_begin(slot);
00141 } else {
00142 ++value_it;
00143 }
00144
00145 if (value_it == db.valuestream_end(slot)) return;
00146
00147 if (min_wt > get_maxweight()) {
00148 value_it = db.valuestream_end(slot);
00149 return;
00150 }
00151 }
00152
00153 void
00154 ValuePostingSource::skip_to(Xapian::docid min_docid, Xapian::weight min_wt)
00155 {
00156 if (!started) {
00157 started = true;
00158 value_it = db.valuestream_begin(slot);
00159
00160 if (value_it == db.valuestream_end(slot)) return;
00161 }
00162
00163 if (min_wt > get_maxweight()) {
00164 value_it = db.valuestream_end(slot);
00165 return;
00166 }
00167 value_it.skip_to(min_docid);
00168 }
00169
00170 bool
00171 ValuePostingSource::check(Xapian::docid min_docid,
00172 Xapian::weight min_wt)
00173 {
00174 if (!started) {
00175 started = true;
00176 value_it = db.valuestream_begin(slot);
00177
00178 if (value_it == db.valuestream_end(slot)) return true;
00179 }
00180
00181 if (min_wt > get_maxweight()) {
00182 value_it = db.valuestream_end(slot);
00183 return true;
00184 }
00185 return value_it.check(min_docid);
00186 }
00187
00188 bool
00189 ValuePostingSource::at_end() const
00190 {
00191 return started && value_it == db.valuestream_end(slot);
00192 }
00193
00194 Xapian::docid
00195 ValuePostingSource::get_docid() const
00196 {
00197 return value_it.get_docid();
00198 }
00199
00200 void
00201 ValuePostingSource::init(const Database & db_)
00202 {
00203 db = db_;
00204 started = false;
00205 set_maxweight(DBL_MAX);
00206 try {
00207 termfreq_max = db.get_value_freq(slot);
00208 termfreq_est = termfreq_max;
00209 termfreq_min = termfreq_max;
00210 } catch (const Xapian::UnimplementedError &) {
00211 termfreq_max = db.get_doccount();
00212 termfreq_est = termfreq_max / 2;
00213 termfreq_min = 0;
00214 }
00215 }
00216
00217
00218 ValueWeightPostingSource::ValueWeightPostingSource(Xapian::valueno slot_)
00219 : ValuePostingSource(slot_)
00220 {
00221 }
00222
00223 Xapian::weight
00224 ValueWeightPostingSource::get_weight() const
00225 {
00226 Assert(!at_end());
00227 Assert(started);
00228 return sortable_unserialise(*value_it);
00229 }
00230
00231 ValueWeightPostingSource *
00232 ValueWeightPostingSource::clone() const
00233 {
00234 return new ValueWeightPostingSource(slot);
00235 }
00236
00237 string
00238 ValueWeightPostingSource::name() const
00239 {
00240 return string("Xapian::ValueWeightPostingSource");
00241 }
00242
00243 string
00244 ValueWeightPostingSource::serialise() const
00245 {
00246 return encode_length(slot);
00247 }
00248
00249 ValueWeightPostingSource *
00250 ValueWeightPostingSource::unserialise(const string &s) const
00251 {
00252 const char * p = s.data();
00253 const char * end = p + s.size();
00254
00255 Xapian::valueno new_slot = decode_length(&p, end, false);
00256 if (p != end) {
00257 throw Xapian::NetworkError("Bad serialised ValueWeightPostingSource - junk at end");
00258 }
00259
00260 return new ValueWeightPostingSource(new_slot);
00261 }
00262
00263 void
00264 ValueWeightPostingSource::init(const Database & db_)
00265 {
00266 ValuePostingSource::init(db_);
00267
00268 string upper_bound;
00269 try {
00270 upper_bound = db.get_value_upper_bound(slot);
00271 } catch (const Xapian::UnimplementedError &) {
00272
00273 return;
00274 }
00275
00276 if (upper_bound.empty()) {
00277
00278
00279 set_maxweight(0.0);
00280 } else {
00281 set_maxweight(sortable_unserialise(upper_bound));
00282 }
00283 }
00284
00285 string
00286 ValueWeightPostingSource::get_description() const
00287 {
00288 string desc("Xapian::ValueWeightPostingSource(slot=");
00289 desc += str(slot);
00290 desc += ")";
00291 return desc;
00292 }
00293
00294
00295 ValueMapPostingSource::ValueMapPostingSource(Xapian::valueno slot_)
00296 : ValuePostingSource(slot_),
00297 default_weight(0.0),
00298 max_weight_in_map(0.0)
00299 {
00300 }
00301
00302 void
00303 ValueMapPostingSource::add_mapping(const string & key, double wt)
00304 {
00305 weight_map[key] = wt;
00306 max_weight_in_map = max(wt, max_weight_in_map);
00307 }
00308
00309 void
00310 ValueMapPostingSource::clear_mappings()
00311 {
00312 weight_map.clear();
00313 max_weight_in_map = 0.0;
00314 }
00315
00316 void
00317 ValueMapPostingSource::set_default_weight(double wt)
00318 {
00319 default_weight = wt;
00320 }
00321
00322 Xapian::weight
00323 ValueMapPostingSource::get_weight() const
00324 {
00325 map<string, double>::const_iterator wit = weight_map.find(*value_it);
00326 if (wit == weight_map.end()) {
00327 return default_weight;
00328 }
00329 return wit->second;
00330 }
00331
00332 ValueMapPostingSource *
00333 ValueMapPostingSource::clone() const
00334 {
00335 AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(slot));
00336 map<string, double>::const_iterator i;
00337 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
00338 res->add_mapping(i->first, i->second);
00339 }
00340 res->set_default_weight(default_weight);
00341 return res.release();
00342 }
00343
00344 string
00345 ValueMapPostingSource::name() const
00346 {
00347 return string("Xapian::ValueMapPostingSource");
00348 }
00349
00350 string
00351 ValueMapPostingSource::serialise() const
00352 {
00353 string result = encode_length(slot);
00354 result += serialise_double(default_weight);
00355
00356 map<string, double>::const_iterator i;
00357 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
00358 result.append(encode_length(i->first.size()));
00359 result.append(i->first);
00360 result.append(serialise_double(i->second));
00361 }
00362
00363 return result;
00364 }
00365
00366 ValueMapPostingSource *
00367 ValueMapPostingSource::unserialise(const string &s) const
00368 {
00369 const char * p = s.data();
00370 const char * end = p + s.size();
00371
00372 Xapian::valueno new_slot = decode_length(&p, end, false);
00373 AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(new_slot));
00374 res->set_default_weight(unserialise_double(&p, end));
00375 while (p != end) {
00376 size_t keylen = decode_length(&p, end, true);
00377 string key(p, keylen);
00378 p += keylen;
00379 res->add_mapping(key, unserialise_double(&p, end));
00380 }
00381 return res.release();
00382 }
00383
00384 void
00385 ValueMapPostingSource::init(const Database & db_)
00386 {
00387 ValuePostingSource::init(db_);
00388 set_maxweight(max(max_weight_in_map, default_weight));
00389 }
00390
00391 string
00392 ValueMapPostingSource::get_description() const
00393 {
00394 string desc("Xapian::ValueMapPostingSource(slot=");
00395 desc += str(slot);
00396 desc += ")";
00397 return desc;
00398 }
00399
00400 FixedWeightPostingSource::FixedWeightPostingSource(Xapian::weight wt)
00401 : started(false)
00402 {
00403
00404
00405 set_maxweight(wt);
00406 }
00407
00408 Xapian::doccount
00409 FixedWeightPostingSource::get_termfreq_min() const
00410 {
00411 return termfreq;
00412 }
00413
00414 Xapian::doccount
00415 FixedWeightPostingSource::get_termfreq_est() const
00416 {
00417 return termfreq;
00418 }
00419
00420 Xapian::doccount
00421 FixedWeightPostingSource::get_termfreq_max() const
00422 {
00423 return termfreq;
00424 }
00425
00426 Xapian::weight
00427 FixedWeightPostingSource::get_weight() const
00428 {
00429 return get_maxweight();
00430 }
00431
00432 void
00433 FixedWeightPostingSource::next(Xapian::weight min_wt)
00434 {
00435 if (!started) {
00436 started = true;
00437 it = db.postlist_begin(string());
00438 } else {
00439 ++it;
00440 }
00441
00442 if (it == db.postlist_end(string())) return;
00443
00444 if (check_docid) {
00445 it.skip_to(check_docid + 1);
00446 check_docid = 0;
00447 }
00448
00449 if (min_wt > get_maxweight()) {
00450 it = db.postlist_end(string());
00451 }
00452 }
00453
00454 void
00455 FixedWeightPostingSource::skip_to(Xapian::docid min_docid,
00456 Xapian::weight min_wt)
00457 {
00458 if (!started) {
00459 started = true;
00460 it = db.postlist_begin(string());
00461
00462 if (it == db.postlist_end(string())) return;
00463 }
00464
00465 if (check_docid) {
00466 if (min_docid < check_docid)
00467 min_docid = check_docid + 1;
00468 check_docid = 0;
00469 }
00470
00471 if (min_wt > get_maxweight()) {
00472 it = db.postlist_end(string());
00473 return;
00474 }
00475 it.skip_to(min_docid);
00476 }
00477
00478 bool
00479 FixedWeightPostingSource::check(Xapian::docid min_docid,
00480 Xapian::weight)
00481 {
00482
00483
00484 check_docid = min_docid;
00485 return true;
00486 }
00487
00488 bool
00489 FixedWeightPostingSource::at_end() const
00490 {
00491 if (check_docid != 0) return false;
00492 return started && it == db.postlist_end(string());
00493 }
00494
00495 Xapian::docid
00496 FixedWeightPostingSource::get_docid() const
00497 {
00498 if (check_docid != 0) return check_docid;
00499 return *it;
00500 }
00501
00502 FixedWeightPostingSource *
00503 FixedWeightPostingSource::clone() const
00504 {
00505 return new FixedWeightPostingSource(get_maxweight());
00506 }
00507
00508 string
00509 FixedWeightPostingSource::name() const
00510 {
00511 return string("Xapian::FixedWeightPostingSource");
00512 }
00513
00514 string
00515 FixedWeightPostingSource::serialise() const
00516 {
00517 return serialise_double(get_maxweight());
00518 }
00519
00520 FixedWeightPostingSource *
00521 FixedWeightPostingSource::unserialise(const string &s) const
00522 {
00523 const char * p = s.data();
00524 const char * s_end = p + s.size();
00525 double new_wt = unserialise_double(&p, s_end);
00526 if (p != s_end) {
00527 throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
00528 }
00529 return new FixedWeightPostingSource(new_wt);
00530 }
00531
00532 void
00533 FixedWeightPostingSource::init(const Xapian::Database & db_)
00534 {
00535 db = db_;
00536 termfreq = db_.get_doccount();
00537 started = false;
00538 check_docid = 0;
00539 }
00540
00541 string
00542 FixedWeightPostingSource::get_description() const
00543 {
00544 string desc("Xapian::FixedWeightPostingSource(wt=");
00545 desc += str(get_maxweight());
00546 desc += ")";
00547 return desc;
00548 }
00549
00550 }