21 #ifndef XAPIAN_INCLUDED_PROTOMSET_H
22 #define XAPIAN_INCLUDED_PROTOMSET_H
144 int percent_threshold_,
145 double percent_threshold_factor_,
146 double max_possible_,
147 bool stop_once_full_,
208 double new_min_weight = HUGE_VAL;
221 if (weight_first &&
results[j].get_weight() < new_min_weight) {
222 new_min_weight =
results[j].get_weight();
242 if (!finalising && min_elt != 0 && !
collapser) {
251 bool calculated_weight,
272 spymaster(doc, weight);
307 add(std::move(new_item));
330 auto elt =
add(std::move(new_item));
359 results.push_back(std::move(item));
370 results.push_back(std::move(item));
397 results[worst_idx] = std::move(item);
409 results[old_item] = std::move(b);
492 const std::vector<std::unique_ptr<LocalSubMatch>>& locals,
526 matches_lower_bound = matches_estimated = matches_upper_bound = m;
529 uncollapsed_lower_bound = matches_lower_bound;
530 uncollapsed_estimated = matches_estimated;
531 uncollapsed_upper_bound = matches_upper_bound;
533 matches_lower_bound = 0;
534 matches_estimated = 0;
535 matches_upper_bound = 0;
536 for (
size_t i = 0; i != estimates.
size(); ++i) {
539 Estimates e = locals[i]->resolve(estimates[i]);
540 matches_lower_bound += e.
min;
541 matches_estimated += e.
est;
542 matches_upper_bound += e.
max;
546 AssertRel(matches_estimated, >=, matches_lower_bound);
547 AssertRel(matches_estimated, <=, matches_upper_bound);
549 uncollapsed_lower_bound = matches_lower_bound;
550 uncollapsed_estimated = matches_estimated;
551 uncollapsed_upper_bound = matches_upper_bound;
557 matches_lower_bound =
size();
558 matches_estimated = matches_lower_bound;
559 matches_upper_bound = matches_lower_bound;
569 if (matches_lower_bound > uncollapsed_lower_bound) {
572 uncollapsed_lower_bound = matches_lower_bound;
578 double estimate_scale = 1.0;
579 double unique_rate = 1.0;
587 if (docs_considered > 0) {
590 double unique = double(docs_considered - dups_ignored);
591 unique_rate = unique / double(docs_considered);
596 matches_upper_bound -= dups_ignored;
616 matches_lower_bound =
size();
619 uncollapsed_lower_bound = matches_lower_bound;
623 if (
collapser && estimate_scale != 1.0) {
624 uncollapsed_estimated =
629 estimate_scale *= unique_rate;
631 if (estimate_scale != 1.0) {
634 if (matches_estimated < matches_lower_bound)
635 matches_estimated = matches_lower_bound;
640 AssertRel(matches_lower_bound, <=, matches_upper_bound);
641 matches_estimated = std::clamp(matches_estimated,
643 matches_upper_bound);
659 if (matches_lower_bound > uncollapsed_lower_bound) {
662 uncollapsed_lower_bound = matches_lower_bound;
666 if (uncollapsed_estimated < uncollapsed_lower_bound) {
667 uncollapsed_estimated = uncollapsed_lower_bound;
668 }
else if (uncollapsed_estimated > uncollapsed_upper_bound) {
669 uncollapsed_estimated = uncollapsed_upper_bound;
673 uncollapsed_lower_bound = matches_lower_bound;
674 uncollapsed_estimated = matches_estimated;
675 uncollapsed_upper_bound = matches_upper_bound;
702 AssertRel(matches_lower_bound, <=, matches_estimated);
703 AssertRel(matches_estimated, <=, matches_upper_bound);
704 AssertRel(uncollapsed_lower_bound, <=, uncollapsed_estimated);
705 AssertRel(uncollapsed_estimated, <=, uncollapsed_upper_bound);
708 AssertRel(matches_lower_bound, <=, uncollapsed_lower_bound);
709 AssertRel(matches_estimated, <=, uncollapsed_estimated);
710 AssertRel(matches_upper_bound, <=, uncollapsed_upper_bound);
716 uncollapsed_upper_bound,
717 uncollapsed_lower_bound,
718 uncollapsed_estimated,
The Collapser class tracks collapse keys and the documents they match.
Xapian::doccount get_docs_considered() const
collapse_result check(Result &result, Xapian::Document::Internal &vsdoc)
Check a new result.
void finalise(double min_weight, int percent_threshold)
Xapian::doccount old_item
Replaced item when REPLACE is returned by collapse().
Xapian::doccount get_dups_ignored() const
void process(collapse_result action, Xapian::doccount item)
Handle a new Result.
Xapian::doccount get_matches_lower_bound() const
void result_has_moved(Xapian::doccount from, Xapian::doccount to)
Process relocation of entry in results.
Xapian::termcount count_matching_subqs() const
double get_weight() const
Adapt MSetCmp to be usable with min_heap.
bool operator()(Xapian::doccount a, Xapian::doccount b) const
MCmpAdaptor(ProtoMSet *protomset_)
double max_weight
The highest document weight seen.
Collapser & get_collapser()
std::vector< Result > results
The items in the proto-MSet.
bool process(Result &&new_item, ValueStreamDocument &vsdoc)
Process new_item.
double percent_threshold_factor
void replace(Xapian::doccount old_item, Result &&b)
Xapian::doccount add(Result &&item)
ProtoMSet & operator=(const ProtoMSet &)=delete
Xapian::doccount first
First entry wanted in MSet.
double min_weight
Minimum threshold on the weight.
Xapian::termcount total_subqs
How many weighted leaf subqueries there are.
ProtoMSet(const ProtoMSet &)=delete
std::vector< Xapian::doccount > min_heap
A heap of offsets into results.
bool early_reject(Result &new_item, bool calculated_weight, SpyMaster &spymaster, const Xapian::Document &doc)
void set_new_min_weight(double min_wt)
Xapian::termcount max_weight_subqs_matched
The number of subqueries which matched to give max_weight.
Xapian::doccount check_at_least
Xapian::Enquire::Internal::sort_setting sort_by
ProtoMSet(Xapian::doccount first_, Xapian::doccount max_items, Xapian::doccount check_at_least_, MSetCmp mcmp_, Xapian::Enquire::Internal::sort_setting sort_by_, Xapian::termcount total_subqs_, PostListTree &pltree_, Xapian::valueno collapse_key, Xapian::doccount collapse_max, int percent_threshold_, double percent_threshold_factor_, double max_possible_, bool stop_once_full_, double time_limit)
Xapian::doccount known_matching_docs
Count of how many known matching documents have been processed so far.
void update_max_weight(double weight)
bool handle_min_weight_pending(bool finalising=false)
Resolve a pending min_weight change.
double get_min_weight() const
void finalise_percentages()
Xapian::doccount size() const
Xapian::MSet finalise(const Xapian::MatchDecider *mdecider, const std::vector< std::unique_ptr< LocalSubMatch >> &locals, const Xapian::VecUniquePtr< EstimateOp > &estimates)
Xapian::doccount max_size
Maximum size the ProtoMSet needs to grow to.
double get_weight() const
A document which gets its values from a ValueStreamManager.
Class representing a document.
A smart pointer that uses intrusive reference counting.
Class representing a list of search results.
Abstract base class for match deciders.
Suitable for "simple" type T.
Collapse documents with the same collapse key during the match.
Xapian::Enquire internals.
C++ STL heap implementation with extensions.
Time limits for the matcher.
Result comparison functions.
bool(* MSetCmp)(const Result &, const Result &)
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void siftdown(_RandomAccessIterator first, _RandomAccessIterator last, _RandomAccessIterator elt, _Compare comp)
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Various assertion macros.
#define AssertRel(A, REL, B)
Custom vector implementations using small vector optimisation.
Class for managing MatchSpy objects during the match.