21 #ifndef XAPIAN_INCLUDED_COLLAPSER_H
22 #define XAPIAN_INCLUDED_COLLAPSER_H
29 #include <unordered_map>
55 std::vector<std::pair<Xapian::doccount, Xapian::docid>>
items;
66 :
items(1, { item, did })
103 void add_item(
const std::vector<Result>& results,
117 for (
auto&& item :
items) {
118 if (item.first == from) {
137 std::unordered_map<std::string, CollapseData>
table;
186 std::vector<Result>& results_,
224 const std::string& collapse_key =
results[to].get_collapse_key();
225 if (collapse_key.empty()) {
228 auto it =
table.find(collapse_key);
240 int percent_threshold,
241 double min_weight)
const;
251 void finalise(
double min_weight,
int percent_threshold);
262 std::unordered_map<std::string, Xapian::doccount>
table;
301 bool add(
const std::string& key) {
309 auto r =
table.emplace(key, 1);
334 void finalise(std::vector<Result>& results,
int percent_threshold) {
335 if (
table.empty() || results.empty())
341 for (
Result& result : results) {
342 const std::string& key = result.get_collapse_key();
347 if (percent_threshold) {
349 result.set_collapse_count(1);
351 auto c = result.get_collapse_count() +
table[key];
352 result.set_collapse_count(c);
Class tracking information for a given value of the collapse key.
Xapian::doccount collapse_count
The number of documents we've rejected.
std::vector< std::pair< Xapian::doccount, Xapian::docid > > items
Currently kept MSet entries for this value of the collapse key.
CollapseData(Xapian::doccount item, Xapian::docid did)
Construct with the given item.
void set_item(Xapian::doccount item)
Set item after constructing with a placeholder.
Xapian::doccount get_collapse_count() const
The number of documents we've rejected.
void add_item(const std::vector< Result > &results, Xapian::doccount item, Xapian::doccount collapse_max, MSetCmp mcmp)
Complete update of new result with this collapse key value.
collapse_result check_item(const std::vector< Result > &results, const Result &result, Xapian::doccount collapse_max, MSetCmp mcmp, Xapian::doccount &old_item)
Check a new result with this collapse key value.
double next_best_weight
The highest weight of a document we've rejected.
double get_next_best_weight() const
The highest weight of a document we've rejected.
void result_has_moved(Xapian::doccount from, Xapian::doccount to)
Process relocation of entry in results.
Simpler version of Collapser used when merging MSet objects.
Xapian::doccount entry_count
How many items we're currently keeping in table.
std::unordered_map< std::string, Xapian::doccount > table
Map from collapse key values to collapse counts.
bool add(const std::string &key)
Try to add a new key.
Xapian::doccount get_docs_considered() const
Xapian::doccount dups_ignored
How many documents with duplicate collapse keys we have ignored.
CollapserLite(Xapian::doccount collapse_max_)
Xapian::doccount no_collapse_key
How many documents have we seen without a collapse key?
void finalise(std::vector< Result > &results, int percent_threshold)
Xapian::doccount docs_considered
How many documents we've considered for collapsing.
Xapian::doccount collapse_max
The maximum number of items to keep for each collapse key value.
Xapian::doccount get_matches_lower_bound() const
Xapian::doccount get_entries() const
Xapian::doccount get_dups_ignored() const
The Collapser class tracks collapse keys and the documents they match.
Xapian::doccount get_docs_considered() const
std::vector< Result > & results
Xapian::doccount no_collapse_key
How many documents have we seen without a collapse key?
Xapian::doccount dups_ignored
How many documents with duplicate collapse keys we have ignored.
collapse_result check(Result &result, Xapian::Document::Internal &vsdoc)
Check a new result.
void finalise(double min_weight, int percent_threshold)
Collapser(Xapian::valueno slot_, Xapian::doccount collapse_max_, std::vector< Result > &results_, MSetCmp mcmp_)
Xapian::doccount old_item
Replaced item when REPLACE is returned by check().
Xapian::doccount get_dups_ignored() const
Xapian::valueno slot
The value slot we're getting collapse keys from.
void process(collapse_result action, Xapian::doccount item)
Handle a new Result.
bool operator()(Xapian::doccount a, Xapian::doccount b) const
Adapt mcmp to be usable with min_heap.
Xapian::doccount collapse_max
The maximum number of items to keep for each collapse key value.
Xapian::doccount get_entries() const
std::unordered_map< std::string, CollapseData > table
Map from collapse key values to the items we're keeping for them.
Xapian::doccount get_matches_lower_bound() const
Xapian::doccount docs_considered
How many documents we've considered for collapsing.
Xapian::doccount get_collapse_count(const std::string &collapse_key, int percent_threshold, double min_weight) const
CollapseData * ptr
Pointer to CollapseData when NEW or ADD is in progress.
void result_has_moved(Xapian::doccount from, Xapian::doccount to)
Process relocation of entry in results.
Xapian::doccount entry_count
How many items we're currently keeping in table.
Abstract base class for a document.
collapse_result
Enumeration reporting how a result will be handled by the Collapser.
Abstract base class for a document.
Result comparison functions.
bool(* MSetCmp)(const Result &, const Result &)
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Various assertion macros.