00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef XAPIAN_INCLUDED_COLLAPSER_H
00022 #define XAPIAN_INCLUDED_COLLAPSER_H
00023
00024 #include "document.h"
00025 #include "msetcmp.h"
00026 #include "omenquireinternal.h"
00027 #include "postlist.h"
00028
00029 #include <map>
00030
/** Result code type used as the return type of CollapseData::add_item()
 *  and Collapser::process().
 *
 *  The enumerators keep their implicit values (EMPTY == 0 through
 *  REPLACED == 3), in declaration order.
 *
 *  NOTE(review): the meaning attached to each code is decided by the
 *  out-of-line implementations of those two functions - confirm there.
 */
enum collapse_result {
    EMPTY,
    ADDED,
    REJECTED,
    REPLACED
};
00038
00040 class CollapseData {
00050 vector<Xapian::Internal::MSetItem> items;
00051
00053 Xapian::weight next_best_weight;
00054
00056 Xapian::doccount collapse_count;
00057
00058 public:
00060 CollapseData(const Xapian::Internal::MSetItem & item)
00061 : items(1, item), next_best_weight(0), collapse_count(0) {
00062 items[0].collapse_key = string();
00063 }
00064
00074 collapse_result add_item(const Xapian::Internal::MSetItem & item,
00075 Xapian::doccount collapse_max,
00076 const MSetCmp & mcmp,
00077 Xapian::Internal::MSetItem & old_item);
00078
00080 Xapian::weight get_next_best_weight() const { return next_best_weight; }
00081
00083 Xapian::doccount get_collapse_count() const { return collapse_count; }
00084 };
00085
00087 class Collapser {
00089 std::map<std::string, CollapseData> table;
00090
00092 Xapian::doccount entry_count;
00093
00098 Xapian::doccount no_collapse_key;
00099
00105 Xapian::doccount dups_ignored;
00106
00112 Xapian::doccount docs_considered;
00113
00115 Xapian::valueno slot;
00116
00118 Xapian::doccount collapse_max;
00119
00120 public:
00122 Xapian::Internal::MSetItem old_item;
00123
00124 Collapser(Xapian::valueno slot_, Xapian::doccount collapse_max_)
00125 : entry_count(0), no_collapse_key(0), dups_ignored(0),
00126 docs_considered(0), slot(slot_), collapse_max(collapse_max_),
00127 old_item(0, 0) { }
00128
00130 operator bool() const { return collapse_max != 0; }
00131
00142 collapse_result process(Xapian::Internal::MSetItem & item,
00143 PostList * postlist,
00144 Xapian::Document::Internal & vsdoc,
00145 const MSetCmp & mcmp);
00146
00147 Xapian::doccount get_collapse_count(const std::string & collapse_key,
00148 int percent_cutoff,
00149 Xapian::weight min_weight) const;
00150
00151 Xapian::doccount get_docs_considered() const { return docs_considered; }
00152
00153 Xapian::doccount get_dups_ignored() const { return dups_ignored; }
00154
00155 Xapian::doccount entries() const { return entry_count; }
00156
00157 Xapian::doccount get_matches_lower_bound() const;
00158
00159 bool empty() const { return table.empty(); }
00160 };
00161
00162 #endif // XAPIAN_INCLUDED_COLLAPSER_H