xapian-core  1.4.26
collapser.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2011 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_COLLAPSER_H
22 #define XAPIAN_INCLUDED_COLLAPSER_H
23 
24 #include "backends/document.h"
25 #include "msetcmp.h"
26 #include "api/omenquireinternal.h"
27 #include "api/postlist.h"
28 
29 #include <map>
30 #include <vector>
31 
33 typedef enum {
39 
41 class CollapseData {
50  std::vector<Xapian::Internal::MSetItem> items;
51 
54 
57 
58  public:
61  : items(1, item), next_best_weight(0), collapse_count(0) {
62  items[0].collapse_key = std::string();
63  }
64 
75  Xapian::doccount collapse_max,
76  MSetCmp mcmp,
77  Xapian::Internal::MSetItem & old_item);
78 
80  double get_next_best_weight() const { return next_best_weight; }
81 
84 };
85 
87 class Collapser {
89  std::map<std::string, CollapseData> table;
90 
93 
99 
106 
113 
116 
119 
120  public:
123 
125  : entry_count(0), no_collapse_key(0), dups_ignored(0),
126  docs_considered(0), slot(slot_), collapse_max(collapse_max_),
127  old_item(0, 0) { }
128 
130  operator bool() const { return collapse_max != 0; }
131 
143  PostList * postlist,
145  MSetCmp mcmp);
146 
147  Xapian::doccount get_collapse_count(const std::string & collapse_key,
148  int percent_cutoff,
149  double min_weight) const;
150 
151  Xapian::doccount get_docs_considered() const { return docs_considered; }
152 
153  Xapian::doccount get_dups_ignored() const { return dups_ignored; }
154 
155  Xapian::doccount entries() const { return entry_count; }
156 
157  Xapian::doccount get_matches_lower_bound() const;
158 
159  bool empty() const { return table.empty(); }
160 };
161 
162 #endif // XAPIAN_INCLUDED_COLLAPSER_H
Xapian::valueno slot
The value slot we're getting collapse keys from.
Definition: collapser.h:115
Abstract base class for postlists.
Definition: postlist.h:37
Xapian::doccount no_collapse_key
How many documents have we seen without a collapse key?
Definition: collapser.h:98
collapse_result add_item(const Xapian::Internal::MSetItem &item, Xapian::doccount collapse_max, MSetCmp mcmp, Xapian::Internal::MSetItem &old_item)
Handle a new MSetItem with this collapse key value.
Definition: collapser.cc:32
std::map< std::string, CollapseData > table
Map from collapse key values to the items we're keeping for them.
Definition: collapser.h:89
std::vector< Xapian::Internal::MSetItem > items
Currently kept MSet entries for this value of the collapse key.
Definition: collapser.h:50
Collapser(Xapian::valueno slot_, Xapian::doccount collapse_max_)
Definition: collapser.h:124
A document in the database, possibly plus modifications.
Definition: document.h:43
CollapseData(const Xapian::Internal::MSetItem &item)
Construct with the given MSetItem item.
Definition: collapser.h:60
Xapian::doccount collapse_max
The maximum number of items to keep for each collapse key value.
Definition: collapser.h:118
Abstract base class for postlists.
Xapian::doccount get_dups_ignored() const
Definition: collapser.h:153
bool(* MSetCmp)(const Xapian::Internal::MSetItem &, const Xapian::Internal::MSetItem &)
Definition: msetcmp.h:28
An item resulting from a query.
double next_best_weight
The highest weight of a document we've rejected.
Definition: collapser.h:53
Xapian::doccount entry_count
How many items we're currently keeping in table.
Definition: collapser.h:92
bool empty() const
Definition: collapser.h:159
double get_next_best_weight() const
The highest weight of a document we've rejected.
Definition: collapser.h:80
collapse_result
Enumeration reporting how a document was handled by the Collapser.
Definition: collapser.h:33
Xapian::doccount collapse_count
The number of documents we've rejected.
Definition: collapser.h:56
Xapian::doccount get_collapse_count() const
The number of documents we've rejected.
Definition: collapser.h:83
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::doccount dups_ignored
How many documents with duplicate collapse keys we have ignored.
Definition: collapser.h:105
MSetItem comparison functions.
Xapian::doccount docs_considered
How many documents we've considered for collapsing.
Definition: collapser.h:112
The Collapser class tracks collapse keys and the documents they match.
Definition: collapser.h:87
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian::doccount entries() const
Definition: collapser.h:155
Class tracking information for a given value of the collapse key.
Definition: collapser.h:41
Xapian::Internal::MSetItem old_item
Replaced item when REPLACED is returned by collapse().
Definition: collapser.h:122
Xapian::doccount get_docs_considered() const
Definition: collapser.h:151