xapian-core  1.4.30
collapser.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2011 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_COLLAPSER_H
22 #define XAPIAN_INCLUDED_COLLAPSER_H
23 
24 #include "backends/document.h"
25 #include "msetcmp.h"
26 #include "api/omenquireinternal.h"
27 #include "api/postlist.h"
28 
29 #include <map>
30 #include <vector>
31 
// Enumeration reporting how a document was handled by the Collapser.
//
// NOTE: this listing is incomplete.  The enumerators EMPTY, ADDED and
// REJECTED precede REPLACED in the real header, and the closing line which
// names the typedef `collapse_result` follows it; none of these are shown
// here.
33 typedef enum {
37  REPLACED
39 
// Class tracking information for a given value of the collapse key.
//
// NOTE: this listing is incomplete - several declaration lines are not
// shown.  Comments below name the omitted members where they belong.
41 class CollapseData {
    // Currently kept MSet entries for this value of the collapse key.
50  std::vector<Xapian::Internal::MSetItem> items;
51 
    // [declaration not shown] double next_best_weight:
    // the highest weight of a document we've rejected.
54 
    // [declaration not shown] Xapian::doccount collapse_count:
    // the number of documents we've rejected.
57 
58  public:
    // Construct with the given MSetItem item (the signature line,
    // CollapseData(const Xapian::Internal::MSetItem &item), is not shown).
    //
    // Starts `items` with a single copy of `item` and zeroes both
    // next_best_weight and collapse_count.
61  : items(1, item), next_best_weight(0), collapse_count(0) {
    // The stored copy's collapse_key is reset to an empty string.
    // NOTE(review): presumably because the key is already held as the map
    // key in Collapser::table - TODO confirm.
62  items[0].collapse_key = std::string();
63  }
64 
    // Handle a new MSetItem with this collapse key value.
    //
    // The first line of this declaration,
    // collapse_result add_item(const Xapian::Internal::MSetItem &item, ...),
    // is not shown; the remaining parameters follow.  `old_item` is taken by
    // non-const reference, so the callee is able to write to it.
75  Xapian::doccount collapse_max,
76  MSetCmp mcmp,
77  Xapian::Internal::MSetItem & old_item);
78 
    // The highest weight of a document we've rejected.
80  double get_next_best_weight() const { return next_best_weight; }
81 
    // [declaration not shown] Xapian::doccount get_collapse_count() const:
    // the number of documents we've rejected.
84 };
85 
// The Collapser class tracks collapse keys and the documents they match.
//
// NOTE: this listing is incomplete - several declaration lines are not
// shown.  Comments below name the omitted members where they belong.
87 class Collapser {
    // Map from collapse key values to the items we're keeping for them.
89  std::map<std::string, CollapseData> table;
90 
    // [declaration not shown] Xapian::doccount entry_count:
    // how many items we're currently keeping in table.
93 
    // [declaration not shown] Xapian::doccount no_collapse_key:
    // how many documents we have seen without a collapse key.
99 
    // [declaration not shown] Xapian::doccount dups_ignored:
    // how many documents with duplicate collapse keys we have ignored.
106 
    // [declaration not shown] Xapian::doccount docs_considered:
    // how many documents we've considered for collapsing.
113 
    // [declaration not shown] Xapian::valueno slot:
    // the value slot we're getting collapse keys from.
116 
    // [declaration not shown] Xapian::doccount collapse_max:
    // the maximum number of items to keep for each collapse key value.
119 
120  public:
    // [declaration not shown] Xapian::Internal::MSetItem old_item:
    // the replaced item when REPLACED is returned.
123 
    // Constructor (the signature line,
    // Collapser(Xapian::valueno slot_, Xapian::doccount collapse_max_),
    // and the start of the initialiser list are not shown).
    //
    // The visible initialisers zero docs_considered, store the slot and the
    // per-key maximum, and construct old_item from (0, 0).
    // NOTE(review): the other counters are presumably initialised on the
    // omitted line - TODO confirm.
126  docs_considered(0), slot(slot_), collapse_max(collapse_max_),
127  old_item(0, 0) { }
128 
    // True when collapse_max is non-zero.
    // NOTE(review): this conversion is not marked `explicit`, so a Collapser
    // converts implicitly to bool.
130  operator bool() const { return collapse_max != 0; }
131 
    // Handle a new MSetItem.
    //
    // The first line of this declaration,
    // collapse_result process(Xapian::Internal::MSetItem &item, ...), and
    // the `Xapian::Document::Internal &vsdoc` parameter line are not shown.
143  PostList * postlist,
145  MSetCmp mcmp);
146 
    // Look up a collapse count for `collapse_key`.
    // NOTE(review): how percent_cutoff and min_weight affect the result is
    // decided in collapser.cc and cannot be seen here - TODO confirm.
147  Xapian::doccount get_collapse_count(const std::string & collapse_key,
148  int percent_cutoff,
149  double min_weight) const;
150 
    // [declaration not shown] Xapian::doccount get_docs_considered() const
152 
    // [declaration not shown] Xapian::doccount get_dups_ignored() const
154 
    // [declaration not shown] Xapian::doccount entries() const
156 
    // [declaration not shown] Xapian::doccount get_matches_lower_bound() const
158 
    // True when no collapse key values are currently being tracked in table.
159  bool empty() const { return table.empty(); }
160 };
161 
162 #endif // XAPIAN_INCLUDED_COLLAPSER_H
class with document data
Class tracking information for a given value of the collapse key.
Definition: collapser.h:41
Xapian::doccount collapse_count
The number of documents we've rejected.
Definition: collapser.h:56
Xapian::doccount get_collapse_count() const
The number of documents we've rejected.
Definition: collapser.h:83
double next_best_weight
The highest weight of a document we've rejected.
Definition: collapser.h:53
double get_next_best_weight() const
The highest weight of a document we've rejected.
Definition: collapser.h:80
std::vector< Xapian::Internal::MSetItem > items
Currently kept MSet entries for this value of the collapse key.
Definition: collapser.h:50
collapse_result add_item(const Xapian::Internal::MSetItem &item, Xapian::doccount collapse_max, MSetCmp mcmp, Xapian::Internal::MSetItem &old_item)
Handle a new MSetItem with this collapse key value.
Definition: collapser.cc:32
CollapseData(const Xapian::Internal::MSetItem &item)
Construct with the given MSetItem item.
Definition: collapser.h:60
The Collapser class tracks collapse keys and the documents they match.
Definition: collapser.h:87
Xapian::doccount get_docs_considered() const
Definition: collapser.h:151
Xapian::doccount no_collapse_key
How many documents have we seen without a collapse key?
Definition: collapser.h:98
Collapser(Xapian::valueno slot_, Xapian::doccount collapse_max_)
Definition: collapser.h:124
Xapian::doccount dups_ignored
How many documents with duplicate collapse keys we have ignored.
Definition: collapser.h:105
Xapian::Internal::MSetItem old_item
Replaced item when REPLACED is returned by process().
Definition: collapser.h:122
Xapian::doccount get_dups_ignored() const
Definition: collapser.h:153
Xapian::valueno slot
The value slot we're getting collapse keys from.
Definition: collapser.h:115
Xapian::doccount get_collapse_count(const std::string &collapse_key, int percent_cutoff, double min_weight) const
Definition: collapser.cc:111
collapse_result process(Xapian::Internal::MSetItem &item, PostList *postlist, Xapian::Document::Internal &vsdoc, MSetCmp mcmp)
Handle a new MSetItem.
Definition: collapser.cc:70
std::map< std::string, CollapseData > table
Map from collapse key values to the items we're keeping for them.
Definition: collapser.h:89
Xapian::doccount collapse_max
The maximum number of items to keep for each collapse key value.
Definition: collapser.h:118
Xapian::doccount entries() const
Definition: collapser.h:155
bool empty() const
Definition: collapser.h:159
Xapian::doccount get_matches_lower_bound() const
Definition: collapser.cc:137
Xapian::doccount docs_considered
How many documents we've considered for collapsing.
Definition: collapser.h:112
Xapian::doccount entry_count
How many items we're currently keeping in table.
Definition: collapser.h:92
A document in the database, possibly plus modifications.
Definition: document.h:43
An item resulting from a query.
Abstract base class for postlists.
Definition: postlist.h:37
collapse_result
Enumeration reporting how a document was handled by the Collapser.
Definition: collapser.h:33
@ EMPTY
Definition: collapser.h:34
@ REPLACED
Definition: collapser.h:37
@ REJECTED
Definition: collapser.h:36
@ ADDED
Definition: collapser.h:35
MSetItem comparison functions.
bool(* MSetCmp)(const Xapian::Internal::MSetItem &, const Xapian::Internal::MSetItem &)
Definition: msetcmp.h:28
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Abstract base class for postlists.