xapian-core  1.4.19
omenquireinternal.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2001,2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
7  * Copyright 2009 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23  * USA
24  */
25 
26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H
27 #define OM_HGUARD_OMENQUIREINTERNAL_H
28 
29 #include "xapian/database.h"
30 #include "xapian/document.h"
31 #include "xapian/enquire.h"
32 #include "xapian/query.h"
33 #include "xapian/keymaker.h"
34 
35 #include <algorithm>
36 #include <cmath>
37 #include <map>
38 #include <set>
39 #include <unordered_map>
40 
41 #include "weight/weightinternal.h"
42 
// NOTE(review): this using-directive sits at header scope, so it is visible
// to every file that includes this header.  Declarations further down use
// unqualified `string`, `vector`, `set` and `map` and therefore depend on it.
43 using namespace std;
44 
// Forward declarations: only the class names are introduced here.
45 class OmExpand;
46 class MultiMatch;
47 
48 namespace Xapian {
49 
// Forward declaration; used below as the return type of get_matching_terms().
50 class TermIterator;
51 
52 namespace Internal {
53 
/// An item resulting from a query.
///
/// NOTE(review): the constructors and swap() below use members `did` and
/// `collapse_count`, but their declarations are not present in this listing
/// (the listing skips some line numbers) - confirm against the real header.
58 class MSetItem {
59  public:
 /// Construct from a weight and a document id.
 /// collapse_count is set to 0; collapse_key and sort_key are not named in
 /// the initializer list, so they are default-constructed.
60  MSetItem(double wt_, Xapian::docid did_)
61  : wt(wt_), did(did_), collapse_count(0) {}
62 
 /// As above, additionally copying key_ into collapse_key.
 /// collapse_count is still set to 0.
63  MSetItem(double wt_, Xapian::docid did_, const string &key_)
64  : wt(wt_), did(did_), collapse_key(key_), collapse_count(0) {}
65 
 /// As above, additionally initialising collapse_count from
 /// collapse_count_.
66  MSetItem(double wt_, Xapian::docid did_, const string &key_,
67  Xapian::doccount collapse_count_)
68  : wt(wt_), did(did_), collapse_key(key_),
69  collapse_count(collapse_count_) {}
70 
 /// Exchange the contents of this item with those of o, member by member
 /// (wt, did, collapse_key, collapse_count and sort_key).
71  void swap(MSetItem & o) {
72  std::swap(wt, o.wt);
73  std::swap(did, o.did);
74  std::swap(collapse_key, o.collapse_key);
75  std::swap(collapse_count, o.collapse_count);
76  std::swap(sort_key, o.sort_key);
77  }
78 
 /// Weight calculated.
80  double wt;
81 
84 
 /// Value which was used to collapse upon.
96  string collapse_key;
97 
105 
 /// Used when sorting by value.
107  string sort_key;
108 
 /// Return a string describing this object.
110  string get_description() const;
111 };
112 
113 }
114 
// Internals of enquire system.
//
// NOTE(review): the line that opens this class is not present in this
// listing.  The sort_setting typedef below is referred to elsewhere as
// Xapian::Enquire::Internal::sort_setting, so this is presumably the body of
// Xapian::Enquire::Internal - confirm against the real header.
120  friend class MSet::Internal;
121  private:
124 
127 
130 
 // Copy constructor and assignment are declared private; no definition is
 // visible here (presumably to disallow copying - confirm).
132  Internal(const Internal &);
134  void operator=(const Internal &);
135 
136  public:
 // The four values selectable through sort_by below.
137  typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting;
138 
140 
142 
144 
146 
148 
150  sort_setting sort_by;
152 
154 
155  double time_limit;
156 
 /// The weight to use for this query.
 /// Declared mutable, so const member functions may modify the pointer.
162  mutable Weight * weight;
163 
 /// The weighting scheme to use for query expansion.
165  std::string eweightname;
166 
 /// The parameter required for TradWeight query expansion.
168  double expand_k;
169 
170  vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies;
171 
 // explicit: a Xapian::Database does not implicitly convert to Internal.
172  explicit Internal(const Xapian::Database &databases);
173  ~Internal();
174 
177  void request_doc(const Xapian::Internal::MSetItem &item) const;
178 
181  Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const;
182 
183  Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const;
184 
185  void set_query(const Query & query_, termcount qlen_);
186  const Query & get_query() const;
187  MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
188  Xapian::doccount check_at_least,
189  const RSet *omrset,
190  const MatchDecider *mdecider) const;
191 
192  ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
193  const ExpandDecider *edecider, double min_wt) const;
194 
 // Two overloads: one keyed by document id, one by MSet iterator.
195  TermIterator get_matching_terms(Xapian::docid did) const;
196  TermIterator get_matching_terms(const Xapian::MSetIterator &it) const;
197 
198  Xapian::doccount get_termfreq(const string &tname) const;
199 
 /// Return a string describing this object.
200  string get_description() const;
201 };
202 
// NOTE(review): the line that opens this class is not present in this
// listing.  Judging by the members (a vector of MSetItem, match-count bounds)
// this is presumably the body of Xapian::MSet::Internal - confirm against the
// real header.  Several data members initialised by the constructors below
// (percent_factor, stats, firstitem, matches_*, uncollapsed_*) are likewise
// declared on lines this listing does not show.
204  public:
207 
208  private:
 /// The set of documents which have been requested but not yet collected.
212  mutable set<Xapian::doccount> requested_docs;
213 
 /// Cache of documents, indexed by MSet index.
215  mutable map<Xapian::doccount, Xapian::Document> indexeddocs;
216 
218  void read_docs() const;
219 
 // Copy constructor and assignment are declared private; no definition is
 // visible here (presumably to disallow copying - confirm).
221  Internal(const Internal &);
223  void operator=(const Internal &);
224 
225  mutable std::unordered_map<std::string, double> snippet_bg_relevance;
226 
227  public:
230 
233 
 /// A list of items comprising the (selected part of the) MSet.
235  vector<Xapian::Internal::MSetItem> items;
236 
239 
241 
243 
245 
247 
249 
251 
252  double max_possible;
253 
254  double max_attained;
255 
 // Member-initializer list of a constructor whose signature line is not
 // shown in this listing.  Every numeric member is set to 0 and stats to
 // NULL; items is not named, so it is default-constructed.
257  : percent_factor(0),
258  stats(NULL),
259  firstitem(0),
260  matches_lower_bound(0),
261  matches_estimated(0),
262  matches_upper_bound(0),
263  uncollapsed_lower_bound(0),
264  uncollapsed_estimated(0),
265  uncollapsed_upper_bound(0),
266  max_possible(0),
267  max_attained(0) {}
268 
 // Remaining parameters of a second constructor; its first line (which, per
 // the initializer list, supplies firstitem_) is not shown in this listing.
 // Each member is copied from the parameter of the same name, stats is set
 // to NULL, and the body swaps items_ into items.
 // Note: destroys parameter items_ - after the swap the caller's vector
 // holds what this object's freshly constructed items held.
271  Xapian::doccount matches_upper_bound_,
272  Xapian::doccount matches_lower_bound_,
273  Xapian::doccount matches_estimated_,
274  Xapian::doccount uncollapsed_upper_bound_,
275  Xapian::doccount uncollapsed_lower_bound_,
276  Xapian::doccount uncollapsed_estimated_,
277  double max_possible_,
278  double max_attained_,
279  vector<Xapian::Internal::MSetItem> &items_,
280  double percent_factor_)
281  : percent_factor(percent_factor_),
282  stats(NULL),
283  firstitem(firstitem_),
284  matches_lower_bound(matches_lower_bound_),
285  matches_estimated(matches_estimated_),
286  matches_upper_bound(matches_upper_bound_),
287  uncollapsed_lower_bound(uncollapsed_lower_bound_),
288  uncollapsed_estimated(uncollapsed_estimated_),
289  uncollapsed_upper_bound(uncollapsed_upper_bound_),
290  max_possible(max_possible_),
291  max_attained(max_attained_) {
292  std::swap(items, items_);
293  }
294 
 // Deletes the object stats points to; both constructors above leave stats
 // as NULL, for which delete has no effect.
295  ~Internal() { delete stats; }
296 
298  Xapian::Document get_doc_by_index(Xapian::doccount index) const;
299 
301  int convert_to_percent_internal(double wt) const;
302 
303  std::string snippet(const std::string & text, size_t length,
304  const Xapian::Stem & stemmer,
305  unsigned flags,
306  const std::string & hi_start,
307  const std::string & hi_end,
308  const std::string & omit) const;
309 
 /// Return a string describing this object.
311  string get_description() const;
312 
315  void fetch_items(Xapian::doccount first, Xapian::doccount last) const;
316 };
317 
// NOTE(review): the line that opens this class is not present in this
// listing.  Given the friend declaration, this is presumably the body of
// Xapian::RSet::Internal - confirm against the real header.
319  friend class Xapian::RSet;
320 
321  private:
 /// Items in the relevance set.
323  set<Xapian::docid> items;
324 
325  public:
 /// Read-only access to the stored document ids.  Returns a reference to
 /// the member itself, not a copy.
326  const set<Xapian::docid> & get_items() const { return items; }
327 
 /// Return a string describing this object.
329  string get_description() const;
330 };
331 
332 }
333 
334 #endif // OM_HGUARD_OMENQUIREINTERNAL_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
std::string eweightname
The weighting scheme to use for query expansion.
const Xapian::Database db
The database which this enquire object uses.
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:123
Xapian::doccount firstitem
Rank of first item in MSet.
MSetItem(double wt_, Xapian::docid did_, const string &key_, Xapian::doccount collapse_count_)
This class is used to access a database, or a group of databases.
Definition: database.h:68
std::unordered_map< std::string, double > snippet_bg_relevance
Class representing a stemming algorithm.
Definition: stem.h:62
Xapian::doccount collapse_count
Count of collapses done on collapse_key so far.
Xapian::docid did
Document id.
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
Virtual base class for expand decider functor.
Definition: expanddecider.h:37
Xapian::doccount matches_lower_bound
static Xapian::Stem stemmer
Definition: stemtest.cc:41
Xapian::Query API class.
string sort_key
Used when sorting by value.
Build key strings for MSet ordering or collapsing.
Xapian::Enquire::docid_order order
Xapian::Internal::intrusive_ptr< const Enquire::Internal > enquire
Xapian::Enquire reference, for getting documents.
Xapian::doccount uncollapsed_upper_bound
API for running queries.
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Internal(Xapian::doccount firstitem_, Xapian::doccount matches_upper_bound_, Xapian::doccount matches_lower_bound_, Xapian::doccount matches_estimated_, Xapian::doccount uncollapsed_upper_bound_, Xapian::doccount uncollapsed_lower_bound_, Xapian::doccount uncollapsed_estimated_, double max_possible_, double max_attained_, vector< Xapian::Internal::MSetItem > &items_, double percent_factor_)
Note: destroys parameter items.
Iterator over a Xapian::MSet.
Definition: mset.h:351
Weight * weight
The weight to use for this query.
Xapian::Weight::Internal class, holding database and term statistics.
Query query
The user's query.
double wt
Weight calculated.
Class to hold statistics for a given collection.
string collapse_key
Value which was used to collapse upon.
map< Xapian::doccount, Xapian::Document > indexeddocs
Cache of documents, indexed by MSet index.
An item resulting from a query.
const Xapian::Enquire::Internal::sort_setting VAL
Definition: multimatch.cc:158
set< Xapian::doccount > requested_docs
The set of documents which have been requested but not yet collected.
API for working with Xapian databases.
termcount qlen
The query length.
Xapian::Internal::opt_intrusive_ptr< KeyMaker > sorter
Xapian::doccount matches_upper_bound
Base class for objects managed by intrusive_ptr.
Definition: intrusive_ptr.h:49
Xapian::Weight::Internal * stats
Provides the term frequency and weight for each term in the query.
const Xapian::Enquire::Internal::sort_setting REL_VAL
Definition: multimatch.cc:156
vector< Xapian::Internal::MSetItem > items
A list of items comprising the (selected part of the) MSet.
MSetItem(double wt_, Xapian::docid did_, const string &key_)
Base class for matcher decision functor.
Definition: enquire.h:118
set< Xapian::docid > items
Items in the relevance set.
const set< Xapian::docid > & get_items() const
MSetItem(double wt_, Xapian::docid did_)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::doccount matches_estimated
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
const Xapian::Enquire::Internal::sort_setting REL
Definition: multimatch.cc:154
Definition: quest.cc:110
Class representing a list of query expansion terms.
Definition: eset.h:43
double expand_k
The parameter required for TradWeight query expansion.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
vector< Xapian::Internal::opt_intrusive_ptr< MatchSpy > > spies
API for working with documents.
A smart pointer that optionally uses intrusive reference counting.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
double percent_factor
Factor to multiply weights by to convert them to percentages.
Xapian::doccount uncollapsed_lower_bound
Internals of enquire system.
docid_order
Ordering of docids.
Definition: enquire.h:322
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::doccount uncollapsed_estimated
A relevance set (R-Set).
Definition: enquire.h:60
Abstract base class for weighting schemes.
Definition: weight.h:35