xapian-core  1.4.26
omenquireinternal.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2001,2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
7  * Copyright 2009 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23  * USA
24  */
25 
26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H
27 #define OM_HGUARD_OMENQUIREINTERNAL_H
28 
29 #include "xapian/database.h"
30 #include "xapian/document.h"
31 #include "xapian/enquire.h"
32 #include "xapian/query.h"
33 #include "xapian/keymaker.h"
34 
35 #include <algorithm>
36 #include <cmath>
37 #include <map>
38 #include <set>
39 #include <unordered_map>
40 
41 #include "weight/weightinternal.h"
42 
43 using std::map;
44 using std::set;
45 using std::string;
46 using std::vector;
47 
48 class OmExpand;
49 class MultiMatch;
50 
51 namespace Xapian {
52 
53 class TermIterator;
54 
55 namespace Internal {
56 
61 class MSetItem {
62  public:
63  MSetItem(double wt_, Xapian::docid did_)
64  : wt(wt_), did(did_), collapse_count(0) {}
65 
66  MSetItem(double wt_, Xapian::docid did_, const string &key_)
67  : wt(wt_), did(did_), collapse_count(0), collapse_key(key_) {}
68 
69  MSetItem(double wt_, Xapian::docid did_, const string &key_,
70  Xapian::doccount collapse_count_)
71  : wt(wt_), did(did_),
72  collapse_count(collapse_count_), collapse_key(key_) {}
73 
74  void swap(MSetItem & o) {
75  std::swap(wt, o.wt);
76  std::swap(did, o.did);
77  std::swap(collapse_count, o.collapse_count);
78  std::swap(collapse_key, o.collapse_key);
79  std::swap(sort_key, o.sort_key);
80  }
81 
83  double wt;
84 
87 
95 
107  string collapse_key;
108 
110  string sort_key;
111 
113  string get_description() const;
114 };
115 
116 }
117 
// Internals of enquire system.
//
// NOTE(review): this listing begins inside the class body. The line holding
// the class header is absent, and so are the declarations of several members
// that the reference section of this page mentions (e.g. db, query, qlen,
// order, sorter). The enum below is referred to elsewhere on this page as
// Xapian::Enquire::Internal::sort_setting, which identifies the class.
123  friend class MSet::Internal;
124  private:
127 
130 
133 
 // Copy constructor, declared in the private section. No definition is
 // visible in this listing (presumably copying is disallowed -- confirm).
135  Internal(const Internal &);
 // Assignment operator, declared in the private section. No definition is
 // visible in this listing (presumably assignment is disallowed -- confirm).
137  void operator=(const Internal &);
138 
139  public:
 // Type of the sort_by member declared below.
140  typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting;
141 
143 
145 
147 
149 
151 
 // Currently selected sort_setting.
153  sort_setting sort_by;
155 
157 
158  double time_limit;
159 
 // The weight to use for this query. Declared mutable, so const member
 // functions are able to modify the pointer.
165  mutable Weight * weight;
166 
 // The weighting scheme to use for query expansion.
168  std::string eweightname;
169 
 // The parameter required for TradWeight query expansion.
171  double expand_k;
172 
173  vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies;
174 
 // explicit: a Xapian::Database is never implicitly converted to this type.
175  explicit Internal(const Xapian::Database &databases);
176  ~Internal();
177 
180  void request_doc(const Xapian::Internal::MSetItem &item) const;
181 
184  Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const;
185 
186  Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const;
187 
188  void set_query(const Query & query_, termcount qlen_);
189  const Query & get_query() const;
190  MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
191  Xapian::doccount check_at_least,
192  const RSet *omrset,
193  const MatchDecider *mdecider) const;
194 
195  ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
196  const ExpandDecider *edecider, double min_wt) const;
197 
 // Two overloads: one takes a document id, the other an MSetIterator.
198  TermIterator get_matching_terms(Xapian::docid did) const;
199  TermIterator get_matching_terms(const Xapian::MSetIterator &it) const;
200 
201  Xapian::doccount get_termfreq(const string &tname) const;
202 
 // Return a string describing this object.
203  string get_description() const;
204 };
205 
// NOTE(review): this listing begins inside a class body (presumably
// MSet::Internal -- confirm). The class header line is absent, and so are
// the declarations of most members named in the constructor initialiser
// lists below (percent_factor, stats, firstitem, matches_*, uncollapsed_*).
207  public:
210 
211  private:
 // The set of documents which have been requested but not yet collected.
215  mutable set<Xapian::doccount> requested_docs;
216 
 // Cache of documents, indexed by MSet index.
218  mutable map<Xapian::doccount, Xapian::Document> indexeddocs;
219 
221  void read_docs() const;
222 
 // Copy constructor, declared in the private section. No definition is
 // visible in this listing (presumably copying is disallowed -- confirm).
224  Internal(const Internal &);
 // Assignment operator, declared in the private section. No definition is
 // visible in this listing (presumably assignment is disallowed -- confirm).
226  void operator=(const Internal &);
227 
228  mutable std::unordered_map<std::string, double> snippet_bg_relevance;
229 
230  public:
233 
236 
 // A list of items comprising the (selected part of the) MSet.
238  vector<Xapian::Internal::MSetItem> items;
239 
242 
244 
246 
248 
250 
252 
254 
255  double max_possible;
256 
257  double max_attained;
258 
 // NOTE(review): the line declaring this constructor is absent from the
 // listing. The initialiser list sets stats to NULL and every other listed
 // member to 0; the body is empty.
260  : percent_factor(0),
261  stats(NULL),
262  firstitem(0),
263  matches_lower_bound(0),
264  matches_estimated(0),
265  matches_upper_bound(0),
266  uncollapsed_lower_bound(0),
267  uncollapsed_estimated(0),
268  uncollapsed_upper_bound(0),
269  max_possible(0),
270  max_attained(0) {}
271 
 // NOTE(review): the first line of this constructor (its name and the
 // firstitem_ parameter used below) is absent from the listing.
 //
 // Note: destroys parameter items. items_ is taken by non-const reference
 // and swapped with this->items in the body rather than copied, so the
 // caller's vector holds the previous contents of this->items afterwards.
 // stats is initialised to NULL here.
274  Xapian::doccount matches_upper_bound_,
275  Xapian::doccount matches_lower_bound_,
276  Xapian::doccount matches_estimated_,
277  Xapian::doccount uncollapsed_upper_bound_,
278  Xapian::doccount uncollapsed_lower_bound_,
279  Xapian::doccount uncollapsed_estimated_,
280  double max_possible_,
281  double max_attained_,
282  vector<Xapian::Internal::MSetItem> &items_,
283  double percent_factor_)
284  : percent_factor(percent_factor_),
285  stats(NULL),
286  firstitem(firstitem_),
287  matches_lower_bound(matches_lower_bound_),
288  matches_estimated(matches_estimated_),
289  matches_upper_bound(matches_upper_bound_),
290  uncollapsed_lower_bound(uncollapsed_lower_bound_),
291  uncollapsed_estimated(uncollapsed_estimated_),
292  uncollapsed_upper_bound(uncollapsed_upper_bound_),
293  max_possible(max_possible_),
294  max_attained(max_attained_) {
295  std::swap(items, items_);
296  }
297 
 // Deletes the object pointed to by stats; both constructors above
 // initialise stats to NULL, and deleting a null pointer has no effect.
298  ~Internal() { delete stats; }
299 
301  Xapian::Document get_doc_by_index(Xapian::doccount index) const;
302 
304  int convert_to_percent_internal(double wt) const;
305 
306  std::string snippet(const std::string & text, size_t length,
307  const Xapian::Stem & stemmer,
308  unsigned flags,
309  const std::string & hi_start,
310  const std::string & hi_end,
311  const std::string & omit) const;
312 
 // Return a string describing this object.
314  string get_description() const;
315 
318  void fetch_items(Xapian::doccount first, Xapian::doccount last) const;
319 };
320 
// NOTE(review): this listing begins inside a class body (presumably
// RSet::Internal -- confirm); the class header line is absent.
// Xapian::RSet is a friend, so it can access the private items set directly.
322  friend class Xapian::RSet;
323 
324  private:
 // Items in the relevance set.
326  set<Xapian::docid> items;
327 
328  public:
 // Read-only access to the set of document ids; returns a reference, not a copy.
329  const set<Xapian::docid> & get_items() const { return items; }
330 
 // Return a string describing this object.
332  string get_description() const;
333 };
334 
335 }
336 
337 #endif // OM_HGUARD_OMENQUIREINTERNAL_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
std::string eweightname
The weighting scheme to use for query expansion.
string get_description() const
Return a string describing this object.
Definition: omenquire.cc:149
const Xapian::Database db
The database which this enquire object uses.
Xapian::doccount firstitem
Rank of first item in MSet.
MSetItem(double wt_, Xapian::docid did_, const string &key_, Xapian::doccount collapse_count_)
This class is used to access a database, or a group of databases.
Definition: database.h:68
std::unordered_map< std::string, double > snippet_bg_relevance
Class representing a stemming algorithm.
Definition: stem.h:62
Xapian::doccount collapse_count
Count of collapses done on collapse_key so far.
Xapian::docid did
Document id.
Class representing a list of search results.
Definition: mset.h:44
Virtual base class for expand decider functor.
Definition: expanddecider.h:37
Xapian::doccount matches_lower_bound
static Xapian::Stem stemmer
Definition: stemtest.cc:41
Xapian::Query API class.
string sort_key
Used when sorting by value.
Build key strings for MSet ordering or collapsing.
Xapian::Enquire::docid_order order
Xapian::Internal::intrusive_ptr< const Enquire::Internal > enquire
Xapian::Enquire reference, for getting documents.
Xapian::doccount uncollapsed_upper_bound
API for running queries.
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Internal(Xapian::doccount firstitem_, Xapian::doccount matches_upper_bound_, Xapian::doccount matches_lower_bound_, Xapian::doccount matches_estimated_, Xapian::doccount uncollapsed_upper_bound_, Xapian::doccount uncollapsed_lower_bound_, Xapian::doccount uncollapsed_estimated_, double max_possible_, double max_attained_, vector< Xapian::Internal::MSetItem > &items_, double percent_factor_)
Note: destroys parameter items.
Iterator over a Xapian::MSet.
Definition: mset.h:368
Weight * weight
The weight to use for this query.
Xapian::Weight::Internal class, holding database and term statistics.
Query query
The user's query.
double wt
Weight calculated.
Class to hold statistics for a given collection.
string collapse_key
Value which was used to collapse upon.
map< Xapian::doccount, Xapian::Document > indexeddocs
Cache of documents, indexed by MSet index.
An item resulting from a query.
const Xapian::Enquire::Internal::sort_setting VAL
Definition: multimatch.cc:158
set< Xapian::doccount > requested_docs
The set of documents which have been requested but not yet collected.
API for working with Xapian databases.
termcount qlen
The query length.
Xapian::Internal::opt_intrusive_ptr< KeyMaker > sorter
Xapian::doccount matches_upper_bound
Base class for objects managed by intrusive_ptr.
Definition: intrusive_ptr.h:49
Xapian::Weight::Internal * stats
Provides the term frequency and weight for each term in the query.
const Xapian::Enquire::Internal::sort_setting REL_VAL
Definition: multimatch.cc:156
vector< Xapian::Internal::MSetItem > items
A list of items comprising the (selected part of the) MSet.
MSetItem(double wt_, Xapian::docid did_, const string &key_)
Base class for matcher decision functor.
Definition: enquire.h:118
set< Xapian::docid > items
Items in the relevance set.
const set< Xapian::docid > & get_items() const
MSetItem(double wt_, Xapian::docid did_)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::doccount matches_estimated
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
const Xapian::Enquire::Internal::sort_setting REL
Definition: multimatch.cc:154
Class representing a list of search results.
Definition: eset.h:43
double expand_k
The parameter required for TradWeight query expansion.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
vector< Xapian::Internal::opt_intrusive_ptr< MatchSpy > > spies
API for working with documents.
A smart pointer that optionally uses intrusive reference counting.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
double percent_factor
Factor to multiply weights by to convert them to percentages.
Xapian::doccount uncollapsed_lower_bound
Internals of enquire system.
docid_order
Ordering of docids.
Definition: enquire.h:326
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::doccount uncollapsed_estimated
A relevance set (R-Set).
Definition: enquire.h:60
Abstract base class for weighting schemes.
Definition: weight.h:35