xapian-core  1.4.30
omenquireinternal.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2001,2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
7  * Copyright 2009 Lemur Consulting Ltd
8  * Copyright 2011 Action Without Borders
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as
12  * published by the Free Software Foundation; either version 2 of the
13  * License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23  * USA
24  */
25 
26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H
27 #define OM_HGUARD_OMENQUIREINTERNAL_H
28 
29 #include "xapian/database.h"
30 #include "xapian/document.h"
31 #include "xapian/enquire.h"
32 #include "xapian/query.h"
33 #include "xapian/keymaker.h"
34 
35 #include <algorithm>
36 #include <cmath>
37 #include <map>
38 #include <set>
39 #include <unordered_map>
40 
41 #include "weight/weightinternal.h"
42 
43 using std::map;
44 using std::set;
45 using std::string;
46 using std::vector;
47 
48 class OmExpand;
49 class MultiMatch;
50 
51 namespace Xapian {
52 
53 class TermIterator;
54 
55 namespace Internal {
56 
61 class MSetItem {
62  public:
63  MSetItem(double wt_, Xapian::docid did_)
64  : wt(wt_), did(did_), collapse_count(0) {}
65 
66  MSetItem(double wt_, Xapian::docid did_, const string &key_)
67  : wt(wt_), did(did_), collapse_count(0), collapse_key(key_) {}
68 
69  MSetItem(double wt_, Xapian::docid did_, const string &key_,
70  Xapian::doccount collapse_count_)
71  : wt(wt_), did(did_),
72  collapse_count(collapse_count_), collapse_key(key_) {}
73 
74  void swap(MSetItem & o) {
75  std::swap(wt, o.wt);
76  std::swap(did, o.did);
77  std::swap(collapse_count, o.collapse_count);
78  std::swap(collapse_key, o.collapse_key);
79  std::swap(sort_key, o.sort_key);
80  }
81 
83  double wt;
84 
87 
95 
107  string collapse_key;
108 
110  string sort_key;
111 
113  string get_description() const;
114 };
115 
116 }
117 
/** Internals of enquire system.
 *
 *  NOTE(review): this listing is incomplete.  The line that opens the class
 *  is absent, and so are several member declarations.  The index at the end
 *  of the file appears to list db, qlen, query, order, sorter, read_doc(),
 *  get_document() and get_matching_terms() for this class, none of which
 *  are declared below.
 */
123  friend class MSet::Internal;
124  private:
127 
130 
133 
/** Copy not allowed. */
135  Internal(const Internal &);
/** Assignment not allowed. */
137  void operator=(const Internal &);
138 
139  public:
/** Sort orderings.
 *
 *  NOTE(review): the names suggest relevance (REL), value (VAL) and the two
 *  combined orderings -- confirm against the code which uses sort_setting.
 */
140  typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting;
141 
143 
145 
147 
149 
151 
155 
157 
158  double time_limit;
159 
/** The weight to use for this query (declared mutable, so it can be changed
 *  through a const object).
 */
165  mutable Weight * weight;
166 
/** The weighting scheme to use for query expansion. */
168  std::string eweightname;
169 
/** The parameter required for TradWeight query expansion. */
171  double expand_k;
172 
173  vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies;
174 
175  explicit Internal(const Xapian::Database &databases);
176  ~Internal();
177 
/** Request a document from the database. */
180  void request_doc(const Xapian::Internal::MSetItem &item) const;
181 
185 
187 
188  void set_query(const Query & query_, termcount qlen_);
189  const Query & get_query() const;
/* NOTE(review): the line which opens the next declaration is missing from
 * this listing.  The index at the end of the file gives the full signature:
 *   MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
 *                 Xapian::doccount check_at_least, const RSet *omrset,
 *                 const MatchDecider *mdecider) const
 */
191  Xapian::doccount check_at_least,
192  const RSet *omrset,
193  const MatchDecider *mdecider) const;
194 
195  ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
196  const ExpandDecider *edecider, double min_wt) const;
197 
200 
201  Xapian::doccount get_termfreq(const string &tname) const;
202 
203  string get_description() const;
204 };
205 
/* Internals of an MSet (the class whose documents are cached "by MSet index"
 * below).
 *
 * NOTE(review): this listing is incomplete.  The line that opens the class is
 * absent, and so are several member declarations.  The index at the end of
 * the file appears to list enquire, percent_factor, stats, firstitem, the
 * matches_* and uncollapsed_* bounds and get_doc_by_index() for this class;
 * the constructors below initialise most of them.
 */
207  public:
210 
211  private:
/** The set of documents which have been requested but not yet collected. */
215  mutable set<Xapian::doccount> requested_docs;
216 
/** Cache of documents, indexed by MSet index. */
218  mutable map<Xapian::doccount, Xapian::Document> indexeddocs;
219 
/** Read and cache the documents so far requested. */
221  void read_docs() const;
222 
/** Copy not allowed. */
224  Internal(const Internal &);
/** Assignment not allowed. */
226  void operator=(const Internal &);
227 
228  mutable std::unordered_map<std::string, double> snippet_bg_relevance;
229 
230  public:
233 
236 
/** A list of items comprising the (selected part of the) MSet. */
238  vector<Xapian::Internal::MSetItem> items;
239 
242 
244 
246 
248 
250 
252 
254 
255  double max_possible;
256 
257  double max_attained;
258 
/* NOTE(review): the line which opens this constructor is missing from the
 * listing (presumably the default constructor, as every visible initialiser
 * is zero or NULL).  Listing lines 263-268 are absent as well, so the
 * initialiser list shown here is not complete.
 */
260  : percent_factor(0),
261  stats(NULL),
262  firstitem(0),
269  max_possible(0),
270  max_attained(0) {}
271 
/* Construct from precomputed match statistics and a vector of items.
 *
 * Note: destroys parameter items.
 *
 * NOTE(review): the line which opens this constructor is missing from the
 * listing; the index at the end of the file gives the first parameter as
 * Xapian::doccount firstitem_.
 */
274  Xapian::doccount matches_upper_bound_,
275  Xapian::doccount matches_lower_bound_,
276  Xapian::doccount matches_estimated_,
277  Xapian::doccount uncollapsed_upper_bound_,
278  Xapian::doccount uncollapsed_lower_bound_,
279  Xapian::doccount uncollapsed_estimated_,
280  double max_possible_,
281  double max_attained_,
282  vector<Xapian::Internal::MSetItem> &items_,
283  double percent_factor_)
284  : percent_factor(percent_factor_),
285  stats(NULL),
286  firstitem(firstitem_),
287  matches_lower_bound(matches_lower_bound_),
288  matches_estimated(matches_estimated_),
289  matches_upper_bound(matches_upper_bound_),
290  uncollapsed_lower_bound(uncollapsed_lower_bound_),
291  uncollapsed_estimated(uncollapsed_estimated_),
292  uncollapsed_upper_bound(uncollapsed_upper_bound_),
293  max_possible(max_possible_),
294  max_attained(max_attained_) {
// items is not in the initialiser list, so it is still empty here; the swap
// takes over the caller's elements and leaves items_ empty.
295  std::swap(items, items_);
296  }
297 
/** Frees stats, which both visible constructors initialise to NULL. */
298  ~Internal() { delete stats; }
299 
302 
/** Converts a weight to a percentage weight. */
304  int convert_to_percent_internal(double wt) const;
305 
306  std::string snippet(const std::string & text, size_t length,
307  const Xapian::Stem & stemmer,
308  unsigned flags,
309  const std::string & hi_start,
310  const std::string & hi_end,
311  const std::string & omit) const;
312 
/** Return a string describing this object. */
314  string get_description() const;
315 
/** Fetch items specified into the document cache. */
318  void fetch_items(Xapian::doccount first, Xapian::doccount last) const;
319 };
320 
/* Internals of a relevance set: holds the document ids which make up the set.
 *
 * NOTE(review): the line that opens this class is absent from the listing.
 */
322  friend class Xapian::RSet;
323 
324  private:
/** Items in the relevance set. */
326  set<Xapian::docid> items;
327 
328  public:
/** Read-only access to the set of document ids; returns a reference to the
 *  member itself rather than a copy.
 */
329  const set<Xapian::docid> & get_items() const { return items; }
330 
/** Return a string describing this object. */
332  string get_description() const;
333 };
334 
335 }
336 
337 #endif // OM_HGUARD_OMENQUIREINTERNAL_H
This class is used to access a database, or a group of databases.
Definition: database.h:68
A handle representing a document in a Xapian database.
Definition: document.h:61
Class representing a list of query expansion terms (an E-Set).
Definition: eset.h:43
Internals of enquire system.
void operator=(const Internal &)
Assignment not allowed.
void set_query(const Query &query_, termcount qlen_)
Definition: omenquire.cc:526
vector< Xapian::Internal::opt_intrusive_ptr< MatchSpy > > spies
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, const RSet *omrset, const MatchDecider *mdecider) const
Definition: omenquire.cc:539
const Xapian::Database db
The database which this enquire object uses.
Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const
Read a previously requested document from the database.
Definition: omenquire.cc:732
const Query & get_query() const
Definition: omenquire.cc:533
TermIterator get_matching_terms(Xapian::docid did) const
Definition: omenquire.cc:659
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags, const ExpandDecider *edecider, double min_wt) const
Definition: omenquire.cc:594
string get_description() const
Definition: omenquire.cc:710
Internal(const Internal &)
Copy not allowed.
void request_doc(const Xapian::Internal::MSetItem &item) const
Request a document from the database.
Definition: omenquire.cc:721
termcount qlen
The query length.
double expand_k
The parameter required for TradWeight query expansion.
Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const
Definition: omenquire.cc:745
std::string eweightname
The weighting scheme to use for query expansion.
Xapian::doccount get_termfreq(const string &tname) const
Definition: omenquire.cc:704
Xapian::Internal::opt_intrusive_ptr< KeyMaker > sorter
Xapian::Enquire::docid_order order
Query query
The user's query.
Weight * weight
The weight to use for this query.
docid_order
Ordering of docids.
Definition: enquire.h:326
Virtual base class for expand decider functor.
Definition: expanddecider.h:38
An item resulting from a query.
Xapian::doccount collapse_count
Count of collapses done on collapse_key so far.
MSetItem(double wt_, Xapian::docid did_, const string &key_)
string get_description() const
Return a string describing this object.
Definition: omenquire.cc:149
double wt
Weight calculated.
MSetItem(double wt_, Xapian::docid did_, const string &key_, Xapian::doccount collapse_count_)
string collapse_key
Value which was used to collapse upon.
Xapian::docid did
Document id.
string sort_key
Used when sorting by value.
MSetItem(double wt_, Xapian::docid did_)
Base class for objects managed by intrusive_ptr.
Definition: intrusive_ptr.h:49
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:82
A smart pointer that optionally uses intrusive reference counting.
Iterator over a Xapian::MSet.
Definition: mset.h:368
void operator=(const Internal &)
Assignment not allowed.
Xapian::doccount uncollapsed_upper_bound
int convert_to_percent_internal(double wt) const
Converts a weight to a percentage weight.
Definition: omenquire.cc:332
Xapian::Internal::intrusive_ptr< const Enquire::Internal > enquire
Xapian::Enquire reference, for getting documents.
std::unordered_map< std::string, double > snippet_bg_relevance
double percent_factor
Factor to multiply weights by to convert them to percentages.
string get_description() const
Return a string describing this object.
Definition: omenquire.cc:403
map< Xapian::doccount, Xapian::Document > indexeddocs
Cache of documents, indexed by MSet index.
void read_docs() const
Read and cache the documents so far requested.
Definition: omenquire.cc:426
Xapian::doccount uncollapsed_lower_bound
Xapian::doccount matches_estimated
Xapian::Document get_doc_by_index(Xapian::doccount index) const
Get a document by index in the MSet, via the cache.
Definition: omenquire.cc:350
Xapian::doccount firstitem
Rank of first item in MSet.
void fetch_items(Xapian::doccount first, Xapian::doccount last) const
Fetch items specified into the document cache.
Definition: omenquire.cc:377
Xapian::doccount matches_lower_bound
vector< Xapian::Internal::MSetItem > items
A list of items comprising the (selected part of the) MSet.
Internal(const Internal &)
Copy not allowed.
set< Xapian::doccount > requested_docs
The set of documents which have been requested but not yet collected.
std::string snippet(const std::string &text, size_t length, const Xapian::Stem &stemmer, unsigned flags, const std::string &hi_start, const std::string &hi_end, const std::string &omit) const
Internal(Xapian::doccount firstitem_, Xapian::doccount matches_upper_bound_, Xapian::doccount matches_lower_bound_, Xapian::doccount matches_estimated_, Xapian::doccount uncollapsed_upper_bound_, Xapian::doccount uncollapsed_lower_bound_, Xapian::doccount uncollapsed_estimated_, double max_possible_, double max_attained_, vector< Xapian::Internal::MSetItem > &items_, double percent_factor_)
Note: destroys parameter items.
Xapian::doccount matches_upper_bound
Xapian::Weight::Internal * stats
Provides the term frequency and weight for each term in the query.
Xapian::doccount uncollapsed_estimated
Class representing a list of search results.
Definition: mset.h:44
Base class for matcher decision functor.
Definition: enquire.h:118
Class representing a query.
Definition: query.h:46
string get_description() const
Return a string describing this object.
Definition: omenquire.cc:129
set< Xapian::docid > items
Items in the relevance set.
const set< Xapian::docid > & get_items() const
A relevance set (R-Set).
Definition: enquire.h:60
Class representing a stemming algorithm.
Definition: stem.h:62
Class for iterating over a list of terms.
Definition: termiterator.h:41
Class to hold statistics for a given collection.
Abstract base class for weighting schemes.
Definition: weight.h:35
API for running queries.
API for working with Xapian databases.
API for working with documents.
Build key strings for MSet ordering or collapsing.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Xapian::Query API class.
static Xapian::Stem stemmer
Definition: stemtest.cc:42
Xapian::Weight::Internal class, holding database and term statistics.