xapian-core  1.4.26
synonympostlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 2007,2009 Lemur Consulting Ltd
5  * Copyright 2009,2011,2014,2016,2018 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "synonympostlist.h"
26 
27 #include "branchpostlist.h"
28 #include "debuglog.h"
29 #include "omassert.h"
30 
32 {
33  delete wt;
34  delete subtree;
35 }
36 
37 void
39 {
40  delete wt;
41  wt = wt_;
45 }
46 
47 PostList *
48 SynonymPostList::next(double w_min)
49 {
50  LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min);
51  (void)w_min;
53  RETURN(NULL);
54 }
55 
56 PostList *
58 {
59  LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did | w_min);
60  (void)w_min;
62  RETURN(NULL);
63 }
64 
65 double
67 {
68  LOGCALL(MATCH, double, "SynonymPostList::get_weight", NO_ARGS);
69  // The wdf returned can be higher than the doclength. In particular, this
70  // can currently occur if the query contains a term more than once; the wdf
71  // of each occurrence is added up.
72  //
73  // However, it's reasonable for weighting algorithms to optimise by
74  // assuming that get_wdf() will never return more than get_doclength(),
75  // since the doclength is the sum of the wdfs.
76  //
77  // Therefore, we simply clamp the wdf value to the doclength, to ensure
78  // that this is true. Note that this requires the doclength to be
79  // calculated even if the weight object doesn't want it.
80 
81  Xapian::termcount unique_terms = 0;
83  unique_terms = get_unique_terms();
84  if (want_wdf) {
85  Xapian::termcount wdf = get_wdf();
86  Xapian::termcount doclen = 0;
87  if (want_doclength || (!wdf_disjoint && wdf > doclen_lower_bound)) {
88  doclen = get_doclength();
89  if (wdf > doclen) wdf = doclen;
90  }
91  double sumpart = wt->get_sumpart(wdf, doclen, unique_terms);
92  AssertRel(sumpart, <=, wt->get_maxpart());
93  RETURN(sumpart);
94  }
96  RETURN(wt->get_sumpart(0, doclen, unique_terms));
97 }
98 
99 double
101 {
102  LOGCALL(MATCH, double, "SynonymPostList::get_maxweight", NO_ARGS);
103  RETURN(wt->get_maxpart());
104 }
105 
106 double
108 {
109  LOGCALL(MATCH, double, "SynonymPostList::recalc_maxweight", NO_ARGS);
111 }
112 
115  LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_wdf", NO_ARGS);
116  RETURN(subtree->get_wdf());
117 }
118 
121  LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", NO_ARGS);
123 }
124 
127  LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_est", NO_ARGS);
129 }
130 
133  LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_max", NO_ARGS);
135 }
136 
139  LOGCALL(MATCH, Xapian::docid, "SynonymPostList::get_docid", NO_ARGS);
141 }
142 
145  LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_doclength", NO_ARGS);
147 }
148 
151  LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_unique_terms", NO_ARGS);
153 }
154 
155 bool
157  LOGCALL(MATCH, bool, "SynonymPostList::at_end", NO_ARGS);
158  RETURN(subtree->at_end());
159 }
160 
163 {
164  return 1;
165 }
166 
167 std::string
169 {
170  return "(Synonym " + subtree->get_description() + ")";
171 }
#define RETURN(A)
Definition: debuglog.h:493
Abstract base class for postlists.
Definition: postlist.h:37
std::string get_description() const
Return a string description of this object.
bool want_doclength
Flag indicating whether the weighting object needs the doclength.
virtual Xapian::docid get_docid() const =0
Return the current docid.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
bool want_unique_terms
Flag indicating whether the weighting object needs the number of unique terms.
Xapian::doccount get_termfreq_max() const
Get an upper bound on the number of documents indexed by this term.
bool get_sumpart_needs_wdf_() const
Definition: weight.h:342
bool at_end() const
Return true if the current position is past the last entry in this list.
bool get_sumpart_needs_uniqueterms_() const
Definition: weight.h:352
virtual Xapian::termcount get_unique_terms() const =0
Return the number of unique terms in the current document.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Xapian::docid get_docid() const
Return the current docid.
bool get_sumpart_needs_doclength_() const
Definition: weight.h:333
virtual double get_maxpart() const =0
Return an upper bound on what get_sumpart() can return for any document.
bool skip_to_handling_prune(PostList *&pl, Xapian::docid did, double w_min, MultiMatch *matcher)
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool wdf_disjoint
Are the subquery's wdf contributions disjoint?
Xapian::termcount get_doclength() const
Return the length of current document.
const Xapian::Weight * wt
Weighting object used for calculating the synonym weights.
virtual Xapian::doccount get_termfreq_max() const =0
Get an upper bound on the number of documents indexed by this term.
virtual Xapian::doccount get_termfreq_est() const =0
Get an estimate of the number of documents indexed by this term.
virtual base class for branched types of postlist
PostList * subtree
The subtree, which starts as an OR of all the sub-postlists being joined with Synonym, but may decay into something else.
virtual Xapian::doccount get_termfreq_min() const =0
Get a lower bound on the number of documents indexed by this term.
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
Combine subqueries, weighting as if they are synonyms.
void set_weight(const Xapian::Weight *wt_)
Set the weight object to be used for the synonym postlist.
bool next_handling_prune(PostList *&pl, double w_min, MultiMatch *matcher)
virtual Xapian::termcount get_doclength() const =0
Return the length of current document.
virtual std::string get_description() const =0
Return a string description of this object.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
virtual double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqterms) const =0
Calculate the weight contribution for this object's term to a document.
MultiMatch * matcher
The object which is using this postlist to perform a match.
Various assertion macros.
double get_weight() const
Return the weight contribution for the current position.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Xapian::termcount doclen_lower_bound
Lower bound on doclength in the subdatabase we're working over.
bool want_wdf
Flag indicating whether the weighting object needs the wdf.
virtual Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Definition: postlist.cc:44
Xapian::doccount get_termfreq_min() const
Get a lower bound on the number of documents indexed by this term.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
Abstract base class for weighting schemes.
Definition: weight.h:35
double get_maxweight() const
Return an upper bound on what get_weight() can return.
Xapian::doccount get_termfreq_est() const
Get an estimate of the number of documents indexed by this term.