xapian-core  1.4.26
weight.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2014,2017,2019 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "xapian/weight.h"
25 
26 #include "api/leafpostlist.h"
27 #include "weightinternal.h"
28 
29 #include "omassert.h"
30 #include "debuglog.h"
31 
32 #include "xapian/error.h"
33 
34 using namespace std;
35 
36 namespace Xapian {
37 
38 void
39 Weight::init_(const Internal & stats, Xapian::termcount query_length)
40 {
41  LOGCALL_VOID(MATCH, "Weight::init_", stats | query_length);
42  collection_size_ = stats.collection_size;
43  rset_size_ = stats.rset_size;
44  if (stats_needed & AVERAGE_LENGTH)
45  average_length_ = stats.get_average_length();
46  if (stats_needed & DOC_LENGTH_MAX)
47  doclength_upper_bound_ = stats.db.get_doclength_upper_bound();
48  if (stats_needed & DOC_LENGTH_MIN)
49  doclength_lower_bound_ = stats.db.get_doclength_lower_bound();
50  collectionfreq_ = 0;
51  wdf_upper_bound_ = 0;
52  termfreq_ = 0;
53  reltermfreq_ = 0;
54  query_length_ = query_length;
55  wqf_ = 1;
56  init(0.0);
57 }
58 
59 void
60 Weight::init_(const Internal & stats, Xapian::termcount query_length,
61  const string & term, Xapian::termcount wqf, double factor,
62  void* postlist_void)
63 {
64  LOGCALL_VOID(MATCH, "Weight::init_", stats | query_length | term | wqf | factor | postlist_void);
65  collection_size_ = stats.collection_size;
66  rset_size_ = stats.rset_size;
67  if (stats_needed & AVERAGE_LENGTH)
68  average_length_ = stats.get_average_length();
69  if (stats_needed & DOC_LENGTH_MAX)
70  doclength_upper_bound_ = stats.db.get_doclength_upper_bound();
71  if (stats_needed & DOC_LENGTH_MIN)
72  doclength_lower_bound_ = stats.db.get_doclength_lower_bound();
73  if (stats_needed & WDF_MAX) {
74  if (usual(postlist_void != nullptr)) {
75  auto postlist = static_cast<LeafPostList*>(postlist_void);
76  wdf_upper_bound_ = postlist->get_wdf_upper_bound();
77  } else {
78  wdf_upper_bound_ = stats.db.get_wdf_upper_bound(term);
79  }
80  }
81  if (stats_needed & (TERMFREQ | RELTERMFREQ | COLLECTION_FREQ)) {
82  bool ok = stats.get_stats(term,
83  termfreq_, reltermfreq_, collectionfreq_);
84  (void)ok;
85  Assert(ok);
86  }
87  query_length_ = query_length;
88  wqf_ = wqf;
89  init(factor);
90 }
91 
92 void
93 Weight::init_(const Internal & stats, Xapian::termcount query_length,
94  const string & term, Xapian::termcount wqf, double factor)
95 {
96  init_(stats, query_length, term, wqf, factor, nullptr);
97 }
98 
99 void
100 Weight::init_(const Internal & stats, Xapian::termcount query_length,
101  double factor, Xapian::doccount termfreq,
102  Xapian::doccount reltermfreq, Xapian::termcount collection_freq)
103 {
104  LOGCALL_VOID(MATCH, "Weight::init_", stats | query_length | factor | termfreq | reltermfreq | collection_freq);
105  // Synonym case.
106  collection_size_ = stats.collection_size;
107  rset_size_ = stats.rset_size;
108  if (stats_needed & AVERAGE_LENGTH)
109  average_length_ = stats.get_average_length();
110  if (stats_needed & (DOC_LENGTH_MAX | WDF_MAX)) {
111  doclength_upper_bound_ = stats.db.get_doclength_upper_bound();
112  // The doclength is an upper bound on the wdf. This is obviously true
113  // for normal terms, but SynonymPostList ensures that it is also true
114  // for synonym terms by clamping the wdf values returned to the
115  // doclength.
116  //
117  // (This clamping is only actually necessary in cases where a
118  // constituent term of the synonym is repeated.)
119  wdf_upper_bound_ = doclength_upper_bound_;
120  }
121  if (stats_needed & DOC_LENGTH_MIN)
122  doclength_lower_bound_ = stats.db.get_doclength_lower_bound();
123 
124  termfreq_ = termfreq;
125  reltermfreq_ = reltermfreq;
126  query_length_ = query_length;
127  collectionfreq_ = collection_freq;
128  wqf_ = 1;
129  init(factor);
130 }
131 
132 Weight::~Weight() { }
133 
134 string
136 {
137  return string();
138 }
139 
140 string
141 Weight::serialise() const
142 {
143  throw Xapian::UnimplementedError("serialise() not supported for this Xapian::Weight subclass");
144 }
145 
146 Weight *
147 Weight::unserialise(const string &) const
148 {
149  throw Xapian::UnimplementedError("unserialise() not supported for this Xapian::Weight subclass");
150 }
151 
152 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
#define Assert(COND)
Definition: omassert.h:122
virtual Xapian::termcount get_wdf_upper_bound() const =0
Xapian::Database db
Database to get the bounds on doclength and wdf from.
#define usual(COND)
Definition: config.h:576
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
Definition: omdatabase.cc:401
bool get_stats(const std::string &term, Xapian::doccount &termfreq, Xapian::doccount &reltermfreq, Xapian::termcount &collfreq) const
Get the frequencies for the given term.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
STL namespace.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:39
Abstract base class for leaf postlists.
Xapian::doccount collection_size
Number of documents in the collection.
Xapian::doccount rset_size
Number of relevant documents in the collection.
Hierarchy of classes which Xapian can throw as exceptions.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Definition: omdatabase.cc:421
Xapian::Weight::Internal class, holding database and term statistics.
Class to hold statistics for a given collection.
Weighting scheme API.
Xapian::doclength get_average_length() const
char name[9]
Definition: dbcheck.cc:55
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Various assertion macros.
Debug logging macros.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Abstract base class for weighting schemes.
Definition: weight.h:35
Xapian::termcount get_wdf_upper_bound(const std::string &term) const
Get an upper bound on the wdf of term term.
Definition: omdatabase.cc:435