xapian-core  2.0.0
diceweight.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2018 Guruprasad Hegde
5  * Copyright (C) 2024 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "xapian/weight.h"
25 
26 #include "xapian/error.h"
27 
28 using namespace std;
29 
30 namespace Xapian {
31 
32 DiceWeight*
33 DiceWeight::clone() const
34 {
35  return new DiceWeight();
36 }
37 
38 void
39 DiceWeight::init(double factor)
40 {
41  if (factor == 0.0) {
42  // This object is for the term-independent contribution, and that's
43  // always zero for this scheme.
44  return;
45  }
46 
47  numerator = get_wqf() * 2 * factor;
48 
49  // The Dice Coefficient formula is
50  //
51  // dice_coeff(q, d) = 2.0 * (q ∩ d) / (|q| + |d|)
52  //
53  // where q is the set of query terms and d the set of document terms.
54  //
55  // The value of (q ∩ d) is the sum of wqf for query terms matching the
56  // current document. That summing is done by the matcher, and each term
57  // needs to contribute:
58  //
59  // 2.0 * wqf / (query_length + unique_term_count)
60  //
61  // We multiply that by factor, which is 1.0 unless OP_SCALE_WEIGHT has
62  // been applied.
63  //
64  // factor * 2.0 * wqf / (query_length + unique_term_count)
65  //
66  // We need an upper bound on this for any document in a given database.
67  // Note that wdf and query_length are determined by the query, and only
68  // unique_term_count varies by document. We want to minimise the
69  // denominator and so minimise unique_term_count.
70  auto denominator = get_query_length() + get_unique_terms_lower_bound();
71  upper_bound = numerator / denominator;
72 }
73 
74 string
76 {
77  return "dice";
78 }
79 
80 string
81 DiceWeight::serialise() const
82 {
83  return string();
84 }
85 
87 DiceWeight::unserialise(const string& s) const
88 {
89  if (rare(!s.empty())) {
90  throw Xapian::SerialisationError("Extra data in "
91  "DiceWeight::unserialise()");
92  }
93  return new DiceWeight;
94 }
95 
96 double
97 DiceWeight::get_sumpart(Xapian::termcount,
99  Xapian::termcount uniqterms,
100  Xapian::termcount) const
101 {
102  return numerator / (get_query_length() + uniqterms);
103 }
104 
105 double
106 DiceWeight::get_maxpart() const
107 {
108  return upper_bound;
109 }
110 
111 DiceWeight*
112 DiceWeight::create_from_parameters(const char* p) const
113 {
114  if (*p != '\0') {
115  throw InvalidArgumentError("No parameters are required for DiceWeight");
116  }
117  return new Xapian::DiceWeight;
118 }
119 
120 }
char name[9]
Definition: dbcheck.cc:57
Xapian::Weight subclass implementing Dice Coefficient.
Definition: weight.h:2207
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
Indicates an error in the std::string serialisation of an object.
Definition: error.h:917
#define rare(COND)
Definition: config.h:607
PositionList * p
Hierarchy of classes which Xapian can throw as exceptions.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
Weighting scheme API.