xapian-core  2.0.0
latlong_posting_source.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008 Lemur Consulting Ltd
5  * Copyright 2010,2011 Richard Boulton
6  * Copyright 2012,2015 Olly Betts
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, see
20  * <https://www.gnu.org/licenses/>.
21  */
22 
23 #include <config.h>
24 
25 #include "xapian/geospatial.h"
26 
27 #include "xapian/error.h"
28 #include "xapian/registry.h"
29 
30 #include "pack.h"
31 #include "serialise-double.h"
32 #include "str.h"
33 
34 #include <cmath>
35 
36 using namespace Xapian;
37 using namespace std;
38 
/** Convert a distance into a weight using the constants k1 and k2.
 *
 *  Evaluates k1 * (dist + k1)^(-k2).
 *
 *  @param dist  Distance to convert.
 *  @param k1    Scale constant; also added to @a dist before the power.
 *  @param k2    Exponent applied to (dist + k1).
 *
 *  @return  The resulting weight.
 */
static double
weight_from_distance(double dist, double k1, double k2)
{
    const double shifted = dist + k1;
    // k2 is 1.0 by default; that case is a plain division, so take it
    // directly and skip the pow() call.
    return (k2 == 1.0) ? k1 / shifted : k1 * std::pow(shifted, -k2);
}
48 
// Calculate the distance for the current document.
//
// Applies the metric to `centre` and the value returned by get_value(), and
// stores the result in the member `dist`.
//
// NOTE(review): the listing omits the signature line (listing line 50); the
// member index names this function calc_distance() - confirm against the
// real source file.
49 void
51 {
52  dist = (*metric)(centre, get_value());
53 }
54 
56 static void
57 validate_postingsource_params(double k1, double k2) {
58  if (k1 <= 0) {
59  string msg("k1 parameter to LatLongDistancePostingSource must be "
60  "greater than 0; was ");
61  msg += str(k1);
62  throw InvalidArgumentError(msg);
63  }
64  if (k2 <= 0) {
65  string msg("k2 parameter to LatLongDistancePostingSource must be "
66  "greater than 0; was ");
67  msg += str(k2);
68  throw InvalidArgumentError(msg);
69  }
70 }
71 
// Internal constructor taking the metric by pointer (the member index
// describes it as "used by clone() and serialise()").
//
// The pointer is stored in `metric` as-is, without cloning; the destructor
// in this file deletes `metric`, so the new object ends up owning it.
//
// NOTE(review): the listing omits the line opening this definition (listing
// line 72) and body lines 86-87 - confirm their contents against the real
// source file.
73  valueno slot_,
74  const LatLongCoords & centre_,
75  const LatLongMetric * metric_,
76  double max_range_,
77  double k1_,
78  double k2_)
79  : ValuePostingSource(slot_),
80  centre(centre_),
81  metric(metric_),
82  max_range(max_range_),
83  k1(k1_),
84  k2(k2_)
85 {
88 }
89 
// Constructor taking the metric by reference.
//
// The metric is copied with metric_.clone() and the copy is stored in
// `metric`, so the caller's object is not retained.
//
// NOTE(review): the listing omits the line opening this definition (listing
// line 90) and body lines 104-105 - confirm their contents against the real
// source file.
91  valueno slot_,
92  const LatLongCoords & centre_,
93  const LatLongMetric & metric_,
94  double max_range_,
95  double k1_,
96  double k2_)
97  : ValuePostingSource(slot_),
98  centre(centre_),
99  metric(metric_.clone()),
100  max_range(max_range_),
101  k1(k1_),
102  k2(k2_)
103 {
106 }
107 
// Constructor taking no metric argument.
//
// A newly allocated Xapian::GreatCircleMetric is stored in `metric`.
//
// NOTE(review): the listing omits the line opening this definition (listing
// line 108) and body lines 121-122 - confirm their contents against the real
// source file.
109  valueno slot_,
110  const LatLongCoords & centre_,
111  double max_range_,
112  double k1_,
113  double k2_)
114  : ValuePostingSource(slot_),
115  centre(centre_),
116  metric(new Xapian::GreatCircleMetric()),
117  max_range(max_range_),
118  k1(k1_),
119  k2(k2_)
120 {
123 }
124 
// Releases the metric object held in `metric`.
//
// NOTE(review): the listing omits the signature line (listing line 125);
// this body looks like the class destructor - confirm against the real
// source file.
126 {
127  delete metric;
128 }
129 
// Advance the current position to the next matching document.
//
// Steps the underlying ValuePostingSource forward once, then keeps stepping
// until it is either at the end or positioned on a document accepted by the
// range test below.
//
// NOTE(review): the listing omits the signature line (listing line 131).
130 void
132 {
133  ValuePostingSource::next(min_wt);
134 
135  while (!ValuePostingSource::at_end()) {
// Refresh `dist` for the document we are now positioned on.
136  calc_distance();
// A max_range of 0 means there is no maximum range, so every document is
// accepted; otherwise accept only documents within max_range.
137  if (max_range == 0 || dist <= max_range)
138  break;
139  ValuePostingSource::next(min_wt);
140  }
141 }
142 
// Advance to the specified docid.
//
// Delegates the skip to the underlying ValuePostingSource, then steps
// forward with next() until it is either at the end or positioned on a
// document accepted by the range test below.
//
// NOTE(review): the listing omits the first signature line (listing line
// 144), which presumably declares min_docid - confirm against the real
// source file.
143 void
145  double min_wt)
146 {
147  ValuePostingSource::skip_to(min_docid, min_wt);
148 
149  while (!ValuePostingSource::at_end()) {
// Refresh `dist` for the document we are now positioned on.
150  calc_distance();
// A max_range of 0 means there is no maximum range.
151  if (max_range == 0 || dist <= max_range)
152  break;
153  ValuePostingSource::next(min_wt);
154  }
155 }
156 
// Check if the specified docid occurs.
//
// Returns false when the underlying ValuePostingSource::check() returns
// false, or when the distance computed for the document exceeds a non-zero
// max_range; returns true otherwise.
//
// NOTE(review): the listing omits the first signature line (listing line
// 158) and listing line 165.  The latter evidently opens the block closed at
// listing line 168 - judging by the comment inside it, presumably an
// at-end test.  Confirm both against the real source file.
157 bool
159  double min_wt)
160 {
161  if (!ValuePostingSource::check(min_docid, min_wt)) {
162  // check returned false, so we know the document is not in the source.
163  return false;
164  }
166  // return true, since we're definitely at the end of the list.
167  return true;
168  }
169 
// Refresh `dist`, then reject the document only when a maximum range is set
// (max_range > 0) and the document lies beyond it.
170  calc_distance();
171  if (max_range > 0 && dist > max_range) {
172  return false;
173  }
174  return true;
175 }
176 
// Return the weight contribution for the current document.
//
// The weight is weight_from_distance() applied to the stored `dist`, using
// this object's k1 and k2.  `dist` is whatever calc_distance() last stored.
//
// NOTE(review): the listing omits the signature line (listing line 178).
177 double
179 {
180  return weight_from_distance(dist, k1, k2);
181 }
182 
// Clone the posting source.
//
// The visible arguments show the copy receives its own clone of the metric
// (metric->clone()) together with this object's max_range, k1 and k2.
//
// NOTE(review): the listing omits the signature lines (listing lines
// 183-184) and the first line of the return statement (listing line 186) -
// confirm the leading arguments against the real source file.
185 {
187  metric->clone(),
188  max_range, k1, k2);
189 }
190 
// Name of the posting source class.
//
// Always returns the fixed string below.
//
// NOTE(review): the listing omits the signature line (listing line 192).
191 string
193 {
194  return "Xapian::LatLongDistancePostingSource";
195 }
196 
// Serialise object parameters into a string.
//
// Layout, in order: max_range, k1 and k2 (each via serialise_double), the
// value slot (pack_uint), the serialised centre (pack_string), the metric's
// name (pack_string), and finally the metric's own serialisation.
//
// NOTE(review): the listing omits the signature line (listing line 198).
197 string
199 {
200  string result;
201  result += serialise_double(max_range);
202  result += serialise_double(k1);
203  result += serialise_double(k2);
204  pack_uint(result, get_slot());
205  pack_string(result, centre.serialise());
206  pack_string(result, metric->name());
// The metric's serialisation is appended raw rather than via pack_string;
// the unserialise code in this file reads it as "everything that remains",
// so it has to stay the last field.
207  result += metric->serialise();
208  return result;
209 }
210 
// Create object given string serialisation returned by serialise().
//
// Reads the fields in the order serialise() writes them, looks the metric
// up by name in `registry`, asks that registered metric to unserialise the
// remaining bytes, and returns a newly allocated posting source.
//
// Throws SerialisationError if the slot, centre or metric name cannot be
// unpacked, and InvalidArgumentError if no metric with the stored name is
// registered.
//
// NOTE(review): the listing omits the leading signature lines (listing
// lines 211-212), which presumably declare the input string `s` - confirm
// against the real source file.
213  const Registry & registry) const
214 {
215  const char * p = s.data();
216  const char * end = p + s.size();
217 
218  double new_max_range = unserialise_double(&p, end);
219  double new_k1 = unserialise_double(&p, end);
220  double new_k2 = unserialise_double(&p, end);
221 
222  valueno new_slot;
223  string new_serialised_centre;
224  string new_metric_name;
225  if (!unpack_uint(&p, end, &new_slot) ||
226  !unpack_string(&p, end, new_serialised_centre) ||
227  !unpack_string(&p, end, new_metric_name)) {
228  throw SerialisationError("Bad serialised LatLongDistancePostingSource");
229  }
230 
// Everything left after the metric name is the metric's own serialisation.
231  string new_serialised_metric(p, end - p);
232 
233  LatLongCoords new_centre;
234  new_centre.unserialise(new_serialised_centre);
235 
236  const Xapian::LatLongMetric * metric_type =
237  registry.get_lat_long_metric(new_metric_name);
238  if (metric_type == NULL) {
239  string msg("LatLongMetric ");
240  msg += new_metric_name;
241  msg += " not registered";
242  throw InvalidArgumentError(msg);
243  }
244  LatLongMetric * new_metric =
245  metric_type->unserialise(new_serialised_metric);
246 
// The metric is handed over as a raw pointer, selecting the constructor
// overload that takes `const LatLongMetric *`.
247  return new LatLongDistancePostingSource(new_slot, new_centre,
248  new_metric,
249  new_max_range, new_k1, new_k2);
250 }
251 
// Set this PostingSource to the start of the list of postings.
//
// Delegates to ValuePostingSource::reset() and then, when a maximum range
// is in force (max_range > 0), sets the term frequency lower bound to 0.
//
// NOTE(review): the listing omits the first signature line (listing line
// 253), which presumably declares db_ - confirm against the real source
// file.
252 void
254  Xapian::doccount shard_index)
255 {
256  ValuePostingSource::reset(db_, shard_index);
257  if (max_range > 0.0) {
258  // Possible that no documents are in range.
259  set_termfreq_min(0);
260  // Note - would be good to improve termfreq_est here, too, but
261  // I can't think of anything we can do with the information
262  // available.
263  }
264 }
265 
// Return a string describing this object.
//
// The description has the form
// "Xapian::LatLongDistancePostingSource(slot=N)", where N is get_slot()
// converted with str().  No other parameters are included.
//
// NOTE(review): the listing omits the signature line (listing line 267).
266 string
268 {
269  string result("Xapian::LatLongDistancePostingSource(slot=");
270  result += str(get_slot());
271  result += ")";
272  return result;
273 }
An indexed database of documents.
Definition: database.h:75
Calculate the great-circle distance between two coordinates on a sphere.
Definition: geospatial.h:398
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
A sequence of latitude-longitude coordinates.
Definition: geospatial.h:231
std::string serialise() const
Return a serialised form of the coordinate list.
void unserialise(std::string_view serialised)
Unserialise a string and set this object to the coordinates in it.
Definition: latlongcoord.cc:95
Posting source which returns a weight based on geospatial distance.
Definition: geospatial.h:454
void next(double min_wt)
Advance the current position to the next matching document.
std::string get_description() const
Return a string describing this object.
LatLongDistancePostingSource(Xapian::valueno slot_, const LatLongCoords &centre_, const LatLongMetric *metric_, double max_range_, double k1_, double k2_)
Internal constructor; used by clone() and serialise().
double k1
Constant used in weighting function.
Definition: geospatial.h:468
double get_weight() const
Return the weight contribution for the current document.
double k2
Constant used in weighting function.
Definition: geospatial.h:471
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
double dist
Current distance from centre.
Definition: geospatial.h:456
double max_range
Maximum range to allow. If set to 0, there is no maximum range.
Definition: geospatial.h:465
LatLongCoords centre
Centre, to compute distance from.
Definition: geospatial.h:459
std::string serialise() const
Serialise object parameters into a string.
LatLongDistancePostingSource * clone() const
Clone the posting source.
const LatLongMetric * metric
Metric to compute the distance with.
Definition: geospatial.h:462
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
void reset(const Database &db_, Xapian::doccount shard_index)
Set this PostingSource to the start of the list of postings.
std::string name() const
Name of the posting source class.
LatLongDistancePostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
void calc_distance()
Calculate the distance for the current document.
Base class for calculating distances between two lat/long coordinates.
Definition: geospatial.h:302
virtual LatLongMetric * unserialise(const std::string &serialised) const =0
Create object given string serialisation returned by serialise().
virtual std::string serialise() const =0
Serialise object parameters into a string.
virtual LatLongMetric * clone() const =0
Clone the metric.
virtual std::string name() const =0
Return the full name of the metric.
void set_maxweight(double max_weight)
Specify an upper bound on what get_weight() will return from now on.
Registry for user subclasses.
Definition: registry.h:47
const Xapian::LatLongMetric * get_lat_long_metric(std::string_view name) const
Get a lat-long metric given a name.
Definition: registry.cc:359
Indicates an error in the std::string serialisation of an object.
Definition: error.h:917
A posting source which generates weights from a value slot.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
void set_termfreq_min(Xapian::doccount termfreq_min_)
Set a lower bound on the term frequency.
bool at_end() const
Return true if the current position is past the last entry in this list.
void next(double min_wt)
Advance the current position to the next matching document.
void reset(const Database &db_, Xapian::doccount shard_index)
Set this PostingSource to the start of the list of postings.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
Xapian::valueno get_slot() const
The slot we're reading values from.
PositionList * p
Hierarchy of classes which Xapian can throw as exceptions.
Geospatial search support routines.
static double weight_from_distance(double dist, double k1, double k2)
static void validate_postingsource_params(double k1, double k2)
Validate the parameters supplied to LatLongDistancePostingSource.
string str(int value)
Convert int to std::string.
Definition: str.cc:91
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Pack types into strings and unpack them again.
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
Definition: pack.h:468
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:346
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:315
void pack_string(std::string &s, std::string_view value)
Append an encoded std::string to a string.
Definition: pack.h:442
Class for looking up user subclasses during unserialisation.
string serialise_double(double v)
Serialise a double to a string.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Functions to serialise and unserialise a double.
Convert types to std::string.