xapian-core  1.4.26
latlong_posting_source.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008 Lemur Consulting Ltd
5  * Copyright 2010,2011 Richard Boulton
6  * Copyright 2012,2015 Olly Betts
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "xapian/geospatial.h"
27 
28 #include "xapian/error.h"
29 #include "xapian/registry.h"
30 
31 #include "net/length.h"
32 #include "serialise-double.h"
33 #include "str.h"
34 
35 #include <cmath>
36 
37 using namespace Xapian;
38 using namespace std;
39 
/** Convert a distance from the centre into a weight.
 *
 *  The weight is k1 * (dist + k1)^(-k2).  When k2 is exactly 1 this reduces
 *  to k1 / (dist + k1), which is 1 at distance 0.
 *
 *  @param dist  Distance to convert.
 *  @param k1    Constant added to the distance and used as the scale factor.
 *  @param k2    Exponent applied to (dist + k1).
 *
 *  @return The weight corresponding to @a dist.
 */
static double
weight_from_distance(double dist, double k1, double k2)
{
    const double shifted = dist + k1;
    // k2 defaults to 1.0, so handle that case with a fast path which avoids
    // calling pow().
    if (k2 == 1.0)
        return k1 / shifted;
    return k1 * std::pow(shifted, -k2);
}
49 
50 void
52 {
53  dist = (*metric)(centre, get_value());
54 }
55 
57 static void
58 validate_postingsource_params(double k1, double k2) {
59  if (k1 <= 0) {
60  string msg("k1 parameter to LatLongDistancePostingSource must be "
61  "greater than 0; was ");
62  msg += str(k1);
63  throw InvalidArgumentError(msg);
64  }
65  if (k2 <= 0) {
66  string msg("k2 parameter to LatLongDistancePostingSource must be "
67  "greater than 0; was ");
68  msg += str(k2);
69  throw InvalidArgumentError(msg);
70  }
71 }
72 
74  valueno slot_,
75  const LatLongCoords & centre_,
76  const LatLongMetric * metric_,
77  double max_range_,
78  double k1_,
79  double k2_)
80  : ValuePostingSource(slot_),
81  centre(centre_),
82  metric(metric_),
83  max_range(max_range_),
84  k1(k1_),
85  k2(k2_)
86 {
89 }
90 
92  valueno slot_,
93  const LatLongCoords & centre_,
94  const LatLongMetric & metric_,
95  double max_range_,
96  double k1_,
97  double k2_)
98  : ValuePostingSource(slot_),
99  centre(centre_),
100  metric(metric_.clone()),
101  max_range(max_range_),
102  k1(k1_),
103  k2(k2_)
104 {
107 }
108 
110  valueno slot_,
111  const LatLongCoords & centre_,
112  double max_range_,
113  double k1_,
114  double k2_)
115  : ValuePostingSource(slot_),
116  centre(centre_),
118  max_range(max_range_),
119  k1(k1_),
120  k2(k2_)
121 {
124 }
125 
127 {
128  delete metric;
129 }
130 
131 void
133 {
134  ValuePostingSource::next(min_wt);
135 
136  while (!ValuePostingSource::at_end()) {
137  calc_distance();
138  if (max_range == 0 || dist <= max_range)
139  break;
140  ValuePostingSource::next(min_wt);
141  }
142 }
143 
144 void
146  double min_wt)
147 {
148  ValuePostingSource::skip_to(min_docid, min_wt);
149 
150  while (!ValuePostingSource::at_end()) {
151  calc_distance();
152  if (max_range == 0 || dist <= max_range)
153  break;
154  ValuePostingSource::next(min_wt);
155  }
156 }
157 
158 bool
160  double min_wt)
161 {
162  if (!ValuePostingSource::check(min_docid, min_wt)) {
163  // check returned false, so we know the document is not in the source.
164  return false;
165  }
167  // return true, since we're definitely at the end of the list.
168  return true;
169  }
170 
171  calc_distance();
172  if (max_range > 0 && dist > max_range) {
173  return false;
174  }
175  return true;
176 }
177 
178 double
180 {
181  return weight_from_distance(dist, k1, k2);
182 }
183 
186 {
188  metric->clone(),
189  max_range, k1, k2);
190 }
191 
192 string
194 {
195  return "Xapian::LatLongDistancePostingSource";
196 }
197 
198 string
200 {
201  string serialised_centre = centre.serialise();
202  string metric_name = metric->name();
203  string serialised_metric = metric->serialise();
204 
205  string result = encode_length(get_slot());
206  result += encode_length(serialised_centre.size());
207  result += serialised_centre;
208  result += encode_length(metric_name.size());
209  result += metric_name;
210  result += encode_length(serialised_metric.size());
211  result += serialised_metric;
212  result += serialise_double(max_range);
213  result += serialise_double(k1);
214  result += serialise_double(k2);
215  return result;
216 }
217 
220  const Registry & registry) const
221 {
222  const char * p = s.data();
223  const char * end = p + s.size();
224 
225  valueno new_slot;
226  decode_length(&p, end, new_slot);
227  size_t len;
228  decode_length_and_check(&p, end, len);
229  string new_serialised_centre(p, len);
230  p += len;
231  decode_length_and_check(&p, end, len);
232  string new_metric_name(p, len);
233  p += len;
234  decode_length_and_check(&p, end, len);
235  string new_serialised_metric(p, len);
236  p += len;
237  double new_max_range = unserialise_double(&p, end);
238  double new_k1 = unserialise_double(&p, end);
239  double new_k2 = unserialise_double(&p, end);
240  if (p != end) {
241  throw NetworkError("Bad serialised LatLongDistancePostingSource - junk at end");
242  }
243 
244  LatLongCoords new_centre;
245  new_centre.unserialise(new_serialised_centre);
246 
247  const Xapian::LatLongMetric * metric_type =
248  registry.get_lat_long_metric(new_metric_name);
249  if (metric_type == NULL) {
250  string msg("LatLongMetric ");
251  msg += new_metric_name;
252  msg += " not registered";
253  throw InvalidArgumentError(msg);
254  }
255  LatLongMetric * new_metric =
256  metric_type->unserialise(new_serialised_metric);
257 
258  return new LatLongDistancePostingSource(new_slot, new_centre,
259  new_metric,
260  new_max_range, new_k1, new_k2);
261 }
262 
263 void
265 {
267  if (max_range > 0.0) {
268  // Possible that no documents are in range.
269  set_termfreq_min(0);
270  // Note - would be good to improve termfreq_est here, too, but
271  // I can't think of anything we can do with the information
272  // available.
273  }
274 }
275 
276 string
278 {
279  string result("Xapian::LatLongDistancePostingSource(slot=");
280  result += str(get_slot());
281  result += ")";
282  return result;
283 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
virtual LatLongMetric * clone() const =0
Clone the metric.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
length encoded as a string
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_maxweight(double max_weight)
Specify an upper bound on what get_weight() will return from now on.
LatLongCoords centre
Centre, to compute distance from.
Definition: geospatial.h:460
std::string get_description() const
Return a string describing this object.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
bool at_end() const
Return true if the current position is past the last entry in this list.
static void validate_postingsource_params(double k1, double k2)
Validate the parameters supplied to LatLongDistancePostingSource.
double dist
Current distance from centre.
Definition: geospatial.h:457
void calc_distance()
Calculate the distance for the current document.
STL namespace.
Convert types to std::string.
std::string encode_length(T len)
Encode a length as a variable-length string.
Definition: length.h:36
std::string serialise() const
Return a serialised form of the coordinate list.
Hierarchy of classes which Xapian can throw as exceptions.
functions to serialise and unserialise a double
Posting source which returns a weight based on geospatial distance.
Definition: geospatial.h:454
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
double max_range
Maximum range to allow. If set to 0, there is no maximum range.
Definition: geospatial.h:466
static double weight_from_distance(double dist, double k1, double k2)
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Geospatial search support routines.
Registry for user subclasses.
Definition: registry.h:47
virtual LatLongMetric * unserialise(const std::string &serialised) const =0
Create object given string serialisation returned by serialise().
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Base class for calculating distances between two lat/long coordinates.
Definition: geospatial.h:303
LatLongDistancePostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
string str(int value)
Convert int to std::string.
Definition: str.cc:90
virtual std::string serialise() const =0
Serialise object parameters into a string.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
const LatLongMetric * metric
Metric to compute the distance with.
Definition: geospatial.h:463
double k1
Constant used in weighting function.
Definition: geospatial.h:469
A sequence of latitude-longitude coordinates.
Definition: geospatial.h:232
std::string name() const
Name of the posting source class.
A posting source which generates weights from a value slot.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
double get_weight() const
Return the weight contribution for the current document.
std::string serialise_double(double v)
Serialise a double to a string.
void decode_length_and_check(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:112
LatLongDistancePostingSource * clone() const
Clone the posting source.
Indicates a problem communicating with a remote database.
Definition: error.h:803
virtual std::string name() const =0
Return the full name of the metric.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
LatLongDistancePostingSource(Xapian::valueno slot_, const LatLongCoords &centre_, const LatLongMetric *metric_, double max_range_, double k1_, double k2_)
Internal constructor; used by clone() and serialise().
double k2
Constant used in weighting function.
Definition: geospatial.h:472
std::string serialise() const
Serialise object parameters into a string.
Calculate the great-circle distance between two coordinates on a sphere.
Definition: geospatial.h:399
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
void unserialise(const std::string &serialised)
Unserialise a string and set this object to the coordinates in it.
Definition: latlongcoord.cc:94
void decode_length(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:94
void next(double min_wt)
Advance the current position to the next matching document.
Class for looking up user subclasses during unserialisation.
Xapian::valueno get_slot() const
The slot we're reading values from.
void set_termfreq_min(Xapian::doccount termfreq_min_)
Set a lower bound on the term frequency.
const Xapian::LatLongMetric * get_lat_long_metric(const std::string &name) const
Get a lat-long metric given a name.
Definition: registry.cc:314
void next(double min_wt)
Advance the current position to the next matching document.