xapian-core  1.4.21
latlong_posting_source.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008 Lemur Consulting Ltd
5  * Copyright 2010,2011 Richard Boulton
6  * Copyright 2012,2015 Olly Betts
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "xapian/geospatial.h"
27 
28 #include "xapian/error.h"
29 #include "xapian/registry.h"
30 
31 #include "net/length.h"
32 #include "serialise-double.h"
33 #include "str.h"
34 
35 #include <cmath>
36 
37 using namespace Xapian;
38 using namespace std;
39 
/** Convert a distance into a weight.
 *
 *  Evaluates k1 * (dist + k1)^(-k2).
 *
 *  @param dist	The distance to convert.
 *  @param k1	Constant added to the distance and used as the overall
 *		scale factor.
 *  @param k2	Exponent; it is negated before being applied.
 *
 *  @return The computed weight.
 */
static double
weight_from_distance(double dist, double k1, double k2)
{
    const double shifted_distance = dist + k1;
    const double decay = std::pow(shifted_distance, -k2);
    return k1 * decay;
}
45 
// Calculates the distance for the current document: applies the metric to
// `centre` and the value returned by get_value(), storing the result in
// `dist`.
// NOTE(review): the line naming this function (listing line 47) is absent
// from this listing; presumably LatLongDistancePostingSource::calc_distance()
// -- confirm against the original file.
46 void
48 {
49  dist = (*metric)(centre, get_value());
50 }
51 
53 static void
54 validate_postingsource_params(double k1, double k2) {
55  if (k1 <= 0) {
56  string msg("k1 parameter to LatLongDistancePostingSource must be "
57  "greater than 0; was ");
58  msg += str(k1);
59  throw InvalidArgumentError(msg);
60  }
61  if (k2 <= 0) {
62  string msg("k2 parameter to LatLongDistancePostingSource must be "
63  "greater than 0; was ");
64  msg += str(k2);
65  throw InvalidArgumentError(msg);
66  }
67 }
68 
// Constructor taking the metric by pointer: `metric_` is stored directly in
// `metric` without cloning.  `slot_` is forwarded to ValuePostingSource and
// the remaining arguments are copied into the members of the same name.
// NOTE(review): the line naming the constructor and listing lines 83-84 of
// its body are absent from this listing -- consult the original file before
// relying on the (apparently empty) body shown here.
70  valueno slot_,
71  const LatLongCoords & centre_,
72  const LatLongMetric * metric_,
73  double max_range_,
74  double k1_,
75  double k2_)
76  : ValuePostingSource(slot_),
77  centre(centre_),
78  metric(metric_),
79  max_range(max_range_),
80  k1(k1_),
81  k2(k2_)
82 {
85 }
86 
// Constructor taking the metric by reference: the pointer returned by
// metric_.clone() is what gets stored in `metric`.  `slot_` is forwarded to
// ValuePostingSource and the remaining arguments are copied into the members
// of the same name.
// NOTE(review): the line naming the constructor and listing lines 101-102 of
// its body are absent from this listing -- consult the original file before
// relying on the (apparently empty) body shown here.
88  valueno slot_,
89  const LatLongCoords & centre_,
90  const LatLongMetric & metric_,
91  double max_range_,
92  double k1_,
93  double k2_)
94  : ValuePostingSource(slot_),
95  centre(centre_),
96  metric(metric_.clone()),
97  max_range(max_range_),
98  k1(k1_),
99  k2(k2_)
100 {
103 }
104 
// Constructor that takes no metric argument.  `slot_` is forwarded to
// ValuePostingSource and the remaining arguments are copied into the members
// of the same name.
// NOTE(review): the line naming the constructor, listing line 113 (which sits
// where an initialiser for `metric` would be expected) and listing lines
// 118-119 of the body are absent from this listing -- consult the original
// file to see how `metric` is initialised.
106  valueno slot_,
107  const LatLongCoords & centre_,
108  double max_range_,
109  double k1_,
110  double k2_)
111  : ValuePostingSource(slot_),
112  centre(centre_),
114  max_range(max_range_),
115  k1(k1_),
116  k2(k2_)
117 {
120 }
121 
// Deletes the object pointed to by `metric`.
// NOTE(review): the line naming this function (listing line 122) is absent
// from this listing; presumably the LatLongDistancePostingSource destructor
// -- confirm against the original file.
123 {
124  delete metric;
125 }
126 
// Advances to the next entry that passes the range filter.
//
// The underlying ValuePostingSource is stepped once, then stepped repeatedly
// until it is at the end or the current entry's distance is acceptable.
// An entry is acceptable when `max_range` is 0 (no range limit) or when
// `dist` does not exceed `max_range`.
// NOTE(review): the line naming this function (listing line 128) is absent
// from this listing; presumably LatLongDistancePostingSource::next(double
// min_wt) -- confirm against the original file.
127 void
129 {
130  ValuePostingSource::next(min_wt);
131 
132  while (!ValuePostingSource::at_end()) {
// Refresh `dist` for the entry we are now positioned on.
133  calc_distance();
134  if (max_range == 0 || dist <= max_range)
135  break;
136  ValuePostingSource::next(min_wt);
137  }
138 }
139 
// Skips forward via ValuePostingSource::skip_to(min_docid, min_wt), then
// applies the same range filter as the stepping loop above: entries are
// passed over with ValuePostingSource::next() until the end is reached or the
// current entry is acceptable (`max_range` is 0, or `dist` <= `max_range`).
// NOTE(review): the line carrying the function name and the `min_docid`
// parameter (listing line 141) is absent from this listing; presumably
// LatLongDistancePostingSource::skip_to -- confirm against the original file.
140 void
142  double min_wt)
143 {
144  ValuePostingSource::skip_to(min_docid, min_wt);
145 
146  while (!ValuePostingSource::at_end()) {
// Refresh `dist` for the entry we are now positioned on.
147  calc_distance();
148  if (max_range == 0 || dist <= max_range)
149  break;
150  ValuePostingSource::next(min_wt);
151  }
152 }
153 
// Checks `min_docid` against the underlying ValuePostingSource and then
// against the range filter.
//
// Returns false when ValuePostingSource::check() returns false, or when a
// maximum range is set (`max_range` > 0) and the computed `dist` exceeds it;
// otherwise returns true.
// NOTE(review): the line carrying the function name and the `min_docid`
// parameter (listing line 155) and listing line 162 are absent from this
// listing.  Line 162 is presumably the condition opening the "at the end of
// the list" branch whose body and closing brace appear at listing lines
// 163-165 -- confirm against the original file.
154 bool
156  double min_wt)
157 {
158  if (!ValuePostingSource::check(min_docid, min_wt)) {
159  // check returned false, so we know the document is not in the source.
160  return false;
161  }
163  // return true, since we're definitely at the end of the list.
164  return true;
165  }
166 
// Compute `dist` for the current entry before applying the range filter.
167  calc_distance();
168  if (max_range > 0 && dist > max_range) {
169  return false;
170  }
171  return true;
172 }
173 
// Returns the weight for the current document, obtained by passing the most
// recently computed `dist` together with `k1` and `k2` to
// weight_from_distance().
// NOTE(review): the line naming this function (listing line 175) is absent
// from this listing; presumably LatLongDistancePostingSource::get_weight()
// const -- confirm against the original file.
174 double
176 {
177  return weight_from_distance(dist, k1, k2);
178 }
179 
// Passes a fresh copy of the metric (metric->clone()) together with
// `max_range`, `k1` and `k2` as the trailing arguments of a call whose
// opening line is missing.
// NOTE(review): the return-type/name lines (listing lines 180-181) and the
// start of the statement (listing line 183) are absent from this listing;
// presumably this is LatLongDistancePostingSource::clone() const constructing
// a new LatLongDistancePostingSource -- confirm against the original file.
182 {
184  metric->clone(),
185  max_range, k1, k2);
186 }
187 
// Returns the fixed string "Xapian::LatLongDistancePostingSource".
// NOTE(review): the line naming this function (listing line 189) is absent
// from this listing; presumably LatLongDistancePostingSource::name() const
// -- confirm against the original file.
188 string
190 {
191  return "Xapian::LatLongDistancePostingSource";
192 }
193 
// Serialises this object's parameters into a string.
//
// Layout, in order:
//   - the slot number, via encode_length()
//   - the serialised centre, preceded by its encoded length
//   - the metric's name, preceded by its encoded length
//   - the metric's own serialised form, preceded by its encoded length
//   - max_range, k1 and k2, each via serialise_double()
// NOTE(review): the line naming this function (listing line 195) is absent
// from this listing; presumably LatLongDistancePostingSource::serialise()
// const -- confirm against the original file.
194 string
196 {
197  string serialised_centre = centre.serialise();
198  string metric_name = metric->name();
199  string serialised_metric = metric->serialise();
200 
201  string result = encode_length(get_slot());
202  result += encode_length(serialised_centre.size());
203  result += serialised_centre;
204  result += encode_length(metric_name.size());
205  result += metric_name;
206  result += encode_length(serialised_metric.size());
207  result += serialised_metric;
208  result += serialise_double(max_range);
209  result += serialise_double(k1);
210  result += serialise_double(k2);
211  return result;
212 }
213 
// Rebuilds a LatLongDistancePostingSource from the serialised string `s`.
//
// Fields are read in this order: slot number, length-prefixed centre,
// length-prefixed metric name, length-prefixed metric data, then three
// doubles (max range, k1, k2).
//
// Throws NetworkError if any bytes remain after the last field, and
// InvalidArgumentError if `registry` has no metric registered under the
// decoded name.  Returns an object allocated with `new`.
// NOTE(review): the return-type line and the line carrying the function name
// and the `s` parameter (listing lines 214-215) are absent from this listing;
// presumably LatLongDistancePostingSource::unserialise_with_registry --
// confirm against the original file.
216  const Registry & registry) const
217 {
218  const char * p = s.data();
219  const char * end = p + s.size();
220 
221  valueno new_slot;
222  decode_length(&p, end, new_slot);
223  size_t len;
224  decode_length_and_check(&p, end, len);
225  string new_serialised_centre(p, len);
226  p += len;
227  decode_length_and_check(&p, end, len);
228  string new_metric_name(p, len);
229  p += len;
230  decode_length_and_check(&p, end, len);
231  string new_serialised_metric(p, len);
232  p += len;
233  double new_max_range = unserialise_double(&p, end);
234  double new_k1 = unserialise_double(&p, end);
235  double new_k2 = unserialise_double(&p, end);
// Everything should have been consumed by now; leftover bytes mean the
// input is malformed.
236  if (p != end) {
237  throw NetworkError("Bad serialised LatLongDistancePostingSource - junk at end");
238  }
239 
240  LatLongCoords new_centre;
241  new_centre.unserialise(new_serialised_centre);
242 
// Look the metric up by name, then let the registered instance build a new
// metric from the serialised metric data.
243  const Xapian::LatLongMetric * metric_type =
244  registry.get_lat_long_metric(new_metric_name);
245  if (metric_type == NULL) {
246  string msg("LatLongMetric ");
247  msg += new_metric_name;
248  msg += " not registered";
249  throw InvalidArgumentError(msg);
250  }
251  LatLongMetric * new_metric =
252  metric_type->unserialise(new_serialised_metric);
253 
// `new_metric` is handed over as a pointer, so the pointer-taking
// constructor overload is the one selected here.
254  return new LatLongDistancePostingSource(new_slot, new_centre,
255  new_metric,
256  new_max_range, new_k1, new_k2);
257 }
258 
// When a maximum range is in force (`max_range` > 0.0), lowers the minimum
// term frequency bound to 0, because it is possible that no document lies
// within range.
// NOTE(review): the line naming this function (listing line 260) and listing
// line 262 (the first statement of the body) are absent from this listing;
// presumably LatLongDistancePostingSource::init(const Database & db_) --
// confirm against the original file.
259 void
261 {
263  if (max_range > 0.0) {
264  // Possible that no documents are in range.
265  set_termfreq_min(0);
266  // Note - would be good to improve termfreq_est here, too, but
267  // I can't think of anything we can do with the information
268  // available.
269  }
270 }
271 
// Returns a description of the form
// "Xapian::LatLongDistancePostingSource(slot=N)", where N is the value of
// get_slot() converted with str().  Only the slot is included; the centre,
// metric, range and k1/k2 are not part of the description.
// NOTE(review): the line naming this function (listing line 273) is absent
// from this listing; presumably
// LatLongDistancePostingSource::get_description() const -- confirm against
// the original file.
272 string
274 {
275  string result("Xapian::LatLongDistancePostingSource(slot=");
276  result += str(get_slot());
277  result += ")";
278  return result;
279 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
virtual LatLongMetric * clone() const =0
Clone the metric.
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
length encoded as a string
This class is used to access a database, or a group of databases.
Definition: database.h:68
void set_maxweight(double max_weight)
Specify an upper bound on what get_weight() will return from now on.
LatLongCoords centre
Centre, to compute distance from.
Definition: geospatial.h:460
std::string get_description() const
Return a string describing this object.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
bool at_end() const
Return true if the current position is past the last entry in this list.
static void validate_postingsource_params(double k1, double k2)
Validate the parameters supplied to LatLongDistancePostingSource.
double dist
Current distance from centre.
Definition: geospatial.h:457
void calc_distance()
Calculate the distance for the current document.
STL namespace.
Convert types to std::string.
std::string encode_length(T len)
Encode a length as a variable-length string.
Definition: length.h:36
std::string serialise() const
Return a serialised form of the coordinate list.
Hierarchy of classes which Xapian can throw as exceptions.
functions to serialise and unserialise a double
Posting source which returns a weight based on geospatial distance.
Definition: geospatial.h:454
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
double max_range
Maximum range to allow. If set to 0, there is no maximum range.
Definition: geospatial.h:466
static double weight_from_distance(double dist, double k1, double k2)
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Geospatial search support routines.
Registry for user subclasses.
Definition: registry.h:47
virtual LatLongMetric * unserialise(const std::string &serialised) const =0
Create object given string serialisation returned by serialise().
void init(const Database &db_)
Set this PostingSource to the start of the list of postings.
Base class for calculating distances between two lat/long coordinates.
Definition: geospatial.h:303
LatLongDistancePostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
string str(int value)
Convert int to std::string.
Definition: str.cc:90
virtual std::string serialise() const =0
Serialise object parameters into a string.
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
const LatLongMetric * metric
Metric to compute the distance with.
Definition: geospatial.h:463
double k1
Constant used in weighting function.
Definition: geospatial.h:469
A sequence of latitude-longitude coordinates.
Definition: geospatial.h:232
std::string name() const
Name of the posting source class.
A posting source which generates weights from a value slot.
bool check(Xapian::docid min_docid, double min_wt)
Check if the specified docid occurs.
double get_weight() const
Return the weight contribution for the current document.
std::string serialise_double(double v)
Serialise a double to a string.
void decode_length_and_check(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:112
LatLongDistancePostingSource * clone() const
Clone the posting source.
Indicates a problem communicating with a remote database.
Definition: error.h:803
virtual std::string name() const =0
Return the full name of the metric.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
LatLongDistancePostingSource(Xapian::valueno slot_, const LatLongCoords &centre_, const LatLongMetric *metric_, double max_range_, double k1_, double k2_)
Internal constructor; used by clone() and serialise().
double k2
Constant used in weighting function.
Definition: geospatial.h:472
std::string serialise() const
Serialise object parameters into a string.
Calculate the great-circle distance between two coordinates on a sphere.
Definition: geospatial.h:399
void skip_to(Xapian::docid min_docid, double min_wt)
Advance to the specified docid.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
void unserialise(const std::string &serialised)
Unserialise a string and set this object to the coordinates in it.
Definition: latlongcoord.cc:94
void decode_length(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
Definition: length.cc:94
void next(double min_wt)
Advance the current position to the next matching document.
Class for looking up user subclasses during unserialisation.
Xapian::valueno get_slot() const
The slot we're reading values from.
void set_termfreq_min(Xapian::doccount termfreq_min_)
Set a lower bound on the term frequency.
const Xapian::LatLongMetric * get_lat_long_metric(const std::string &name) const
Get a lat-long metric given a name.
Definition: registry.cc:314
void next(double min_wt)
Advance the current position to the next matching document.