xapian-core  1.4.31
valuerangeproc.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007-2026 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include <xapian/queryparser.h>
24 
25 #include <cerrno>
26 #include <cstdlib> // For atoi().
27 
28 #include <string>
29 #include "stringutils.h"
30 
31 using namespace std;
32 
33 namespace Xapian {
34 
36 StringValueRangeProcessor::operator()(string &begin, string &end)
37 {
38  if (str.size()) {
39  if (prefix && begin.empty()) {
40  // Handle empty start and prefix on end, e.g.: ..$20
41  if (!startswith(end, str)) {
42  return Xapian::BAD_VALUENO;
43  }
44  end.erase(0, str.size());
45  } else if (!prefix && end.empty()) {
46  // Handle empty end and suffix on start, e.g.: 20kg..
47  if (!endswith(begin, str)) {
48  return Xapian::BAD_VALUENO;
49  }
50  begin.resize(begin.size() - str.size());
51  } else if (prefix) {
52  // If there's a prefix, require it on the start of the range.
53  if (!startswith(begin, str)) {
54  // Prefix not given.
55  return Xapian::BAD_VALUENO;
56  }
57  begin.erase(0, str.size());
58  // But it's optional on the end of the range, e.g. $10..50
59  if (startswith(end, str)) {
60  end.erase(0, str.size());
61  }
62  } else {
63  // If there's a suffix, require it on the end of the range.
64  if (!endswith(end, str)) {
65  // Suffix not given.
66  return Xapian::BAD_VALUENO;
67  }
68  end.resize(end.size() - str.size());
69  // But it's optional on the start of the range, e.g. 10..50kg
70  if (endswith(begin, str)) {
71  begin.resize(begin.size() - str.size());
72  }
73  }
74  }
75  return valno;
76 }
77 
/** Split a short numeric date of the form X/X/Y into its three fields.
 *
 *  Accepted input is one or two digits, a separator, one or two digits, a
 *  separator, then one to four digits.  Each separator may be '/', '-' or
 *  '.' (the two separators are not required to match).  The first two
 *  fields must each be in the range 1 to 31; deciding which is the day and
 *  which is the month is left to the caller.
 *
 *  @param s   String to decode.
 *  @param x1  Set to the value of the first field.
 *  @param x2  Set to the value of the second field.
 *  @param y   Set to the value of the third (year) field exactly as
 *             written, so "03" gives 3.
 *
 *  @return true if @a s was decoded, or if @a s is empty (in which case all
 *          three outputs are set to -1 to mean "no date given"); false if
 *          @a s isn't in the expected form.  On a false return the outputs
 *          may have been partly assigned.
 */
static bool
decode_xxy(const std::string & s, int & x1, int &x2, int &y)
{
    static const char digits[] = "0123456789";

    if (s.empty()) {
        x1 = x2 = y = -1;
        return true;
    }
    // The shortest form is "1/2/3" and the longest is "01/02/2003".
    if (s.size() < 5 || s.size() > 10) return false;

    // If there's no non-digit at all then i is npos, which the i > 2 test
    // rejects before s[i] is evaluated.
    std::string::size_type i = s.find_first_not_of(digits);
    if (i < 1 || i > 2 || !(s[i] == '/' || s[i] == '-' || s[i] == '.'))
        return false;

    // Likewise a missing second separator makes j npos, and the unsigned
    // difference below is then larger than 2.
    std::string::size_type j = s.find_first_not_of(digits, i + 1);
    if (j - (i + 1) < 1 || j - (i + 1) > 2 ||
        !(s[j] == '/' || s[j] == '-' || s[j] == '.'))
        return false;

    // The year must have at least one digit ("01/02/" used to be accepted
    // with atoi("") yielding a year of 0) and at most four.
    if (j + 1 == s.size()) return false;
    if (s.size() - j > 4 + 1) return false;
    if (s.find_first_not_of(digits, j + 1) != std::string::npos)
        return false;

    // Every field is at most four digits, so atoi() can't overflow here.
    x1 = atoi(s.c_str());
    if (x1 < 1 || x1 > 31) return false;
    x2 = atoi(s.c_str() + i + 1);
    if (x2 < 1 || x2 > 31) return false;
    y = atoi(s.c_str() + j + 1);
    return true;
}
103 
// Upper bound on the day number for each month (index 0 is January).
//
// This is only used to decide whether an ambiguous aa/bb/cc date could be
// read with a particular field order, so February is simply allowed 29 days
// whatever the year.  Requiring the month to be <= 12 is the check which
// does most of the work.
static const char max_month_length[12] = {
    31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};

// Return true if day d and month m could form a date.
//
// A month of -1 marks "no date given" and is always accepted, without
// looking at d.
static bool
vet_dm(int d, int m)
{
    if (m == -1) return true;
    const bool month_ok = (m >= 1 && m <= 12);
    return month_ok && d >= 1 && d <= max_month_length[m - 1];
}
120 
// Test whether s looks like YYYY-MM-DD, YYYY.MM.DD or YYYY/MM/DD (the same
// separator must be used in both places).
//
// NB The caller is expected to have checked that s is 10 characters long.
static bool
is_yyyy_mm_dd(const std::string &s)
{
    static const char digits[] = "0123456789";

    // Four digits then a non-digit...
    if (s.find_first_not_of(digits) != 4) return false;
    // ...which has to be one of the separators we allow.
    const char sep = s[4];
    if (sep != '-' && sep != '.' && sep != '/') return false;
    // Two digits then the same separator again.
    if (s.find_first_not_of(digits, 5) != 7 || s[7] != sep) return false;
    // And only digits after that.
    return s.find_first_not_of(digits, 8) == std::string::npos;
}
131 
// Store the decimal representation of v in p[0] .. p[w - 1].
//
// Exactly w characters are written and no terminator is added.  Values
// with fewer than w digits are padded on the left with zeros; values with
// more than w digits lose their most significant digits (i.e. what is
// written is v % pow(10, w)).
static void
format_int_fixed_width(char * p, int v, int w)
{
    // Fill in from the least significant digit backwards.
    for (int pos = w - 1; pos >= 0; --pos) {
        p[pos] = static_cast<char>('0' + (v % 10));
        v /= 10;
    }
}
146 
147 static void
148 format_yyyymmdd(char * p, int y, int m, int d)
149 {
150  format_int_fixed_width(p, y, 4);
151  format_int_fixed_width(p + 4, m, 2);
152  format_int_fixed_width(p + 6, d, 2);
153 }
154 
156 DateValueRangeProcessor::operator()(string &begin, string &end)
157 {
158  if (StringValueRangeProcessor::operator()(begin, end) == BAD_VALUENO)
159  return BAD_VALUENO;
160 
161  if ((begin.size() == 8 || begin.size() == 0) &&
162  (end.size() == 8 || end.size() == 0) &&
163  begin.find_first_not_of("0123456789") == string::npos &&
164  end.find_first_not_of("0123456789") == string::npos) {
165  // YYYYMMDD
166  return valno;
167  }
168  if ((begin.size() == 10 || begin.size() == 0) &&
169  (end.size() == 10 || end.size() == 0)) {
170  if ((begin.empty() || is_yyyy_mm_dd(begin)) &&
171  (end.empty() || is_yyyy_mm_dd(end))) {
172  // YYYY-MM-DD
173  if (!begin.empty()) {
174  begin.erase(7, 1);
175  begin.erase(4, 1);
176  }
177  if (!end.empty()) {
178  end.erase(7, 1);
179  end.erase(4, 1);
180  }
181  return valno;
182  }
183  }
184 
185  int b_d, b_m, b_y;
186  int e_d, e_m, e_y;
187  if (!decode_xxy(begin, b_d, b_m, b_y) || !decode_xxy(end, e_d, e_m, e_y))
188  return Xapian::BAD_VALUENO;
189 
190  // Check that the month and day are within range. Also assume "start" <=
191  // "end" to help decide ambiguous cases.
192  if (!prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
193  (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
194  // OK.
195  } else if (vet_dm(b_m, b_d) && vet_dm(e_m, e_d) &&
196  (b_y != e_y || b_d < e_d || (b_d == e_d && b_m <= e_m))) {
197  swap(b_m, b_d);
198  swap(e_m, e_d);
199  } else if (prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
200  (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
201  // OK.
202  } else {
203  return Xapian::BAD_VALUENO;
204  }
205 
206  char buf[8];
207  if (!begin.empty()) {
208  if (b_y < 100) {
209  b_y += 1900;
210  if (b_y < epoch_year) b_y += 100;
211  }
212  format_yyyymmdd(buf, b_y, b_m, b_d);
213  begin.assign(buf, 8);
214  }
215  if (!end.empty()) {
216  if (e_y < 100) {
217  e_y += 1900;
218  if (e_y < epoch_year) e_y += 100;
219  }
220  format_yyyymmdd(buf, e_y, e_m, e_d);
221  end.assign(buf, 8);
222  }
223  return valno;
224 }
225 
227 NumberValueRangeProcessor::operator()(string &begin, string &end)
228 {
229  if (StringValueRangeProcessor::operator()(begin, end) == BAD_VALUENO)
230  return BAD_VALUENO;
231 
232  // Parse the numbers to floating point.
233  double beginnum;
234 
235  if (!begin.empty()) {
236  errno = 0;
237  const char * startptr = begin.c_str();
238  char * endptr;
239  beginnum = strtod(startptr, &endptr);
240  if (endptr != startptr + begin.size())
241  // Invalid characters in string
242  return Xapian::BAD_VALUENO;
243  if (errno)
244  // Overflow or underflow
245  return Xapian::BAD_VALUENO;
246  } else {
247  // Silence GCC warning.
248  beginnum = 0.0;
249  }
250 
251  if (!end.empty()) {
252  errno = 0;
253  const char * startptr = end.c_str();
254  char * endptr;
255  double endnum = strtod(startptr, &endptr);
256  if (endptr != startptr + end.size())
257  // Invalid characters in string
258  return Xapian::BAD_VALUENO;
259  if (errno)
260  // Overflow or underflow
261  return Xapian::BAD_VALUENO;
262  end.assign(Xapian::sortable_serialise(endnum));
263  }
264 
265  if (!begin.empty()) {
266  begin.assign(Xapian::sortable_serialise(beginnum));
267  }
268 
269  return valno;
270 }
271 
273 RangeProcessor::check_range(const string& b, const string& e)
274 {
275  if (str.empty())
276  return operator()(b, e);
277 
278  size_t off_b = 0, len_b = string::npos;
279  size_t off_e = 0, len_e = string::npos;
280 
281  bool prefix = !(flags & Xapian::RP_SUFFIX);
282  bool repeated = (flags & Xapian::RP_REPEATED);
283 
284  if (repeated && prefix && b.empty()) {
285  // Handle empty start and prefix on end, e.g.: ..$20
286  if (!startswith(e, str)) {
287  goto not_our_range;
288  }
289  off_e = str.size();
290  } else if (repeated && !prefix && e.empty()) {
291  // Handle empty end and suffix on start, e.g.: 20kg..
292  if (!endswith(b, str)) {
293  goto not_our_range;
294  }
295  len_b = b.size() - str.size();
296  } else if (prefix) {
297  // If there's a prefix, require it on the start of the range.
298  if (!startswith(b, str)) {
299  // Prefix not given.
300  goto not_our_range;
301  }
302  off_b = str.size();
303  // Optionally allow it on the end of the range, e.g. $10..50
304  if (repeated && startswith(e, str)) {
305  off_e = off_b;
306  }
307  } else {
308  // If there's a suffix, require it on the end of the range.
309  if (!endswith(e, str)) {
310  // Suffix not given.
311  goto not_our_range;
312  }
313  len_e = e.size() - str.size();
314  // Optionally allow it on the start of the range, e.g. 10..50kg
315  if (repeated && endswith(b, str)) {
316  len_b = b.size() - str.size();
317  }
318  }
319 
320  return operator()(string(b, off_b, len_b), string(e, off_e, len_e));
321 
322 not_our_range:
324 }
325 
327 RangeProcessor::operator()(const string& b, const string& e)
328 {
329  if (e.empty())
330  return Xapian::Query(Xapian::Query::OP_VALUE_GE, slot, b);
331  return Xapian::Query(Xapian::Query::OP_VALUE_RANGE, slot, b, e);
332 }
333 
335 DateRangeProcessor::operator()(const string& b, const string& e)
336 {
337  if ((b.size() == 8 || b.size() == 0) &&
338  (e.size() == 8 || e.size() == 0) &&
339  b.find_first_not_of("0123456789") == string::npos &&
340  e.find_first_not_of("0123456789") == string::npos) {
341  // YYYYMMDD
342  return RangeProcessor::operator()(b, e);
343  }
344  if ((b.size() == 10 || b.size() == 0) &&
345  (e.size() == 10 || e.size() == 0)) {
346  if ((b.empty() || is_yyyy_mm_dd(b)) &&
347  (e.empty() || is_yyyy_mm_dd(e))) {
348  string begin = b, end = e;
349  // YYYY-MM-DD
350  if (!begin.empty()) {
351  begin.erase(7, 1);
352  begin.erase(4, 1);
353  }
354  if (!end.empty()) {
355  end.erase(7, 1);
356  end.erase(4, 1);
357  }
358  return RangeProcessor::operator()(begin, end);
359  }
360  }
361 
362  bool prefer_mdy = (flags & Xapian::RP_DATE_PREFER_MDY);
363  int b_d, b_m, b_y;
364  int e_d, e_m, e_y;
365  if (!decode_xxy(b, b_d, b_m, b_y) || !decode_xxy(e, e_d, e_m, e_y))
366  goto not_our_range;
367 
368  // Check that the month and day are within range. Also assume "start" <=
369  // "e" to help decide ambiguous cases.
370  if (!prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
371  (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
372  // OK.
373  } else if (vet_dm(b_m, b_d) && vet_dm(e_m, e_d) &&
374  (b_y != e_y || b_d < e_d || (b_d == e_d && b_m <= e_m))) {
375  swap(b_m, b_d);
376  swap(e_m, e_d);
377  } else if (prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
378  (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
379  // OK.
380  } else {
381  goto not_our_range;
382  }
383 
384  {
385  char buf_b[8], buf_e[8];
386  size_t len_b = 0, len_e = 0;
387  if (!b.empty()) {
388  if (b_y < 100) {
389  b_y += 1900;
390  if (b_y < epoch_year) b_y += 100;
391  }
392  format_yyyymmdd(buf_b, b_y, b_m, b_d);
393  len_b = 8;
394  }
395  if (!e.empty()) {
396  if (e_y < 100) {
397  e_y += 1900;
398  if (e_y < epoch_year) e_y += 100;
399  }
400  format_yyyymmdd(buf_e, e_y, e_m, e_d);
401  len_e = 8;
402  }
403  return RangeProcessor::operator()(string(buf_b, len_b),
404  string(buf_e, len_e));
405  }
406 
407 not_our_range:
409 }
410 
412 NumberRangeProcessor::operator()(const string& b, const string& e)
413 {
414  // Parse the numbers to floating point.
415  double num_b, num_e;
416 
417  if (!b.empty()) {
418  errno = 0;
419  const char * startptr = b.c_str();
420  char * endptr;
421  num_b = strtod(startptr, &endptr);
422  if (endptr != startptr + b.size() || errno) {
423  // Invalid characters in string || overflow or underflow.
424  goto not_our_range;
425  }
426  } else {
427  // Silence GCC warning.
428  num_b = 0.0;
429  }
430 
431  if (!e.empty()) {
432  errno = 0;
433  const char * startptr = e.c_str();
434  char * endptr;
435  num_e = strtod(startptr, &endptr);
436  if (endptr != startptr + e.size() || errno) {
437  // Invalid characters in string || overflow or underflow.
438  goto not_our_range;
439  }
440  } else {
441  // Silence GCC warning.
442  num_e = 0.0;
443  }
444 
445  return RangeProcessor::operator()(
446  b.empty() ? b : Xapian::sortable_serialise(num_b),
447  e.empty() ? e : Xapian::sortable_serialise(num_e));
448 
449 not_our_range:
451 }
452 
453 }
Class representing a query.
Definition: query.h:56
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
Definition: query.h:168
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
Definition: query.h:233
@ OP_INVALID
Construct an invalid query.
Definition: query.h:273
string str(int value)
Convert int to std::string.
Definition: str.cc:90
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:125
std::string sortable_serialise(double value)
Convert a floating point number to a string, preserving sort order.
Definition: queryparser.h:1401
static bool decode_xxy(const string &s, int &x1, int &x2, int &y)
@ RP_DATE_PREFER_MDY
Definition: queryparser.h:136
@ RP_REPEATED
Definition: queryparser.h:135
@ RP_SUFFIX
Definition: queryparser.h:134
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
static bool vet_dm(int d, int m)
static const char max_month_length[12]
static void format_int_fixed_width(char *p, int v, int w)
static void format_yyyymmdd(char *p, int y, int m, int d)
static bool is_yyyy_mm_dd(const string &s)
parsing a user query string to build a Xapian::Query object
Various handy helpers which std::string really should provide.
bool endswith(const std::string &s, char sfx)
Definition: stringutils.h:75
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51