xapian-core  1.4.29
sortable-serialise.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2015,2016,2025 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include <xapian/queryparser.h>
24 
25 // Disable these assertions when building the library as these functions are
26 // marked not to throw exceptions in the API headers. In unittest we define
27 // UNITTEST_ASSERT_NOTHROW to set a variable to the exception message and
28 // return, then the harness checks if that variable has been set.
29 #ifndef XAPIAN_UNITTEST
30 # define UNITTEST_ASSERT_NOTHROW(COND,RET)
31 #endif
32 
33 #include <cfloat>
34 #include <cmath>
35 #include <cstring>
36 
37 #include <string>
38 
39 using namespace std;
40 
41 #if FLT_RADIX != 2
42 # error Code currently assumes FLT_RADIX == 2
43 #endif
44 
45 #ifdef _MSC_VER
46 // Disable warning about negating an unsigned type, which we do deliberately.
47 # pragma warning(disable:4146)
48 #endif
49 
50 size_t
52 {
53  /* Special case for infinities and NaN. We check this first since frexp()
54  * on infinity or NaN returns an unspecified value for the exponent.
55  */
56  if (rare(!isfinite(value))) {
57 handle_as_infinity:
58  if (value < 0) {
59  // Negative infinity.
60  return 0;
61  } else {
62  // Positive infinity.
63  memset(buf, '\xff', 9);
64  return 9;
65  }
66  }
67 
68  int exponent;
69  double mantissa = frexp(value, &exponent);
70 
71  /* Deal with zero specially.
72  *
73  * IEEE representation of doubles uses 11 bits for the exponent, with a
74  * bias of 1023. We bias this by subtracting 8, and non-IEEE
75  * representations may allow higher exponents, so allow exponents down to
76  * -2039 - if smaller exponents are possible anywhere, we underflow such
77  * numbers to 0.
78  */
79  if (mantissa == 0.0 || rare(exponent < -2039)) {
80  *buf = '\x80';
81  return 1;
82  }
83 
84  if (rare(exponent > 2055)) {
85  // Extremely large non-IEEE representation.
86  goto handle_as_infinity;
87  }
88 
89  bool negative = (mantissa < 0);
90  if (negative) mantissa = -mantissa;
91 
92  // Encoding:
93  //
94  // [ 7 | 6 | 5 | 4 3 2 1 0]
95  // Sm Se Le
96  //
97  // Sm stores the sign of the mantissa: 1 = positive or zero, 0 = negative.
98  // Se stores the sign of the exponent: Sm for positive/zero, !Sm for neg.
99  // Le stores the length of the exponent: !Se for 3 bits, Se for 11 bits.
100  unsigned char next = (negative ? 0 : 0xe0);
101 
102  // Bias the exponent by 8 so that more small integers get short encodings.
103  exponent -= 8;
104  bool exponent_negative = (exponent < 0);
105  if (exponent_negative) {
106  exponent = -exponent;
107  next ^= 0x60;
108  }
109 
110  size_t len = 0;
111 
112  /* We store the exponent in 3 or 11 bits. If the number is negative, we
113  * flip all the bits of the exponent, since larger negative numbers should
114  * sort first.
115  *
116  * If the exponent is negative, we flip the bits of the exponent, since
117  * larger negative exponents should sort first (unless the number is
118  * negative, in which case they should sort later).
119  */
120  UNITTEST_ASSERT_NOTHROW(exponent >= 0, 0);
121  if (exponent < 8) {
122  next ^= 0x20;
123  next |= static_cast<unsigned char>(exponent << 2);
124  if (negative ^ exponent_negative) next ^= 0x1c;
125  } else {
126  UNITTEST_ASSERT_NOTHROW((exponent >> 11) == 0, 0);
127  // Put the top 5 bits of the exponent into the lower 5 bits of the
128  // first byte:
129  next |= static_cast<unsigned char>(exponent >> 6);
130  if (negative ^ exponent_negative) next ^= 0x1f;
131  buf[len++] = next;
132  // And the lower 6 bits of the exponent go into the upper 6 bits
133  // of the second byte:
134  next = static_cast<unsigned char>(exponent << 2);
135  if (negative ^ exponent_negative) next ^= 0xfc;
136  }
137 
138  // Convert the 52 (or 53) bits of the mantissa into two 32-bit words.
139  mantissa *= 1 << (negative ? 26 : 27);
140  unsigned word1 = static_cast<unsigned>(mantissa);
141  mantissa -= word1;
142  unsigned word2 = static_cast<unsigned>(mantissa * 4294967296.0); // 1<<32
143  // If the number is positive, the first bit will always be set because 0.5
144  // <= mantissa < 1, unless mantissa is zero, which we handle specially
145  // above). If the number is negative, we negate the mantissa instead of
146  // flipping all the bits, so in the case of 0.5, the first bit isn't set
147  // so we need to store it explicitly. But for the cost of one extra
148  // leading bit, we can save several trailing 0xff bytes in lots of common
149  // cases.
150  UNITTEST_ASSERT_NOTHROW(negative || (word1 & (1<<26)), 0);
151  if (negative) {
152  // We negate the mantissa for negative numbers, so that the sort order
153  // is reversed (since larger negative numbers should come first).
154  word1 = -word1;
155  if (word2 != 0) ++word1;
156  word2 = -word2;
157  }
158 
159  word1 &= 0x03ffffff;
160  next |= static_cast<unsigned char>(word1 >> 24);
161  buf[len++] = next;
162  buf[len++] = char(word1 >> 16);
163  buf[len++] = char(word1 >> 8);
164  buf[len++] = char(word1);
165 
166  buf[len++] = char(word2 >> 24);
167  buf[len++] = char(word2 >> 16);
168  buf[len++] = char(word2 >> 8);
169  buf[len++] = char(word2);
170 
171  // Finally, we can chop off any trailing zero bytes.
172  while (len > 0 && buf[len - 1] == '\0') {
173  --len;
174  }
175 
176  return len;
177 }
178 
181 static inline unsigned char
182 numfromstr(const std::string & str, std::string::size_type pos)
183 {
184  return (pos < str.size()) ? static_cast<unsigned char>(str[pos]) : '\0';
185 }
186 
187 double
189 {
190  // Zero.
191  if (value.size() == 1 && value[0] == '\x80') return 0.0;
192 
193  // Positive infinity.
194  if (value.size() == 9 &&
195  memcmp(value.data(), "\xff\xff\xff\xff\xff\xff\xff\xff\xff", 9) == 0) {
196  // "On implementations that support floating-point infinities,
197  // [HUGE_VAL] always expand[s] to the positive infinit[y] of double"
198  // https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL
199  return HUGE_VAL;
200  }
201 
202  // Negative infinity.
203  if (value.empty()) {
204  return -HUGE_VAL;
205  }
206 
207  unsigned char first = numfromstr(value, 0);
208  size_t i = 0;
209 
210  first ^= static_cast<unsigned char>(first & 0xc0) >> 1;
211  bool negative = !(first & 0x80);
212  bool exponent_negative = (first & 0x40);
213  bool explen = !(first & 0x20);
214  int exponent = first & 0x1f;
215  if (!explen) {
216  exponent >>= 2;
217  if (negative ^ exponent_negative) exponent ^= 0x07;
218  } else {
219  first = numfromstr(value, ++i);
220  exponent <<= 6;
221  exponent |= (first >> 2);
222  if (negative ^ exponent_negative) exponent ^= 0x07ff;
223  }
224 
225  unsigned word1;
226  word1 = (unsigned(first & 0x03) << 24);
227  word1 |= numfromstr(value, ++i) << 16;
228  word1 |= numfromstr(value, ++i) << 8;
229  word1 |= numfromstr(value, ++i);
230 
231  unsigned word2 = 0;
232  if (i < value.size()) {
233  word2 = numfromstr(value, ++i) << 24;
234  word2 |= numfromstr(value, ++i) << 16;
235  word2 |= numfromstr(value, ++i) << 8;
236  word2 |= numfromstr(value, ++i);
237  }
238 
239  if (negative) {
240  word1 = -word1;
241  if (word2 != 0) ++word1;
242  word2 = -word2;
243  UNITTEST_ASSERT_NOTHROW((word1 & 0xf0000000) != 0, 0);
244  word1 &= 0x03ffffff;
245  }
246  if (!negative) word1 |= 1<<26;
247 
248  double mantissa = 0;
249  if (word2) mantissa = word2 / 4294967296.0; // 1<<32
250  mantissa += word1;
251  mantissa /= 1 << (negative ? 26 : 27);
252 
253  if (exponent_negative) exponent = -exponent;
254  exponent += 8;
255 
256  if (negative) mantissa = -mantissa;
257 
258  // We use scalbn() since it's equivalent to ldexp() when FLT_RADIX == 2
259  // (which we currently assume), except that ldexp() will set errno if the
260  // result overflows or underflows. We don't need or want errno set (indeed
261  // configure turns on -fno-math-errno if it detects the compiler supports
262  // it).
263  return scalbn(mantissa, exponent);
264 }
STL namespace.
#define rare(COND)
Definition: config.h:575
size_t sortable_serialise_(double value, char *buf)
string str(int value)
Convert int to std::string.
Definition: str.cc:90
static unsigned char numfromstr(const std::string &str, std::string::size_type pos)
Get a number from the character at a given position in a string, returning 0 if the string isn&#39;t long...
double sortable_unserialise(const std::string &serialised)
Convert a string encoded using sortable_serialise back to a floating point number.
#define UNITTEST_ASSERT_NOTHROW(COND, RET)
#define XAPIAN_NOEXCEPT
Definition: attributes.h:39
parsing a user query string to build a Xapian::Query object