xapian-core  2.0.0
sortable-serialise.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2015,2016,2024,2025 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include <xapian/queryparser.h>
24 
25 // Disable these assertions when building the library as these functions are
26 // marked not to throw exceptions in the API headers. In unittest we define
27 // UNITTEST_ASSERT_NOTHROW to set a variable to the exception message and
28 // return, then the harness checks if that variable has been set.
29 #ifndef XAPIAN_UNITTEST
30 # define UNITTEST_ASSERT_NOTHROW(COND,RET)
31 #endif
32 
33 #include "negate_unsigned.h"
34 
35 #include <cfloat>
36 #include <cmath>
37 #include <cstring>
38 
39 #include <string>
40 #include <string_view>
41 
42 using namespace std;
43 
44 #if FLT_RADIX != 2
45 # error Code currently assumes FLT_RADIX == 2
46 #endif
47 
48 size_t
49 Xapian::sortable_serialise_(double value, char* buf) noexcept
50 {
51  /* Special case for infinities and NaN. We check this first since frexp()
52  * on infinity or NaN returns an unspecified value for the exponent.
53  */
54  if (rare(!isfinite(value))) {
55 handle_as_infinity:
56  if (value < 0) {
57  // Negative infinity.
58  return 0;
59  } else {
60  // Positive infinity.
61  memset(buf, '\xff', 9);
62  return 9;
63  }
64  }
65 
66  int exponent;
67  double mantissa = frexp(value, &exponent);
68 
69  /* Deal with zero specially.
70  *
71  * IEEE representation of doubles uses 11 bits for the exponent, with a
72  * bias of 1023. We bias this by subtracting 8, and non-IEEE
73  * representations may allow higher exponents, so allow exponents down to
74  * -2039 - if smaller exponents are possible anywhere, we underflow such
75  * numbers to 0.
76  */
77  if (mantissa == 0.0 || rare(exponent < -2039)) {
78  *buf = '\x80';
79  return 1;
80  }
81 
82  if (rare(exponent > 2055)) {
83  // Extremely large non-IEEE representation.
84  goto handle_as_infinity;
85  }
86 
87  bool negative = (mantissa < 0);
88  if (negative) mantissa = -mantissa;
89 
90  // Encoding:
91  //
92  // [ 7 | 6 | 5 | 4 3 2 1 0]
93  // Sm Se Le
94  //
95  // Sm stores the sign of the mantissa: 1 = positive or zero, 0 = negative.
96  // Se stores the sign of the exponent: Sm for positive/zero, !Sm for neg.
97  // Le stores the length of the exponent: !Se for 3 bits, Se for 11 bits.
98  unsigned char next = (negative ? 0 : 0xe0);
99 
100  // Bias the exponent by 8 so that more small integers get short encodings.
101  exponent -= 8;
102  bool exponent_negative = (exponent < 0);
103  if (exponent_negative) {
104  exponent = -exponent;
105  next ^= 0x60;
106  }
107 
108  size_t len = 0;
109 
110  /* We store the exponent in 3 or 11 bits. If the number is negative, we
111  * flip all the bits of the exponent, since larger negative numbers should
112  * sort first.
113  *
114  * If the exponent is negative, we flip the bits of the exponent, since
115  * larger negative exponents should sort first (unless the number is
116  * negative, in which case they should sort later).
117  */
118  UNITTEST_ASSERT_NOTHROW(exponent >= 0, 0);
119  if (exponent < 8) {
120  next ^= 0x20;
121  next |= static_cast<unsigned char>(exponent << 2);
122  if (negative ^ exponent_negative) next ^= 0x1c;
123  } else {
124  UNITTEST_ASSERT_NOTHROW((exponent >> 11) == 0, 0);
125  // Put the top 5 bits of the exponent into the lower 5 bits of the
126  // first byte:
127  next |= static_cast<unsigned char>(exponent >> 6);
128  if (negative ^ exponent_negative) next ^= 0x1f;
129  buf[len++] = next;
130  // And the lower 6 bits of the exponent go into the upper 6 bits
131  // of the second byte:
132  next = static_cast<unsigned char>(exponent << 2);
133  if (negative ^ exponent_negative) next ^= 0xfc;
134  }
135 
136  // Convert the 52 (or 53) bits of the mantissa into two 32-bit words.
137  mantissa *= 1 << (negative ? 26 : 27);
138  unsigned word1 = static_cast<unsigned>(mantissa);
139  mantissa -= word1;
140  unsigned word2 = static_cast<unsigned>(mantissa * 4294967296.0); // 1<<32
141  // If the number is positive, the first bit will always be set because 0.5
142  // <= mantissa < 1, unless mantissa is zero, which we handle specially
143  // above). If the number is negative, we negate the mantissa instead of
144  // flipping all the bits, so in the case of 0.5, the first bit isn't set
145  // so we need to store it explicitly. But for the cost of one extra
146  // leading bit, we can save several trailing 0xff bytes in lots of common
147  // cases.
148  UNITTEST_ASSERT_NOTHROW(negative || (word1 & (1<<26)), 0);
149  if (negative) {
150  // We negate the mantissa for negative numbers, so that the sort order
151  // is reversed (since larger negative numbers should come first).
152  word1 = negate_unsigned(word1);
153  if (word2 != 0) ++word1;
154  word2 = negate_unsigned(word2);
155  }
156 
157  word1 &= 0x03ffffff;
158  next |= static_cast<unsigned char>(word1 >> 24);
159  buf[len++] = next;
160  buf[len++] = char(word1 >> 16);
161  buf[len++] = char(word1 >> 8);
162  buf[len++] = char(word1);
163 
164  buf[len++] = char(word2 >> 24);
165  buf[len++] = char(word2 >> 16);
166  buf[len++] = char(word2 >> 8);
167  buf[len++] = char(word2);
168 
169  // Finally, we can chop off any trailing zero bytes.
170  while (len > 0 && buf[len - 1] == '\0') {
171  --len;
172  }
173 
174  return len;
175 }
176 
179 static inline unsigned char
180 numfromstr(std::string_view str, std::string::size_type pos)
181 {
182  return (pos < str.size()) ? static_cast<unsigned char>(str[pos]) : '\0';
183 }
184 
185 double
186 Xapian::sortable_unserialise(std::string_view value) noexcept
187 {
188  // Zero.
189  if (value.size() == 1 && value[0] == '\x80') return 0.0;
190 
191  // Positive infinity.
192  if (value.size() == 9 &&
193  memcmp(value.data(), "\xff\xff\xff\xff\xff\xff\xff\xff\xff", 9) == 0) {
194  // "On implementations that support floating-point infinities,
195  // [HUGE_VAL] always expand[s] to the positive infinit[y] of double"
196  // https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL
197  return HUGE_VAL;
198  }
199 
200  // Negative infinity.
201  if (value.empty()) {
202  return -HUGE_VAL;
203  }
204 
205  unsigned char first = numfromstr(value, 0);
206  size_t i = 0;
207 
208  first ^= static_cast<unsigned char>(first & 0xc0) >> 1;
209  bool negative = !(first & 0x80);
210  bool exponent_negative = (first & 0x40);
211  bool explen = !(first & 0x20);
212  int exponent = first & 0x1f;
213  if (!explen) {
214  exponent >>= 2;
215  if (negative ^ exponent_negative) exponent ^= 0x07;
216  } else {
217  first = numfromstr(value, ++i);
218  exponent <<= 6;
219  exponent |= (first >> 2);
220  if (negative ^ exponent_negative) exponent ^= 0x07ff;
221  }
222 
223  unsigned word1;
224  word1 = (unsigned(first & 0x03) << 24);
225  word1 |= numfromstr(value, ++i) << 16;
226  word1 |= numfromstr(value, ++i) << 8;
227  word1 |= numfromstr(value, ++i);
228 
229  unsigned word2 = 0;
230  if (i < value.size()) {
231  word2 = numfromstr(value, ++i) << 24;
232  word2 |= numfromstr(value, ++i) << 16;
233  word2 |= numfromstr(value, ++i) << 8;
234  word2 |= numfromstr(value, ++i);
235  }
236 
237  if (negative) {
238  word1 = negate_unsigned(word1);
239  if (word2 != 0) ++word1;
240  word2 = negate_unsigned(word2);
241  UNITTEST_ASSERT_NOTHROW((word1 & 0xf0000000) != 0, 0);
242  word1 &= 0x03ffffff;
243  }
244  if (!negative) word1 |= 1<<26;
245 
246  double mantissa = 0;
247  if (word2) mantissa = word2 / 4294967296.0; // 1<<32
248  mantissa += word1;
249  mantissa /= 1 << (negative ? 26 : 27);
250 
251  if (exponent_negative) exponent = -exponent;
252  exponent += 8;
253 
254  if (negative) mantissa = -mantissa;
255 
256  // We use scalbn() since it's equivalent to ldexp() when FLT_RADIX == 2
257  // (which we currently assume), except that ldexp() will set errno if the
258  // result overflows or underflows. We don't need or want errno set (indeed
259  // configure turns on -fno-math-errno if it detects the compiler supports
260  // it).
261  return scalbn(mantissa, exponent);
262 }
#define rare(COND)
Definition: config.h:607
Xapian::termpos pos
string str(int value)
Convert int to std::string.
Definition: str.cc:91
size_t sortable_serialise_(double value, char *buf) noexcept
double sortable_unserialise(std::string_view serialised) noexcept
Convert a string encoded using sortable_serialise back to a floating point number.
Negate unsigned integer, avoiding compiler warnings.
constexpr std::enable_if_t< std::is_unsigned_v< T >, T > negate_unsigned(T value)
parsing a user query string to build a Xapian::Query object
static unsigned char numfromstr(std::string_view str, std::string::size_type pos)
Get a number from the character at a given position in a string, returning 0 if the string isn't long...
#define UNITTEST_ASSERT_NOTHROW(COND, RET)