xapian-core  1.4.28
serialise-double.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2006,2007,2008,2009,2015,2025 Olly Betts
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include <config.h>
26 
27 #include <xapian/error.h>
28 
29 #include "omassert.h"
30 
31 #include "serialise-double.h"
32 
33 #include <cfloat>
34 #include <cmath>
35 
36 #include <algorithm>
37 #include <string>
38 
39 using namespace std;
40 
41 // The serialisation we use for doubles is inspired by a comp.lang.c post
42 // by Jens Moeller:
43 //
44 // https://groups.google.com/group/comp.lang.c/browse_thread/thread/6558d4653f6dea8b/75a529ec03148c98
45 //
46 // The clever part is that the mantissa is encoded as a base-256 number which
47 // means there's no rounding error provided both ends have FLT_RADIX as some
48 // power of two.
49 //
50 // FLT_RADIX == 2 seems to be ubiquitous on modern UNIX platforms, while
51 // some older platforms used FLT_RADIX == 16 (IBM machines for example).
52 // FLT_RADIX == 10 seems to be very rare (the only instance Google finds
53 // is for a cross-compiler to some TI calculators).
54 
// Compile-time limits for the base-256 encoding, derived from the <cfloat>
// description of the native double type.  Only radix 2 and radix 16 floating
// point are catered for; any other radix is rejected at compile time.
//
// MAX_MANTISSA_BYTES  Upper bound on the number of base-256 digits emitted
//                     for a mantissa.  DBL_MANT_DIG counts radix-FLT_RADIX
//                     digits; the value is rounded up to whole bytes with
//                     one extra unit of slack (presumably because the
//                     mantissa need not start on a byte boundary).
// MAX_EXP             DBL_MAX_EXP converted to a count of base-256 digits.
// MAX_MANTISSA        Power of two built from the low bits of DBL_MAX_EXP.
//
// MAX_EXP and MAX_MANTISSA are not referenced by the code visible in this
// file.
#if FLT_RADIX == 16
// Two radix-16 digits per byte.
# define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 1 + 1) / 2)
# define MAX_EXP ((DBL_MAX_EXP + 1) / 2)
# define MAX_MANTISSA (1 << ((DBL_MAX_EXP & 1) * 4))
#elif FLT_RADIX == 2
// Eight radix-2 digits per byte.
# define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 7 + 7) / 8)
# define MAX_EXP ((DBL_MAX_EXP + 1) / 8)
# define MAX_MANTISSA (1 << (DBL_MAX_EXP & 7))
#else
# error FLT_RADIX is a value not currently handled (not 2 or 16)
// A fallback definition which was left disabled here:
// # define MAX_MANTISSA_BYTES (sizeof(double) + 1)
#endif
67 
68 static int base256ify_double(double &v) {
69  if (rare(!isfinite(v))) {
70  // frexp() returns an unspecified exponent for infinities and NaN so
71  // we need to special case these.
72  if (isinf(v)) {
73  // Map infinities to maximum representable finite value with the
74  // same sign.
75  v = v > 0 ? DBL_MAX : -DBL_MAX;
76  } else {
77  // Rather arbitrarily we map NaN to zero.
78  v = 0.0;
79  }
80  }
81 
82  int exp;
83  v = frexp(v, &exp);
84  // v is now in the range [0.5, 1.0)
85  --exp;
86 #if FLT_RADIX == 2
87  v = scalbn(v, (exp & 7) + 1);
88 #else
89  v = ldexp(v, (exp & 7) + 1);
90 #endif
91  // v is now in the range [1.0, 256.0)
92  exp >>= 3;
93  return exp;
94 }
95 
96 std::string serialise_double(double v)
97 {
98  /* First byte:
99  * bit 7 Negative flag
100  * bit 4..6 Mantissa length - 1
101  * bit 0..3 --- 0-13 -> Exponent + 7
102  * \- 14 -> Exponent given by next byte
103  * - 15 -> Exponent given by next 2 bytes
104  *
105  * Then optional medium (1 byte) or large exponent (2 bytes, lsb first)
106  *
107  * Then mantissa (0 iff value is 0)
108  */
109 
110  bool negative = (v < 0.0);
111 
112  if (negative) v = -v;
113 
114  int exp = base256ify_double(v);
115 
116  string result;
117 
118  if (exp <= 6 && exp >= -7) {
119  unsigned char b = static_cast<unsigned char>(exp + 7);
120  if (negative) b |= static_cast<unsigned char>(0x80);
121  result += char(b);
122  } else {
123  if (exp >= -128 && exp < 127) {
124  result += negative ? char(0x8e) : char(0x0e);
125  result += char(exp + 128);
126  } else {
127  if (exp < -32768 || exp > 32767) {
128  throw Xapian::InternalError("Insane exponent in floating point number");
129  }
130  result += negative ? char(0x8f) : char(0x0f);
131  result += char(unsigned(exp + 32768) & 0xff);
132  result += char(unsigned(exp + 32768) >> 8);
133  }
134  }
135 
136  int maxbytes = min(MAX_MANTISSA_BYTES, 8);
137 
138  size_t n = result.size();
139  do {
140  unsigned char byte = static_cast<unsigned char>(v);
141  result += char(byte);
142  v -= double(byte);
143  v *= 256.0;
144  } while (v != 0.0 && --maxbytes);
145 
146  n = result.size() - n;
147  if (n > 1) {
148  Assert(n <= 8);
149  result[0] = static_cast<unsigned char>(result[0] | ((n - 1) << 4));
150  }
151 
152  return result;
153 }
154 
155 double unserialise_double(const char ** p, const char *end)
156 {
157  if (end - *p < 2) {
158  throw Xapian::SerialisationError("Bad encoded double: insufficient data");
159  }
160  unsigned char first = *(*p)++;
161  if (first == 0 && *(*p) == 0) {
162  ++*p;
163  return 0.0;
164  }
165 
166  bool negative = (first & 0x80) != 0;
167  size_t mantissa_len = ((first >> 4) & 0x07) + 1;
168 
169  int exp = first & 0x0f;
170  if (exp >= 14) {
171  int bigexp = static_cast<unsigned char>(*(*p)++);
172  if (exp == 15) {
173  if (*p == end) {
174  throw Xapian::SerialisationError("Bad encoded double: short large exponent");
175  }
176  exp = bigexp | (static_cast<unsigned char>(*(*p)++) << 8);
177  exp -= 32768;
178  } else {
179  exp = bigexp - 128;
180  }
181  } else {
182  exp -= 7;
183  }
184 
185  if (size_t(end - *p) < mantissa_len) {
186  throw Xapian::SerialisationError("Bad encoded double: short mantissa");
187  }
188 
189  double v = 0.0;
190 
191  static double dbl_max_mantissa = DBL_MAX;
192  static int dbl_max_exp = base256ify_double(dbl_max_mantissa);
193  *p += mantissa_len;
194  if (exp > dbl_max_exp ||
195  (exp == dbl_max_exp &&
196  double(static_cast<unsigned char>((*p)[-1])) > dbl_max_mantissa)) {
197  // The mantissa check should be precise provided that FLT_RADIX
198  // is a power of 2.
199  v = HUGE_VAL;
200  } else {
201  const char *q = *p;
202  while (mantissa_len--) {
203  v *= 0.00390625; // 1/256
204  v += double(static_cast<unsigned char>(*--q));
205  }
206 
207 #if FLT_RADIX == 2
208  if (exp) v = scalbn(v, exp * 8);
209 #elif FLT_RADIX == 16
210  if (exp) v = scalbn(v, exp * 2);
211 #else
212  if (exp) v = ldexp(v, exp * 8);
213 #endif
214 
215 #if 0
216  if (v == 0.0) {
217  // FIXME: handle underflow
218  }
219 #endif
220  }
221 
222  if (negative) v = -v;
223 
224  return v;
225 }
#define Assert(COND)
Definition: omassert.h:122
STL namespace.
unsigned char byte
Definition: header.h:5
#define rare(COND)
Definition: config.h:575
static int base256ify_double(double &v)
Hierarchy of classes which Xapian can throw as exceptions.
functions to serialise and unserialise a double
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
InternalError indicates a runtime problem of some sort.
Definition: error.h:761
std::string serialise_double(double v)
Serialise a double to a string.
Various assertion macros.