xapian-core  1.4.26
serialise-double.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2006,2007,2008,2009,2015 Olly Betts
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include <config.h>
26 
27 #include <xapian/error.h>
28 
29 #include "omassert.h"
30 
31 #include "serialise-double.h"
32 
33 #include <cfloat>
34 #include <cmath>
35 
36 #include <algorithm>
37 #include <string>
38 
39 using namespace std;
40 
41 // The serialisation we use for doubles is inspired by a comp.lang.c post
42 // by Jens Moeller:
43 //
44 // https://groups.google.com/group/comp.lang.c/browse_thread/thread/6558d4653f6dea8b/75a529ec03148c98
45 //
46 // The clever part is that the mantissa is encoded as a base-256 number which
47 // means there's no rounding error provided both ends have FLT_RADIX as some
48 // power of two.
49 //
50 // FLT_RADIX == 2 seems to be ubiquitous on modern UNIX platforms, while
51 // some older platforms used FLT_RADIX == 16 (IBM machines for example).
52 // FLT_RADIX == 10 seems to be very rare (the only instance Google finds
53 // is for a cross-compiler to some TI calculators).
54 
55 #if FLT_RADIX == 2
56 # define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 7 + 7) / 8)
57 # define MAX_EXP ((DBL_MAX_EXP + 1) / 8)
58 # define MAX_MANTISSA (1 << (DBL_MAX_EXP & 7))
59 #elif FLT_RADIX == 16
60 # define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 1 + 1) / 2)
61 # define MAX_EXP ((DBL_MAX_EXP + 1) / 2)
62 # define MAX_MANTISSA (1 << ((DBL_MAX_EXP & 1) * 4))
63 #else
64 # error FLT_RADIX is a value not currently handled (not 2 or 16)
65 // # define MAX_MANTISSA_BYTES (sizeof(double) + 1)
66 #endif
67 
68 static int base256ify_double(double &v) {
69  int exp;
70  v = frexp(v, &exp);
71  // v is now in the range [0.5, 1.0)
72  --exp;
73 #if FLT_RADIX == 2
74  v = scalbn(v, (exp & 7) + 1);
75 #else
76  v = ldexp(v, (exp & 7) + 1);
77 #endif
78  // v is now in the range [1.0, 256.0)
79  exp >>= 3;
80  return exp;
81 }
82 
83 std::string serialise_double(double v)
84 {
85  /* First byte:
86  * bit 7 Negative flag
87  * bit 4..6 Mantissa length - 1
88  * bit 0..3 --- 0-13 -> Exponent + 7
89  * \- 14 -> Exponent given by next byte
90  * - 15 -> Exponent given by next 2 bytes
91  *
92  * Then optional medium (1 byte) or large exponent (2 bytes, lsb first)
93  *
94  * Then mantissa (0 iff value is 0)
95  */
96 
97  bool negative = (v < 0.0);
98 
99  if (negative) v = -v;
100 
101  int exp = base256ify_double(v);
102 
103  string result;
104 
105  if (exp <= 6 && exp >= -7) {
106  unsigned char b = static_cast<unsigned char>(exp + 7);
107  if (negative) b |= static_cast<unsigned char>(0x80);
108  result += char(b);
109  } else {
110  if (exp >= -128 && exp < 127) {
111  result += negative ? char(0x8e) : char(0x0e);
112  result += char(exp + 128);
113  } else {
114  if (exp < -32768 || exp > 32767) {
115  throw Xapian::InternalError("Insane exponent in floating point number");
116  }
117  result += negative ? char(0x8f) : char(0x0f);
118  result += char(unsigned(exp + 32768) & 0xff);
119  result += char(unsigned(exp + 32768) >> 8);
120  }
121  }
122 
123  int maxbytes = min(MAX_MANTISSA_BYTES, 8);
124 
125  size_t n = result.size();
126  do {
127  unsigned char byte = static_cast<unsigned char>(v);
128  result += char(byte);
129  v -= double(byte);
130  v *= 256.0;
131  } while (v != 0.0 && --maxbytes);
132 
133  n = result.size() - n;
134  if (n > 1) {
135  Assert(n <= 8);
136  result[0] = static_cast<unsigned char>(result[0] | ((n - 1) << 4));
137  }
138 
139  return result;
140 }
141 
142 double unserialise_double(const char ** p, const char *end)
143 {
144  if (end - *p < 2) {
145  throw Xapian::SerialisationError("Bad encoded double: insufficient data");
146  }
147  unsigned char first = *(*p)++;
148  if (first == 0 && *(*p) == 0) {
149  ++*p;
150  return 0.0;
151  }
152 
153  bool negative = (first & 0x80) != 0;
154  size_t mantissa_len = ((first >> 4) & 0x07) + 1;
155 
156  int exp = first & 0x0f;
157  if (exp >= 14) {
158  int bigexp = static_cast<unsigned char>(*(*p)++);
159  if (exp == 15) {
160  if (*p == end) {
161  throw Xapian::SerialisationError("Bad encoded double: short large exponent");
162  }
163  exp = bigexp | (static_cast<unsigned char>(*(*p)++) << 8);
164  exp -= 32768;
165  } else {
166  exp = bigexp - 128;
167  }
168  } else {
169  exp -= 7;
170  }
171 
172  if (size_t(end - *p) < mantissa_len) {
173  throw Xapian::SerialisationError("Bad encoded double: short mantissa");
174  }
175 
176  double v = 0.0;
177 
178  static double dbl_max_mantissa = DBL_MAX;
179  static int dbl_max_exp = base256ify_double(dbl_max_mantissa);
180  *p += mantissa_len;
181  if (exp > dbl_max_exp ||
182  (exp == dbl_max_exp &&
183  double(static_cast<unsigned char>((*p)[-1])) > dbl_max_mantissa)) {
184  // The mantissa check should be precise provided that FLT_RADIX
185  // is a power of 2.
186  v = HUGE_VAL;
187  } else {
188  const char *q = *p;
189  while (mantissa_len--) {
190  v *= 0.00390625; // 1/256
191  v += double(static_cast<unsigned char>(*--q));
192  }
193 
194 #if FLT_RADIX == 2
195  if (exp) v = scalbn(v, exp * 8);
196 #elif FLT_RADIX == 16
197  if (exp) v = scalbn(v, exp * 2);
198 #else
199  if (exp) v = ldexp(v, exp * 8);
200 #endif
201 
202 #if 0
203  if (v == 0.0) {
204  // FIXME: handle underflow
205  }
206 #endif
207  }
208 
209  if (negative) v = -v;
210 
211  return v;
212 }
#define Assert(COND)
Definition: omassert.h:122
STL namespace.
unsigned char byte
Definition: header.h:5
static int base256ify_double(double &v)
Hierarchy of classes which Xapian can throw as exceptions.
functions to serialise and unserialise a double
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
InternalError indicates a runtime problem of some sort.
Definition: error.h:761
std::string serialise_double(double v)
Serialise a double to a string.
Various assertion macros.