00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef XAPIAN_INCLUDED_PACK_H
00022 #define XAPIAN_INCLUDED_PACK_H
00023
00024 #include <cstring>
00025 #include <string>
00026
00027 #include "omassert.h"
00028
00029 #include "xapian/types.h"
00030
/** How many of the top bits of the first byte of a sort-preserving unsigned
 *  integer hold the byte count (stored as count - 1).
 */
const unsigned int SORTABLE_UINT_LOG2_MAX_BYTES = 2;

/** Largest number of payload bytes a sort-preserving unsigned integer may
 *  have (2 to the power SORTABLE_UINT_LOG2_MAX_BYTES).
 */
const unsigned int SORTABLE_UINT_MAX_BYTES = 1u << SORTABLE_UINT_LOG2_MAX_BYTES;

/** Mask selecting the bits of the first byte which are left over for value
 *  data once the byte count has been stored in the top bits.
 */
const unsigned int SORTABLE_UINT_1ST_BYTE_MASK = 0xffu >> SORTABLE_UINT_LOG2_MAX_BYTES;
00045
/** Append an encoded bool to a string.
 *
 *  The bool is stored as one character: '0' for false, '1' for true.
 *
 *  @param s      The string to append to.
 *  @param value  The bool to encode.
 */
inline void
pack_bool(std::string & s, bool value)
{
    const char encoded = value ? '1' : '0';
    s.push_back(encoded);
}
00056
00063 inline bool
00064 unpack_bool(const char ** p, const char * end, bool * result)
00065 {
00066 Assert(result);
00067 const char * & ptr = *p;
00068 Assert(ptr);
00069 char ch;
00070 if (rare(ptr == end || ((ch = *ptr++ - '0') &~ 1))) {
00071 ptr = NULL;
00072 return false;
00073 }
00074 *result = static_cast<bool>(ch);
00075 return true;
00076 }
00077
00086 template<class U>
00087 inline void
00088 pack_uint_last(std::string & s, U value)
00089 {
00090
00091 STATIC_ASSERT_UNSIGNED_TYPE(U);
00092
00093 while (value) {
00094 s += char(value & 0xff);
00095 value >>= 8;
00096 }
00097 }
00098
00105 template<class U>
00106 inline bool
00107 unpack_uint_last(const char ** p, const char * end, U * result)
00108 {
00109
00110 STATIC_ASSERT_UNSIGNED_TYPE(U);
00111 Assert(result);
00112
00113 const char * ptr = *p;
00114 Assert(ptr);
00115 *p = end;
00116
00117
00118 if (rare(end - ptr > int(sizeof(U)))) {
00119 return false;
00120 }
00121
00122 *result = 0;
00123 while (end != ptr) {
00124 *result = (*result << 8) | U(static_cast<unsigned char>(*--end));
00125 }
00126
00127 return true;
00128 }
00129
00142 template<class U>
00143 inline void
00144 pack_uint_preserving_sort(std::string & s, U value)
00145 {
00146
00147 STATIC_ASSERT_UNSIGNED_TYPE(U);
00148 STATIC_ASSERT(sizeof(U) <= SORTABLE_UINT_MAX_BYTES);
00149
00150 char tmp[sizeof(U) + 1];
00151 char * p = tmp + sizeof(tmp);
00152
00153 do {
00154 *--p = char(value & 0xff);
00155 value >>= 8;
00156 } while (value &~ SORTABLE_UINT_1ST_BYTE_MASK);
00157
00158 unsigned char len = static_cast<unsigned char>(tmp + sizeof(tmp) - p);
00159 *--p = char((len - 1) << (8 - SORTABLE_UINT_LOG2_MAX_BYTES) | value);
00160 s.append(p, len + 1);
00161 }
00162
00172 template<class U>
00173 inline bool
00174 unpack_uint_preserving_sort(const char ** p, const char * end, U * result)
00175 {
00176
00177 STATIC_ASSERT_UNSIGNED_TYPE(U);
00178 STATIC_ASSERT(sizeof(U) < 256);
00179 Assert(result);
00180
00181 const char * ptr = *p;
00182 Assert(ptr);
00183
00184 if (rare(ptr == end)) {
00185 return false;
00186 }
00187
00188 unsigned char len_byte = static_cast<unsigned char>(*ptr++);
00189 *result = len_byte & SORTABLE_UINT_1ST_BYTE_MASK;
00190 size_t len = (len_byte >> (8 - SORTABLE_UINT_LOG2_MAX_BYTES)) + 1;
00191
00192 if (rare(size_t(end - ptr) < len)) {
00193 return false;
00194 }
00195
00196 end = ptr + len;
00197 *p = end;
00198
00199
00200 if (rare(len > int(sizeof(U)))) {
00201 return false;
00202 }
00203
00204 while (ptr != end) {
00205 *result = (*result << 8) | U(static_cast<unsigned char>(*ptr++));
00206 }
00207
00208 return true;
00209 }
00210
00216 template<class U>
00217 inline void
00218 pack_uint(std::string & s, U value)
00219 {
00220
00221 STATIC_ASSERT_UNSIGNED_TYPE(U);
00222
00223 while (value >= 128) {
00224 s += static_cast<char>(static_cast<unsigned char>(value) | 0x80);
00225 value >>= 7;
00226 }
00227 s += static_cast<char>(value);
00228 }
00229
00236 template<class U>
00237 inline bool
00238 unpack_uint(const char ** p, const char * end, U * result)
00239 {
00240
00241 STATIC_ASSERT_UNSIGNED_TYPE(U);
00242
00243 const char * ptr = *p;
00244 Assert(ptr);
00245 const char * start = ptr;
00246
00247
00248 do {
00249 if (rare(ptr == end)) {
00250
00251 *p = NULL;
00252 return false;
00253 }
00254 } while (static_cast<unsigned char>(*ptr++) >= 128);
00255
00256 *p = ptr;
00257
00258 if (!result) return true;
00259
00260 *result = U(*--ptr);
00261 if (ptr == start) {
00262
00263 return true;
00264 }
00265
00266 size_t maxbits = size_t(ptr - start) * 7;
00267 if (maxbits <= sizeof(U) * 8) {
00268
00269 do {
00270 unsigned char chunk = static_cast<unsigned char>(*--ptr) & 0x7f;
00271 *result = (*result << 7) | U(chunk);
00272 } while (ptr != start);
00273 return true;
00274 }
00275
00276 size_t minbits = maxbits - 6;
00277 if (rare(minbits > sizeof(U) * 8)) {
00278
00279 return false;
00280 }
00281
00282 while (--ptr != start) {
00283 unsigned char chunk = static_cast<unsigned char>(*--ptr) & 0x7f;
00284 *result = (*result << 7) | U(chunk);
00285 }
00286
00287 U tmp = *result;
00288 *result <<= 7;
00289 if (rare(*result < tmp)) {
00290
00291 return false;
00292 }
00293 *result |= U(static_cast<unsigned char>(*ptr) & 0x7f);
00294 return true;
00295 }
00296
00302 inline void
00303 pack_string(std::string & s, const std::string & value)
00304 {
00305 pack_uint(s, value.size());
00306 s += value;
00307 }
00308
00314 inline void
00315 pack_string(std::string & s, const char * ptr)
00316 {
00317 Assert(ptr);
00318 size_t len = std::strlen(ptr);
00319 pack_uint(s, len);
00320 s.append(ptr, len);
00321 }
00322
00329 inline bool
00330 unpack_string(const char ** p, const char * end, std::string & result)
00331 {
00332 size_t len;
00333 if (rare(!unpack_uint(p, end, &len))) {
00334 return false;
00335 }
00336
00337 const char * & ptr = *p;
00338 if (rare(len > size_t(end - ptr))) {
00339 ptr = NULL;
00340 return false;
00341 }
00342
00343 result.assign(ptr, len);
00344 ptr += len;
00345 return true;
00346 }
00347
/** Append a string to a string in an escaped, terminator-based form.
 *
 *  Each zero byte in @a value is written as the two bytes "\0\xff".  Unless
 *  @a last is true, a single zero byte is appended afterwards as a
 *  terminator.
 *
 *  @param s      The string to append to.
 *  @param value  The string to encode (may contain zero bytes).
 *  @param last   If true, no terminator is written.
 */
inline void
pack_string_preserving_sort(std::string & s, const std::string & value,
                            bool last = false)
{
    std::string::size_type pos = 0;
    for (;;) {
        const std::string::size_type nul = value.find('\0', pos);
        if (nul == std::string::npos) break;

        // Copy everything up to and including the zero byte, then add the
        // escape marker after it.
        const std::string::size_type next = nul + 1;
        s.append(value, pos, next - pos);
        s += '\xff';
        pos = next;
    }

    // Whatever follows the final zero byte (or the whole string if there
    // was none).
    s.append(value, pos, std::string::npos);

    if (!last) s += '\0';
}
00378
00387 inline bool
00388 unpack_string_preserving_sort(const char ** p, const char * end,
00389 std::string & result)
00390 {
00391 result.resize(0);
00392
00393 const char *ptr = *p;
00394 Assert(ptr);
00395
00396 while (ptr != end) {
00397 char ch = *ptr++;
00398 if (rare(ch == '\0')) {
00399 if (usual(ptr == end || *ptr != '\xff')) {
00400 break;
00401 }
00402 ++ptr;
00403 }
00404 result += ch;
00405 }
00406 *p = ptr;
00407 return true;
00408 }
00409
00410 inline std::string
00411 pack_chert_postlist_key(const std::string &term)
00412 {
00413
00414 if (term.empty())
00415 return std::string("\x00\xe0", 2);
00416
00417 std::string key;
00418 pack_string_preserving_sort(key, term, true);
00419 return key;
00420 }
00421
00422 inline std::string
00423 pack_chert_postlist_key(const std::string &term, Xapian::docid did)
00424 {
00425
00426 if (term.empty()) {
00427 std::string key("\x00\xe0", 2);
00428 pack_uint_preserving_sort(key, did);
00429 return key;
00430 }
00431
00432 std::string key;
00433 pack_string_preserving_sort(key, term);
00434 pack_uint_preserving_sort(key, did);
00435 return key;
00436 }
00437
00438 inline std::string
00439 pack_brass_postlist_key(const std::string &term)
00440 {
00441 return pack_chert_postlist_key(term);
00442 }
00443
00444 inline std::string
00445 pack_brass_postlist_key(const std::string &term, Xapian::docid did)
00446 {
00447 return pack_chert_postlist_key(term, did);
00448 }
00449
00450 #endif // XAPIAN_INCLUDED_PACK_H