00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
00023 #define XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
00024
00025 #include <xapian/error.h>
00026
00027 #include <algorithm>
00028 #include <string>
00029
00030
00031
00032
00033
// Every length byte stored in the prefix-compressed format is XORed with this
// value: PrefixCompressedStringWriter applies it when encoding and
// PrefixCompressedStringItor applies it again when decoding, so the two must
// always agree on it.
00034 #define MAGIC_XOR_VALUE 96
00035
00036 class PrefixCompressedStringItor {
00037 const unsigned char * p;
00038 size_t left;
00039 std::string current;
00040
00041 PrefixCompressedStringItor(const unsigned char * p_, size_t left_,
00042 const std::string ¤t_)
00043 : p(p_), left(left_), current(current_) { }
00044
00045 public:
00046 PrefixCompressedStringItor(const std::string & s)
00047 : p(reinterpret_cast<const unsigned char *>(s.data())),
00048 left(s.size()) {
00049 if (left) {
00050 operator++();
00051 } else {
00052 p = NULL;
00053 }
00054 }
00055
00056 const std::string & operator*() const {
00057 return current;
00058 }
00059
00060 PrefixCompressedStringItor operator++(int) {
00061 const unsigned char * old_p = p;
00062 size_t old_left = left;
00063 std::string old_current = current;
00064 operator++();
00065 return PrefixCompressedStringItor(old_p, old_left, old_current);
00066 }
00067
00068 PrefixCompressedStringItor & operator++() {
00069 if (left == 0) {
00070 p = NULL;
00071 } else {
00072 if (!current.empty()) {
00073 current.resize(*p++ ^ MAGIC_XOR_VALUE);
00074 --left;
00075 }
00076 size_t add;
00077 if (left == 0 || (add = *p ^ MAGIC_XOR_VALUE) >= left)
00078 throw Xapian::DatabaseCorruptError("Bad spelling data (too little left)");
00079 current.append(reinterpret_cast<const char *>(p + 1), add);
00080 p += add + 1;
00081 left -= add + 1;
00082 }
00083 return *this;
00084 }
00085
00086 bool at_end() const {
00087 return p == NULL;
00088 }
00089 };
00090
00091 class PrefixCompressedStringWriter {
00092 std::string current;
00093 std::string & out;
00094
00095 public:
00096 PrefixCompressedStringWriter(std::string & out_) : out(out_) { }
00097
00098 void append(const std::string & word) {
00099
00100
00101 if (!current.empty()) {
00102 size_t len = std::min(current.size(), word.size());
00103 size_t i;
00104 for (i = 0; i < len; ++i) {
00105 if (current[i] != word[i]) break;
00106 }
00107 out += char(i ^ MAGIC_XOR_VALUE);
00108 out += char((word.size() - i) ^ MAGIC_XOR_VALUE);
00109 out.append(word.data() + i, word.size() - i);
00110 } else {
00111 out += char(word.size() ^ MAGIC_XOR_VALUE);
00112 out += word;
00113 }
00114 current = word;
00115 }
00116 };
00117
00118 struct PrefixCompressedStringItorGt {
00120 bool operator()(const PrefixCompressedStringItor *a,
00121 const PrefixCompressedStringItor *b) {
00122 return (**a > **b);
00123 }
00124 };
00125
00126 #endif // XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H