xapian-core  1.4.25
prefix_compressed_strings.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2010 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19  * USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
23 #define XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
24 
25 #include <xapian/error.h>
26 
27 #include <algorithm>
28 #include <string>
29 
30 // We XOR the length values with this so that they are more likely to coincide
31 // with lower case ASCII letters, which are likely to be common. This means
32 // that zlib should do a better job of compressing tag values - in tests, this
33 // gave 5% better compression.
34 #define MAGIC_XOR_VALUE 96
35 
37  const unsigned char * p;
38  size_t left;
39  std::string current;
40 
41  PrefixCompressedStringItor(const unsigned char * p_, size_t left_,
42  const std::string &current_)
43  : p(p_), left(left_), current(current_) { }
44 
45  public:
46  explicit PrefixCompressedStringItor(const std::string & s)
47  : p(reinterpret_cast<const unsigned char *>(s.data())),
48  left(s.size()) {
49  if (left) {
50  operator++();
51  } else {
52  p = NULL;
53  }
54  }
55 
56  const std::string & operator*() const {
57  return current;
58  }
59 
61  const unsigned char * old_p = p;
62  size_t old_left = left;
63  std::string old_current = current;
64  operator++();
65  return PrefixCompressedStringItor(old_p, old_left, old_current);
66  }
67 
69  if (left == 0) {
70  p = NULL;
71  } else {
72  if (!current.empty()) {
73  current.resize(*p++ ^ MAGIC_XOR_VALUE);
74  --left;
75  }
76  size_t add;
77  if (left == 0 || (add = *p ^ MAGIC_XOR_VALUE) >= left)
78  throw Xapian::DatabaseCorruptError("Bad spelling data (too little left)");
79  current.append(reinterpret_cast<const char *>(p + 1), add);
80  p += add + 1;
81  left -= add + 1;
82  }
83  return *this;
84  }
85 
86  bool at_end() const {
87  return p == NULL;
88  }
89 };
90 
92  std::string current;
93  std::string & out;
94 
95  public:
96  explicit PrefixCompressedStringWriter(std::string & out_) : out(out_) { }
97 
98  void append(const std::string & word) {
99  // If this isn't the first entry, see how much of the previous one
100  // we can reuse.
101  if (!current.empty()) {
102  size_t len = std::min(current.size(), word.size());
103  size_t i;
104  for (i = 0; i < len; ++i) {
105  if (current[i] != word[i]) break;
106  }
107  out += char(i ^ MAGIC_XOR_VALUE);
108  out += char((word.size() - i) ^ MAGIC_XOR_VALUE);
109  out.append(word.data() + i, word.size() - i);
110  } else {
111  out += char(word.size() ^ MAGIC_XOR_VALUE);
112  out += word;
113  }
114  current = word;
115  }
116 };
117 
121  const PrefixCompressedStringItor *b) const {
122  return (**a > **b);
123  }
124 };
125 
126 #endif // XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
PrefixCompressedStringItor operator++(int)
PrefixCompressedStringItor(const std::string &s)
bool operator()(const PrefixCompressedStringItor *a, const PrefixCompressedStringItor *b) const
Return true if and only if a's string is strictly greater than b's.
Hierarchy of classes which Xapian can throw as exceptions.
PrefixCompressedStringItor(const unsigned char *p_, size_t left_, const std::string &current_)
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
PrefixCompressedStringItor & operator++()
const std::string & operator*() const
void append(const std::string &word)
#define MAGIC_XOR_VALUE
PrefixCompressedStringWriter(std::string &out_)