xapian-core  1.4.26
prefix_compressed_strings.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19  * USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
23 #define XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
24 
25 #include <xapian/error.h>
26 
27 #include <algorithm>
28 #include <string>
29 
30 // We XOR the length values with this so that they are more likely to coincide
31 // with lower case ASCII letters, which are likely to be common. This means
32 // that zlib should do a better job of compressing tag values - in tests, this
33 // gave 5% better compression.
34 #define MAGIC_XOR_VALUE 96
35 
37  const unsigned char * p;
38  size_t left;
39  std::string current;
40 
41  PrefixCompressedStringItor(const unsigned char * p_, size_t left_,
42  const std::string &current_)
43  : p(p_), left(left_), current(current_) { }
44 
46  : p(o.p), left(o.left), current(std::move(o.current)) {}
47 
48  public:
49  explicit PrefixCompressedStringItor(const std::string & s)
50  : p(reinterpret_cast<const unsigned char *>(s.data())),
51  left(s.size()) {
52  if (left) {
53  operator++();
54  } else {
55  p = NULL;
56  }
57  }
58 
59  const std::string & operator*() const {
60  return current;
61  }
62 
64  const unsigned char * old_p = p;
65  size_t old_left = left;
66  std::string old_current = current;
67  operator++();
68  return PrefixCompressedStringItor(old_p, old_left, old_current);
69  }
70 
72  if (left == 0) {
73  p = NULL;
74  } else {
75  if (!current.empty()) {
76  current.resize(*p++ ^ MAGIC_XOR_VALUE);
77  --left;
78  }
79  size_t add;
80  if (left == 0 || (add = *p ^ MAGIC_XOR_VALUE) >= left)
81  throw Xapian::DatabaseCorruptError("Bad spelling data (too little left)");
82  current.append(reinterpret_cast<const char *>(p + 1), add);
83  p += add + 1;
84  left -= add + 1;
85  }
86  return *this;
87  }
88 
89  bool at_end() const {
90  return p == NULL;
91  }
92 };
93 
95  std::string current;
96  std::string & out;
97 
98  public:
99  explicit PrefixCompressedStringWriter(std::string & out_) : out(out_) { }
100 
101  void append(const std::string & word) {
102  // If this isn't the first entry, see how much of the previous one
103  // we can reuse.
104  if (!current.empty()) {
105  size_t len = std::min(current.size(), word.size());
106  size_t i;
107  for (i = 0; i < len; ++i) {
108  if (current[i] != word[i]) break;
109  }
110  out += char(i ^ MAGIC_XOR_VALUE);
111  out += char((word.size() - i) ^ MAGIC_XOR_VALUE);
112  out.append(word.data() + i, word.size() - i);
113  } else {
114  out += char(word.size() ^ MAGIC_XOR_VALUE);
115  out += word;
116  }
117  current = word;
118  }
119 };
120 
124  const PrefixCompressedStringItor *b) const {
125  return (**a > **b);
126  }
127 };
128 
129 #endif // XAPIAN_INCLUDED_PREFIX_COMPRESSED_STRINGS_H
PrefixCompressedStringItor operator++(int)
STL namespace.
PrefixCompressedStringItor(const std::string &s)
bool operator()(const PrefixCompressedStringItor *a, const PrefixCompressedStringItor *b) const
Return true if and only if a's string is strictly greater than b's.
PrefixCompressedStringItor(PrefixCompressedStringItor &&o)
Hierarchy of classes which Xapian can throw as exceptions.
PrefixCompressedStringItor(const unsigned char *p_, size_t left_, const std::string &current_)
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
PrefixCompressedStringItor & operator++()
const std::string & operator*() const
void append(const std::string &word)
#define MAGIC_XOR_VALUE
PrefixCompressedStringWriter(std::string &out_)