xapian-core  1.4.25
bitstream.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2008,2012,2013,2014,2018 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19  * USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_BITSTREAM_H
23 #define XAPIAN_INCLUDED_BITSTREAM_H
24 
25 #include <xapian/types.h>
26 
27 #include <string>
28 #include <vector>
29 
30 namespace Xapian {
31 
33 class BitWriter {
34  std::string buf;
35  int n_bits;
37 
38  public:
40  BitWriter() : n_bits(0), acc(0) { }
41 
43  explicit BitWriter(const std::string& seed)
44  : buf(seed), n_bits(0), acc(0) { }
45 
47  void encode(Xapian::termpos value, Xapian::termpos outof);
48 
50  std::string& freeze() {
51  if (n_bits) {
52  buf += char(acc);
53  n_bits = 0;
54  acc = 0;
55  }
56  return buf;
57  }
58 
60  void encode_interpolative(const std::vector<Xapian::termpos>& pos, int j, int k);
61 };
62 
64 class BitReader {
65  std::string buf;
66  size_t idx;
67  int n_bits;
69 
70  Xapian::termpos read_bits(int count);
71 
72  struct DIStack {
73  int j, k;
75  };
76 
77  struct DIState : public DIStack {
79 
80  void set_j(int j_, Xapian::termpos pos_j_) {
81  j = j_;
82  pos_j = pos_j_;
83  }
84  void set_k(int k_, Xapian::termpos pos_k_) {
85  k = k_;
86  pos_k = pos_k_;
87  }
88  void uninit() {
89  j = 1;
90  k = 0;
91  }
92  DIState() { uninit(); }
93  DIState(int j_, int k_,
94  Xapian::termpos pos_j_, Xapian::termpos pos_k_) {
95  set_j(j_, pos_j_);
96  set_k(k_, pos_k_);
97  }
98  void operator=(const DIStack& o) {
99  j = o.j;
100  set_k(o.k, o.pos_k);
101  }
102  bool is_next() const { return j + 1 < k; }
103  bool is_initialized() const {
104  return j <= k;
105  }
106  // Given pos[j] = pos_j and pos[k] = pos_k, how many possible position
107  // values are there for the value midway between?
109  return pos_k - pos_j - Xapian::termpos(k - j) + 1;
110  }
111  };
112 
113  std::vector<DIStack> di_stack;
115 
116  public:
117  // Construct.
118  BitReader() { }
119 
120  // Construct with the contents of buf_.
121  explicit BitReader(const std::string &buf_)
122  : buf(buf_), idx(0), n_bits(0), acc(0) { }
123 
124  // Construct with the contents of buf_, skipping some bytes.
125  BitReader(const std::string &buf_, size_t skip)
126  : buf(buf_, skip), idx(0), n_bits(0), acc(0) { }
127 
128  // Initialise from buf_, optionally skipping some bytes.
129  void init(const std::string &buf_, size_t skip = 0) {
130  buf.assign(buf_, skip, std::string::npos);
131  idx = 0;
132  n_bits = 0;
133  acc = 0;
134  di_stack.clear();
135  di_current.uninit();
136  }
137 
138  // Decode value, known to be less than outof.
139  Xapian::termpos decode(Xapian::termpos outof, bool force = false);
140 
141  // Check all the data has been read. Because it'll be zero padded
142  // to fill a byte, the best we can actually do is check that
143  // there's less than a byte left and that all remaining bits are
144  // zero.
145  bool check_all_gone() const {
146  return (idx == buf.size() && n_bits <= 7 && acc == 0);
147  }
148 
150  void decode_interpolative(int j, int k,
151  Xapian::termpos pos_j, Xapian::termpos pos_k);
152 
154  Xapian::termpos decode_interpolative_next();
155 };
156 
157 }
158 
// Make BitWriter and BitReader usable without the Xapian:: qualifier in any
// file which includes this header.
159 using Xapian::BitWriter;
160 using Xapian::BitReader;
161 
162 #endif // XAPIAN_INCLUDED_BITSTREAM_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
typedefs for Xapian
DIState(int j_, int k_, Xapian::termpos pos_j_, Xapian::termpos pos_k_)
Definition: bitstream.h:93
BitWriter()
Construct empty.
Definition: bitstream.h:40
static int decode(const T(&table)[N], const char *s)
Decode a string to an integer.
Definition: quest.cc:70
void operator=(const DIStack &o)
Definition: bitstream.h:98
Xapian::termpos pos_j
Definition: bitstream.h:78
std::vector< DIStack > di_stack
Definition: bitstream.h:113
Xapian::termpos acc
Definition: bitstream.h:36
void encode_interpolative(const std::vector< Xapian::termpos > &pos, int j, int k)
Perform interpolative encoding of pos elements between j and k.
Definition: bitstream.cc:157
void init(const std::string &buf_, size_t skip=0)
Definition: bitstream.h:129
BitReader(const std::string &buf_)
Definition: bitstream.h:121
bool is_initialized() const
Definition: bitstream.h:103
bool check_all_gone() const
Definition: bitstream.h:145
void set_j(int j_, Xapian::termpos pos_j_)
Definition: bitstream.h:80
Xapian::termpos pos_k
Definition: bitstream.h:74
BitWriter(const std::string &seed)
Construct with the contents of seed already in the stream.
Definition: bitstream.h:43
Xapian::BitReader
Read a stream created by BitWriter.
Definition: bitstream.h:64
static int seed
Definition: stemtest.cc:45
Xapian::termpos outof() const
Definition: bitstream.h:108
void set_k(int k_, Xapian::termpos pos_k_)
Definition: bitstream.h:84
std::string buf
Definition: bitstream.h:65
BitReader(const std::string &buf_, size_t skip)
Definition: bitstream.h:125
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
Xapian::BitWriter
Create a stream to which non-byte-aligned values can be written.
Definition: bitstream.h:33
DIState di_current
Definition: bitstream.h:114
void encode(Xapian::termpos value, Xapian::termpos outof)
Encode value, known to be less than outof.
Definition: bitstream.cc:95
Xapian::termpos acc
Definition: bitstream.h:68
std::string & freeze()
Finish encoding and return the encoded data as a std::string.
Definition: bitstream.h:50
std::string buf
Definition: bitstream.h:34