xapian-core  2.0.0
honey_cursor.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2017,2018,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 //#define DEBUGGING
24 
25 #include "honey_cursor.h"
26 
27 #include <cerrno>
28 #include <string>
29 
30 #ifdef DEBUGGING
31 # include <iostream>
32 #endif
33 
34 using namespace std;
35 
36 bool
38 {
39  if (is_at_end) {
40  Assert(false);
41  return false;
42  }
43 
44  if (val_size) {
45  // Skip val data we've not looked at.
46  store.skip(val_size);
47  val_size = 0;
48  }
49 
50  if (store.get_pos() >= root) {
51  AssertEq(store.get_pos(), root);
52  is_at_end = true;
53  return false;
54  }
55 
56  int ch = store.read();
57  if (ch == EOF) {
58  // The root check above should mean this can't legitimately happen.
59  throw Xapian::DatabaseCorruptError("EOF reading key");
60  }
61 
62  size_t reuse = ch;
63  if (reuse > last_key.size()) {
64  throw Xapian::DatabaseCorruptError("Reuse > previous key size");
65  }
66  ch = store.read();
67  if (ch == EOF) {
68  throw Xapian::DatabaseError("EOF/error while reading key length",
69  errno);
70  }
71  size_t key_size = ch;
72  char buf[256];
73  store.read(buf, key_size);
74  current_key.assign(last_key, 0, reuse);
75  current_key.append(buf, key_size);
76  last_key = current_key;
77 
78 #ifdef DEBUGGING
79  {
80  string esc;
81  description_append(esc, current_key);
82  cerr << "K:" << esc << endl;
83  }
84 #endif
85 
86  return next_from_index();
87 }
88 
89 bool
91 {
92  char buf[8];
93  int r;
94  {
95  // FIXME: rework to take advantage of buffering that's happening
96  // anyway?
97  char* p = buf;
98  for (int i = 0; i < 8; ++i) {
99  int ch2 = store.read();
100  if (ch2 == EOF) {
101  break;
102  }
103  *p++ = char(ch2);
104  if (ch2 < 128) break;
105  }
106  r = p - buf;
107  }
108  const char* p = buf;
109  const char* end = p + r;
110  if (!unpack_uint(&p, end, &val_size)) {
111  throw Xapian::DatabaseError("val_size unpack_uint invalid");
112  }
113  if (p != end) abort();
114  current_compressed = val_size & 1;
115  val_size >>= 1;
116 
117  // FIXME: Always resize to 0? Not doing so avoids always having to clear
118  // all the data before reading it.
119  if (true && val_size == 0)
120  current_tag.resize(0);
121 
122  is_at_end = false;
123  return true;
124 }
125 
126 bool
127 HoneyCursor::read_tag(bool keep_compressed)
128 {
129  if (val_size) {
130  if (store.was_forced_closed()) {
132  }
133 
134  current_tag.resize(val_size);
135  store.read(&(current_tag[0]), val_size);
136 #ifdef DEBUGGING
137  {
138  cerr << "read " << val_size << " bytes of value data ending @"
139  << store.get_pos() << endl;
140  string esc;
141  description_append(esc, current_tag);
142  cerr << "V:" << esc << endl;
143  }
144 #endif
145  val_size = 0;
146  }
147  if (!keep_compressed && current_compressed) {
148  // Need to decompress.
149  comp_stream.decompress_start();
150  string new_tag;
151  if (!comp_stream.decompress_chunk(current_tag.data(),
152  current_tag.size(),
153  new_tag)) {
154  // Decompression didn't complete.
155  abort();
156  }
157  swap(current_tag, new_tag);
158  current_compressed = false;
159 #ifdef DEBUGGING
160  {
161  cerr << "decompressed to " << current_tag.size()
162  << "bytes of value data" << endl;
163  }
164 #endif
165  }
166  return current_compressed;
167 }
168 
169 bool
170 HoneyCursor::do_find(string_view key, bool greater_than)
171 {
172  // FIXME: Actually use this!
173  (void)greater_than;
174 
175 #ifdef DEBUGGING
176  {
177  string esc;
178  description_append(esc, key);
179  cerr << "do_find(" << esc << ", " << greater_than << ") @"
180  << store.get_pos() << endl;
181  }
182 #endif
183 
184  if (key.empty()) {
185  rewind();
186  do_next();
187  return false;
188  }
189 
190  bool use_index = true;
191  if (!is_at_end && !last_key.empty() && last_key[0] == key[0]) {
192  int cmp0 = last_key.compare(key);
193  if (cmp0 == 0) {
194  current_key = last_key;
195  return true;
196  }
197  if (cmp0 < 0) {
198  // We're going forwards to a key with the same first character, so
199  // an array index won't help us.
200  use_index = false;
201  }
202  }
203 
204  if (store.was_forced_closed()) {
206  }
207 
208  if (use_index) {
209  store.rewind(root);
210  int index_type = store.read();
211  switch (index_type) {
212  case EOF:
213  return false;
214  case 0x00: {
215  unsigned char first = key[0] - store.read();
216  unsigned char range = store.read();
217  if (first > range) {
218  is_at_end = true;
219  return false;
220  }
221  store.skip(first * 4); // FIXME: pointer width
222  off_t jump = store.read_uint4_be();
223  store.rewind(jump);
224  // The jump point will be an entirely new key (because it is
225  // the first key with that initial character), and we drop in
226  // as if this was the first key so set last_key to be empty.
227  last_key = string();
228  break;
229  }
230  case 0x01: {
231  size_t j = store.read_uint4_be();
232  if (j == 0) {
233  is_at_end = true;
234  return false;
235  }
236  off_t base = store.get_pos();
238  size_t kkey_len = 0;
239  size_t i = 0;
240  while (j - i > 1) {
241  size_t k = i + (j - i) / 2;
242  store.set_pos(base + k * SSTINDEX_BINARY_CHOP_ENTRY_SIZE);
243  store.read(kkey, SSTINDEX_BINARY_CHOP_KEY_SIZE);
244  kkey_len = 4;
245  while (kkey_len > 0 && kkey[kkey_len - 1] == '\0')
246  --kkey_len;
247  int r = key.compare(0, SSTINDEX_BINARY_CHOP_KEY_SIZE,
248  kkey, kkey_len);
249  if (r < 0) {
250  j = k;
251  } else {
252  i = k;
253  if (r == 0) {
254  break;
255  }
256  }
257  }
258  store.set_pos(base + i * SSTINDEX_BINARY_CHOP_ENTRY_SIZE);
259  store.read(kkey, SSTINDEX_BINARY_CHOP_KEY_SIZE);
260  kkey_len = 4;
261  while (kkey_len > 0 && kkey[kkey_len - 1] == '\0') --kkey_len;
262  off_t jump = store.read_uint4_be();
263  store.rewind(jump);
264  // The jump point is to the first key with prefix kkey, so will
265  // work if we set last key to kkey. Unless we're jumping to the
266  // start of the table, in which case last_key needs to be empty.
267  last_key.assign(kkey, jump == 0 ? 0 : kkey_len);
268  break;
269  }
270  case 0x02: {
271  // FIXME: If "close" just seek forwards? Or consider seeking
272  // from current index pos?
273  // off_t pos = store.get_pos();
274  string index_key, prev_index_key;
275  make_unsigned_t<off_t> ptr = 0;
276  int cmp0 = 1;
277 #ifdef DEBUGGING
278  {
279  cerr << "Using skiplist index\n";
280  }
281 #endif
282  while (true) {
283  int reuse = store.read();
284  if (reuse == EOF) break;
285  int len = store.read();
286  if (len == EOF) abort(); // FIXME
287 #ifdef DEBUGGING
288  {
289  cerr << "reuse = " << reuse << " len = " << len << endl;
290  }
291 #endif
292  index_key.resize(reuse + len);
293  store.read(&index_key[reuse], len);
294 
295 #ifdef DEBUGGING
296  {
297  string desc;
298  description_append(desc, index_key);
299  cerr << "Index key: " << desc << endl;
300  }
301 #endif
302 
303  cmp0 = index_key.compare(key);
304  if (cmp0 > 0) {
305  index_key = prev_index_key;
306  break;
307  }
308  char buf[8];
309  char* e = buf;
310  while (true) {
311  int b = store.read();
312  *e++ = b;
313  if ((b & 0x80) == 0) break;
314  }
315  const char* p = buf;
316  if (!unpack_uint(&p, e, &ptr) || p != e) abort(); // FIXME
317 #ifdef DEBUGGING
318  {
319  cerr << " -> " << ptr << endl;
320  }
321 #endif
322  if (cmp0 == 0)
323  break;
324  prev_index_key = index_key;
325 #ifdef DEBUGGING
326  {
327  string desc;
328  description_append(desc, prev_index_key);
329  cerr << "prev_index_key -> " << desc << endl;
330  }
331 #endif
332  }
333 #ifdef DEBUGGING
334  {
335  string desc;
336  description_append(desc, index_key);
337  cerr << " index_key = " << desc << ", cmp0 = " << cmp0
338  << ", going to " << ptr << endl;
339  }
340 #endif
341  store.set_pos(ptr);
342 
343  if (ptr != 0) {
344  last_key = current_key = index_key;
345  bool res = next_from_index();
346  (void)res;
347  Assert(res);
348  if (cmp0 == 0) {
349  Assert(ptr != 0);
350  return true;
351  }
352  store.skip(val_size);
353  } else {
354  last_key = current_key = string();
355  }
356 
357 #ifdef DEBUGGING
358  {
359  string desc;
360  description_append(desc, current_key);
361  cerr << "cmp0 was " << cmp0
362  << ", Dropped to data layer on key: " << desc << endl;
363  }
364 #endif
365 
366  break;
367  }
368  default: {
369  string m = "HoneyCursor: Unknown index type ";
370  m += str(index_type);
372  }
373  }
374  is_at_end = false;
375  val_size = 0;
376  }
377 
378  while (do_next()) {
379  int cmp = current_key.compare(key);
380  if (cmp == 0) return true;
381  if (cmp > 0) break;
382  }
383  return false;
384 }
385 
386 bool
388 {
389  if (store.was_forced_closed()) {
391  }
392 
393  string key;
394  if (is_at_end) {
395  // To position on the last key we just do a < search for a key greater
396  // than any possible key - one longer than the longest possible length
397  // and consisting entirely of the highest sorting byte value.
398  key.assign(HONEY_MAX_KEY_LENGTH + 1, '\xff');
399  } else {
400  if (current_key.empty())
401  return false;
402  key = current_key;
403  }
404 
405  // FIXME: use index - for an array index we can look at index points for
406  // first characters starting with key[0] and working down; for a binary
407  // chop index we can start at the entry including the current key, or the
408  // one before if this is the first key for that index entry; for a skiplist
409  // index we can find the previous entry at the index level above.
410  rewind();
411 
412  off_t pos;
413  string k;
414  size_t vs;
415  bool compressed;
416  do {
417  pos = store.get_pos();
418  k = current_key;
419  vs = val_size;
420  compressed = current_compressed;
421  } while (do_next() && current_key < key);
422 
423  // Back up to previous entry.
424  is_at_end = false;
425  last_key = current_key = k;
426  val_size = vs;
427  current_compressed = compressed;
428  store.set_pos(pos);
429 
430  return true;
431 }
bool read_tag(bool keep_compressed=false)
bool do_find(std::string_view key, bool greater_than)
Search for key.
bool next_from_index()
Handle the value part of the (key,value).
Definition: honey_cursor.cc:90
bool prev()
Move to the item before the current one.
bool do_next()
Definition: honey_cursor.cc:37
static void throw_database_closed()
Definition: honey_table.h:689
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
DatabaseError indicates some sort of database related error.
Definition: error.h:355
PositionList * p
Xapian::termpos pos
HoneyCursor class.
#define HONEY_MAX_KEY_LENGTH
Maximum key length.
Definition: honey_defs.h:39
#define SSTINDEX_BINARY_CHOP_ENTRY_SIZE
Definition: honey_table.h:34
#define SSTINDEX_BINARY_CHOP_KEY_SIZE
Definition: honey_table.h:32
string str(int value)
Convert int to std::string.
Definition: str.cc:91
#define AssertEq(A, B)
Definition: omassert.h:124
#define Assert(COND)
Definition: omassert.h:122
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:346
void description_append(std::string &desc, std::string_view s)
Definition: unittest.cc:105