xapian-core  2.0.0
honey_values.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008,2009,2011,2018 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_HONEY_VALUES_H
23 #define XAPIAN_INCLUDED_HONEY_VALUES_H
24 
25 #include "honey_cursor.h"
26 #include "backends/valuestats.h"
27 #include "pack.h"
28 #include "xapian/error.h"
29 #include "xapian/types.h"
30 
31 #include <map>
32 #include <memory>
33 #include <string>
34 
35 namespace Honey {
36 
38 inline std::string
40 {
41  std::string key(1, '\0');
43  key += char(Honey::KEY_VALUE_CHUNK + slot);
44  } else {
45  key += char(Honey::KEY_VALUE_CHUNK_HI);
46  pack_uint_preserving_sort(key, slot);
47  }
48  pack_uint_preserving_sort(key, last_did);
49  return key;
50 }
51 
52 inline Xapian::docid
53 docid_from_key(Xapian::valueno required_slot, const std::string& key)
54 {
55  const char* p = key.data();
56  const char* end = p + key.length();
57  if (end - p < 3 || *p++ != '\0') {
58  // Not a value chunk key.
59  return 0;
60  }
61  unsigned char code = *p++;
62  if (code < Honey::KEY_VALUE_CHUNK || code > Honey::KEY_VALUE_CHUNK_HI) {
63  // Also not a value chunk key.
64  return 0;
65  }
66 
67  Xapian::valueno slot;
68  if (code < Honey::KEY_VALUE_CHUNK_HI) {
69  slot = code - Honey::KEY_VALUE_CHUNK;
70  } else {
71  if (!unpack_uint_preserving_sort(&p, end, &slot))
72  throw Xapian::DatabaseCorruptError("Bad value key");
73  }
74  // Fail if for a different slot.
75  if (slot != required_slot) return 0;
76  Xapian::docid did;
77  if (!unpack_uint_preserving_sort(&p, end, &did))
78  throw Xapian::DatabaseCorruptError("Bad value key");
79  return did;
80 }
81 
82 inline std::string
84 {
85  std::string key(1, '\0');
86  if (slot <= 7) {
87  key += char(Honey::KEY_VALUE_STATS + slot);
88  } else {
89  key += char(Honey::KEY_VALUE_STATS + 7);
90  pack_uint_preserving_sort(key, slot);
91  }
92  return key;
93 }
94 
95 inline static std::string
97  const std::string& lbound,
98  const std::string& ubound)
99 {
100  std::string value;
101  pack_uint(value, freq);
102  pack_string(value, lbound);
103  // We don't store or count empty values, so neither of the bounds
104  // can be empty. So we can safely store an empty upper bound when
105  // the bounds are equal.
106  if (lbound != ubound) value += ubound;
107  return value;
108 }
109 
110 }
111 
112 namespace Xapian {
113  class Document;
114 }
115 
116 class HoneyPostListTable;
117 class HoneyTermListTable;
118 struct ValueStats;
119 
127 
130 
132 
134 
135  std::map<Xapian::docid, std::string> slots;
136 
137  std::map<Xapian::valueno, std::map<Xapian::docid, std::string>> changes;
138 
139  mutable std::unique_ptr<HoneyCursor> cursor;
140 
141  void add_value(Xapian::docid did, Xapian::valueno slot,
142  const std::string& val);
143 
145 
151  Xapian::docid did,
152  std::string& chunk) const;
153 
155  void get_value_stats(Xapian::valueno slot) const;
156 
157  void get_value_stats(Xapian::valueno slot, ValueStats& stats) const;
158 
159  public:
162  HoneyTermListTable& termlist_table_)
163  : postlist_table(postlist_table_),
164  termlist_table(termlist_table_) { }
165 
166  // Merge in batched-up changes.
167  void merge_changes();
168 
169  std::string add_document(Xapian::docid did, const Xapian::Document& doc,
170  std::map<Xapian::valueno, ValueStats>& val_stats);
171 
173  std::map<Xapian::valueno, ValueStats>& val_stats);
174 
175  std::string replace_document(Xapian::docid did,
176  const Xapian::Document& doc,
177  std::map<Xapian::valueno, ValueStats>& val_stats);
178 
179  std::string get_value(Xapian::docid did, Xapian::valueno slot) const;
180 
181  void get_all_values(std::map<Xapian::valueno, std::string>& values,
182  Xapian::docid did) const;
183 
185  if (mru_slot != slot) get_value_stats(slot);
186  return mru_valstats.freq;
187  }
188 
189  std::string get_value_lower_bound(Xapian::valueno slot) const {
190  if (mru_slot != slot) get_value_stats(slot);
191  return mru_valstats.lower_bound;
192  }
193 
194  std::string get_value_upper_bound(Xapian::valueno slot) const {
195  if (mru_slot != slot) get_value_stats(slot);
196  return mru_valstats.upper_bound;
197  }
198 
206  void set_value_stats(std::map<Xapian::valueno, ValueStats>& val_stats);
207 
208  void reset() {
211  }
212 
213  bool is_modified() const {
214  return !changes.empty();
215  }
216 
217  void cancel() {
218  // Discard batched-up changes.
219  slots.clear();
220  changes.clear();
221  }
222 };
223 
224 namespace Honey {
225 
227  const char* p;
228  const char* end;
229 
231 
232  std::string value;
233 
234  public:
236  ValueChunkReader() : p(NULL) { }
237 
238  ValueChunkReader(const char* p_, size_t len, Xapian::docid last_did) {
239  assign(p_, len, last_did);
240  }
241 
242  void assign(const char* p_, size_t len, Xapian::docid last_did);
243 
244  bool at_end() const { return p == NULL; }
245 
246  Xapian::docid get_docid() const { return did; }
247 
248  const std::string& get_value() const { return value; }
249 
250  void next();
251 
252  void skip_to(Xapian::docid target);
253 };
254 
255 }
256 
257 #endif // XAPIAN_INCLUDED_HONEY_VALUES_H
ValueStats mru_valstats
The most recently used value statistics.
Definition: honey_values.h:129
void add_value(Xapian::docid did, Xapian::valueno slot, const std::string &val)
std::string get_value_upper_bound(Xapian::valueno slot) const
Definition: honey_values.h:194
std::map< Xapian::valueno, std::map< Xapian::docid, std::string > > changes
Definition: honey_values.h:137
bool is_modified() const
Definition: honey_values.h:213
std::unique_ptr< HoneyCursor > cursor
Definition: honey_values.h:139
HoneyTermListTable & termlist_table
Definition: honey_values.h:133
void get_all_values(std::map< Xapian::valueno, std::string > &values, Xapian::docid did) const
std::string get_value_lower_bound(Xapian::valueno slot) const
Definition: honey_values.h:189
std::string get_value(Xapian::docid did, Xapian::valueno slot) const
void remove_value(Xapian::docid did, Xapian::valueno slot)
void set_value_stats(std::map< Xapian::valueno, ValueStats > &val_stats)
Write the updated statistics to the table.
std::string add_document(Xapian::docid did, const Xapian::Document &doc, std::map< Xapian::valueno, ValueStats > &val_stats)
void get_value_stats(Xapian::valueno slot) const
Get the statistics for value slot slot.
Xapian::valueno mru_slot
The value number for the most recently used value statistics.
Definition: honey_values.h:126
void delete_document(Xapian::docid did, std::map< Xapian::valueno, ValueStats > &val_stats)
std::map< Xapian::docid, std::string > slots
Definition: honey_values.h:135
HoneyPostListTable & postlist_table
Definition: honey_values.h:131
std::string replace_document(Xapian::docid did, const Xapian::Document &doc, std::map< Xapian::valueno, ValueStats > &val_stats)
Xapian::docid get_chunk_containing_did(Xapian::valueno slot, Xapian::docid did, std::string &chunk) const
Move the cursor to the chunk containing did.
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Definition: honey_values.h:184
HoneyValueManager(HoneyPostListTable &postlist_table_, HoneyTermListTable &termlist_table_)
Create a new HoneyValueManager object.
Definition: honey_values.h:161
void assign(const char *p_, size_t len, Xapian::docid last_did)
Definition: honey_values.cc:51
ValueChunkReader()
Create a ValueChunkReader which is already at_end().
Definition: honey_values.h:236
const std::string & get_value() const
Definition: honey_values.h:248
ValueChunkReader(const char *p_, size_t len, Xapian::docid last_did)
Definition: honey_values.h:238
void skip_to(Xapian::docid target)
Definition: honey_values.cc:81
Xapian::docid get_docid() const
Definition: honey_values.h:246
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
Class representing a document.
Definition: document.h:64
PositionList * p
Hierarchy of classes which Xapian can throw as exceptions.
HoneyCursor class.
static std::string encode_valuestats(Xapian::doccount freq, const std::string &lbound, const std::string &ubound)
Definition: honey_values.h:96
std::string make_valuechunk_key(Xapian::valueno slot, Xapian::docid last_did)
Generate a key for a value stream chunk.
Definition: honey_values.h:39
Xapian::docid docid_from_key(const std::string &key)
std::string make_valuestats_key(Xapian::valueno slot)
Definition: honey_values.h:83
@ KEY_VALUE_CHUNK_HI
Definition: honey_defs.h:84
@ KEY_VALUE_CHUNK
Definition: honey_defs.h:83
@ KEY_VALUE_STATS
Definition: honey_defs.h:81
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:100
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Pack types into strings and unpack them again.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:315
void pack_string(std::string &s, std::string_view value)
Append an encoded std::string to a string.
Definition: pack.h:442
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
Definition: pack.h:251
void pack_uint_preserving_sort(std::string &s, U value)
Append an encoded unsigned integer to a string, preserving the sort order.
Definition: pack.h:204
Class to hold statistics for a given slot.
Definition: valuestats.h:28
std::string lower_bound
A lower bound on the values stored in the given value slot.
Definition: valuestats.h:36
std::string upper_bound
An upper bound on the values stored in the given value slot.
Definition: valuestats.h:40
Xapian::doccount freq
The number of documents which have a (non-empty) value stored in the slot.
Definition: valuestats.h:32
typedefs for Xapian
Statistics about values.