xapian-core  2.0.0
honey_version.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2006-2024 Olly Betts
5  * Copyright (C) 2011 Dan Colish
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_HONEY_VERSION_H
23 #define XAPIAN_INCLUDED_HONEY_VERSION_H
24 
25 #include "honey_defs.h"
26 
27 #include "omassert.h"
28 
29 #include <algorithm>
30 #include <string>
31 #include <string_view>
32 
33 #include "backends/uuids.h"
34 #include "internaltypes.h"
35 #include "min_non_zero.h"
36 #include "xapian/types.h"
37 
38 namespace Honey {
39 
// Root information for one honey table.  HoneyVersion keeps arrays of these
// (root[Honey::MAX_] and old_root[Honey::MAX_]) and indexes root by
// Honey::table_type.
//
// NOTE(review): original lines 43-45, 57 and 61 are absent from this listing;
// per the member index they cover num_entries (honey_tablesize_t),
// compress_min (uint4, "Should be >= 4 or 0 for no compression") and the
// get_num_entries()/set_num_entries() accessors.
40 class RootInfo {
    // Offset and root values for the table, exposed via the accessors below.
    // NOTE(review): their exact meaning is not documented in this listing.
41  off_t offset;
42  off_t root;
    // Serialised free list, stored and returned as an opaque string.
46  std::string fl_serialised;
47 
48  public:
    // Initialise this object (defined out of line).
    // NOTE(review): compress_min_ is presumably stored in compress_min - confirm.
49  void init(uint4 compress_min_);
50 
    // Serialise this object into s; const, so *this is not modified.
    // NOTE(review): whether s is appended to or overwritten is not visible here.
51  void serialise(std::string& s) const;
52 
    // Unserialise from the bytes between *p and end (defined out of line).
    // NOTE(review): presumably advances *p and returns false on bad data -
    // confirm against the definition.
53  bool unserialise(const char** p, const char* end);
54 
    // Trivial read accessors for the fields above.
55  off_t get_offset() const { return offset; }
56  off_t get_root() const { return root; }
58  uint4 get_compress_min() const { return compress_min; }
59  const std::string& get_free_list() const { return fl_serialised; }
60 
    // Trivial setters; set_free_list() copies s into fl_serialised.
62  void set_offset(off_t offset_) { offset = offset_; }
63  void set_root(off_t root_) { root = root_; }
64  void set_free_list(const std::string& s) { fl_serialised = s; }
65 };
66 
67 }
68 
// NOTE(review): the doc comment for this macro is missing from this listing;
// presumably an upper limit on the size of the honey version file data -
// confirm against the code that uses it.
70 #define HONEY_VERSION_MAX_SIZE 1024
71 
// The HoneyVersion class manages the revision files.
//
// NOTE(review): this listing omits many original lines - data members such as
// rev, root, old_root, uuid and the statistics fields, plus the signature
// lines of most inline methods - so several bodies below appear without a
// header.  The member index after the listing names the missing declarations.
79 class HoneyVersion {
81 
84 
87 
    // File descriptor.
96  int fd;
97 
    // Offset into the file at which the version data starts.
102  off_t offset = 0;
103 
    // The database directory.
105  std::string db_dir;
106 
109 
112 
115 
118 
121 
124 
127 
130 
135 
140 
    // The serialised database stats.
142  std::string serialised_stats;
143 
144  // Serialise the database stats.
145  void serialise_stats();
146 
147  // Unserialise the database stats.
148  void unserialise_stats();
149 
150  public:
    // Construct for the database in directory db_dir_; fd is set to -1.
151  explicit HoneyVersion(std::string_view db_dir_)
152  : fd(-1), db_dir(db_dir_) { }
153 
    // Construct from a file descriptor (defined out of line).
    // NOTE(review): presumably leaves db_dir empty so single_file() is true -
    // confirm against the definition.
154  explicit HoneyVersion(int fd_);
155 
156  ~HoneyVersion();
157 
    // Create the version file.
159  void create();
160 
    // Read the version file and check it's a version we understand.
165  void read();
166 
167  void cancel();
168 
169  const std::string write(honey_revision_number_t new_rev, int flags);
170 
171  bool sync(const std::string& tmpfile,
172  honey_revision_number_t new_rev, int flags);
173 
175 
    // NOTE(review): signature line missing from this listing; presumably
    // get_root(Honey::table_type tbl) const, per the member index.
177  return root[tbl];
178  }
179 
    // NOTE(review): signature line missing; presumably
    // root_to_set(Honey::table_type tbl), returning a pointer for modification.
181  return &root[tbl];
182  }
183 
    // Return pointer to 16 byte UUID.
185  const char* get_uuid() const {
186  return uuid.data();
187  }
188 
    // Return UUID in the standard 36 character string format.
190  std::string get_uuid_string() const {
191  return uuid.to_string();
192  }
193 
195 
197 
199 
    // NOTE(review): the getter bodies below have lost their signature lines;
    // the member index lists get_doclength_lower_bound(),
    // get_doclength_upper_bound(), get_oldest_changeset(),
    // get_unique_terms_lower_bound() and get_unique_terms_upper_bound().
201  return doclen_lbound;
202  }
203 
205  return doclen_ubound;
206  }
207 
209 
212  }
213 
215  return oldest_changeset;
216  }
217 
219  return uniq_terms_lbound;
220  }
221 
223  return uniq_terms_ubound;
224  }
225 
227 
    // NOTE(review): the member index lists set_oldest_changeset() as const,
    // so oldest_changeset is presumably declared mutable - confirm.
229  oldest_changeset = changeset;
230  }
231 
234  }
235 
    // NOTE(review): presumably set_unique_terms_lower_bound(); the index shows
    // its parameter is named `ub` even though it sets the lower bound.
237  uniq_terms_lbound = ub;
238  }
239 
241  uniq_terms_ubound = ub;
242  }
243 
    // NOTE(review): presumably the body of add_document(Xapian::termcount
    // doclen); original line 246 is also absent from this listing.
245  ++doccount;
247  doclen_ubound = std::max(doclen_ubound, doclen);
248  total_doclen += doclen;
249  }
250 
    // NOTE(review): presumably the body of delete_document(Xapian::termcount
    // doclen).
252  --doccount;
253  total_doclen -= doclen;
254  // If the database no longer contains any postings, we can reset
255  // doclen_lbound, doclen_ubound and wdf_ubound.
256  if (total_doclen == 0) {
257  doclen_lbound = 0;
258  doclen_ubound = 0;
259  wdf_ubound = 0;
260  }
261  }
262 
    // Raise wdf_ubound to wdf when wdf exceeds it.
    // NOTE(review): presumably the body of check_wdf(Xapian::termcount wdf).
264  if (wdf > wdf_ubound) wdf_ubound = wdf;
265  }
266 
268 
    // Merge the database stats.
273  void merge_stats(const HoneyVersion& o);
274 
    // Overload taking the individual statistics rather than another
    // HoneyVersion (defined out of line).
275  void merge_stats(Xapian::doccount o_doccount,
276  Xapian::termcount o_doclen_lbound,
277  Xapian::termcount o_doclen_ubound,
278  Xapian::termcount o_wdf_ubound,
279  Xapian::totallength o_total_doclen,
280  Xapian::termcount o_spelling_wordfreq_ubound,
281  Xapian::termcount o_uniq_terms_lbound,
282  Xapian::termcount o_uniq_terms_ubound);
283 
    // True when no database directory is set (db_dir is empty).
284  bool single_file() const { return db_dir.empty(); }
285 
    // Return the offset at which the version data starts.
286  off_t get_offset() const { return offset; }
287 };
288 
289 #endif // XAPIAN_INCLUDED_HONEY_VERSION_H
The HoneyVersion class manages the revision files.
Definition: honey_version.h:79
void create()
Create the version file.
void set_spelling_wordfreq_upper_bound(Xapian::termcount ub)
Xapian::termcount get_spelling_wordfreq_upper_bound() const
void check_wdf(Xapian::termcount wdf)
void delete_document(Xapian::termcount doclen)
Xapian::doccount doccount
The number of documents in the database.
Xapian::termcount doclen_lbound
A lower bound on the smallest document length in this database.
honey_revision_number_t oldest_changeset
Oldest changeset removed when max_changesets is set.
honey_revision_number_t get_oldest_changeset() const
Xapian::termcount doclen_ubound
An upper bound on the greatest document length in this database.
void merge_stats(const HoneyVersion &o)
Merge the database stats.
std::string get_uuid_string() const
Return UUID in the standard 36 character string format.
off_t offset
Offset into the file at which the version data starts.
bool sync(const std::string &tmpfile, honey_revision_number_t new_rev, int flags)
Xapian::termcount get_doclength_lower_bound() const
Xapian::docid get_last_docid() const
const std::string write(honey_revision_number_t new_rev, int flags)
Xapian::totallength get_total_doclen() const
Xapian::termcount get_unique_terms_upper_bound() const
void set_unique_terms_upper_bound(Xapian::termcount ub)
honey_revision_number_t rev
Definition: honey_version.h:80
std::string db_dir
The database directory.
Xapian::docid last_docid
Greatest document id ever used in this database.
Xapian::totallength total_doclen
The total of the lengths of all documents in the database.
int fd
File descriptor.
Definition: honey_version.h:96
Honey::RootInfo old_root[Honey::MAX_]
Definition: honey_version.h:83
HoneyVersion(std::string_view db_dir_)
Xapian::termcount spelling_wordfreq_ubound
An upper bound on the spelling wordfreq in this database.
Xapian::termcount get_unique_terms_lower_bound() const
off_t get_offset() const
Honey::RootInfo * root_to_set(Honey::table_type tbl)
void set_unique_terms_lower_bound(Xapian::termcount ub)
void unserialise_stats()
void set_last_docid(Xapian::docid did)
honey_revision_number_t get_revision() const
void read()
Read the version file and check it's a version we understand.
Xapian::termcount get_wdf_upper_bound() const
bool single_file() const
std::string serialised_stats
The serialised database stats.
void serialise_stats()
const char * get_uuid() const
Return pointer to 16 byte UUID.
void add_document(Xapian::termcount doclen)
Honey::RootInfo root[Honey::MAX_]
Definition: honey_version.h:82
Xapian::doccount get_doccount() const
Xapian::termcount uniq_terms_ubound
An upper bound on the number of unique terms in a document in this database.
Xapian::docid get_next_docid()
void set_oldest_changeset(honey_revision_number_t changeset) const
Xapian::termcount uniq_terms_lbound
A lower bound on the number of unique terms in a document in this database.
Xapian::termcount get_doclength_upper_bound() const
Xapian::termcount wdf_ubound
An upper bound on the greatest wdf in this database.
const Honey::RootInfo & get_root(Honey::table_type tbl) const
Uuid uuid
The UUID of this database.
Definition: honey_version.h:86
void init(uint4 compress_min_)
bool unserialise(const char **p, const char *end)
uint4 compress_min
Should be >= 4 or 0 for no compression.
Definition: honey_version.h:45
const std::string & get_free_list() const
Definition: honey_version.h:59
void set_free_list(const std::string &s)
Definition: honey_version.h:64
void set_num_entries(honey_tablesize_t n)
Definition: honey_version.h:61
std::string fl_serialised
Definition: honey_version.h:46
void serialise(std::string &s) const
uint4 get_compress_min() const
Definition: honey_version.h:58
honey_tablesize_t get_num_entries() const
Definition: honey_version.h:57
off_t get_offset() const
Definition: honey_version.h:55
void set_offset(off_t offset_)
Definition: honey_version.h:62
void set_root(off_t root_)
Definition: honey_version.h:63
off_t get_root() const
Definition: honey_version.h:56
honey_tablesize_t num_entries
Definition: honey_version.h:43
Definition: uuids.h:28
std::string to_string() const
Definition: uuids.cc:120
const char * data() const
Definition: uuids.h:60
PositionList * p
Definitions, types, etc for use inside honey.
unsigned long long honey_tablesize_t
How many entries there are in a table.
Definition: honey_defs.h:107
uint4 honey_revision_number_t
The revision number of a honey database.
Definition: honey_defs.h:104
Types used internally.
uint32_t uint4
Definition: internaltypes.h:31
Return the smaller of two numbers which isn't zero.
constexpr std::enable_if_t< std::is_unsigned_v< T >, T > min_non_zero(const T &a, const T &b)
Return the smaller of two unsigned integers which isn't zero.
Definition: min_non_zero.h:39
table_type
Definition: honey_defs.h:68
@ MAX_
Definition: honey_defs.h:75
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:114
Various assertion macros.
typedefs for Xapian
Class for handling UUIDs.