xapian-core  2.0.0
honey_database.h
Go to the documentation of this file.
1 
4 /* Copyright 2004,2006,2007,2008,2009,2011,2014,2015,2016,2017,2024 Olly Betts
5  * Copyright 2007,2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_HONEY_DATABASE_H
23 #define XAPIAN_INCLUDED_HONEY_DATABASE_H
24 
26 
27 #include "honey_alldocspostlist.h"
28 #include "honey_docdata.h"
29 #include "honey_postlisttable.h"
30 #include "honey_positionlist.h"
31 #include "honey_spelling.h"
32 #include "honey_synonym.h"
33 #include "honey_termlisttable.h"
34 #include "honey_values.h"
35 #include "honey_version.h"
36 #include "xapian/compactor.h"
37 
38 #include <string_view>
39 
40 class HoneyAllTermsList;
41 class HoneyCursor;
42 class HoneyPostList;
45 class HoneyTermList;
46 
49  friend class HoneyAllTermsList;
50  friend class HoneyPosPostList;
51  friend class HoneyPostList;
52  friend class HoneySpellingWordsList;
53  friend class HoneySynonymTermList;
54  friend class HoneyTermList;
55 
58 
60  HoneyDatabase(const HoneyDatabase&) = delete;
61 
63  std::string path;
64 
67 
69 
71 
73 
75 
77 
79 
81 
83 
84  mutable HoneyCursor* doclen_cursor = NULL;
85 
86  [[noreturn]]
88 
89  public:
90  explicit
91  HoneyDatabase(std::string_view path_, int flags = Xapian::DB_READONLY_);
92 
93  explicit
94  HoneyDatabase(int fd, int flags = Xapian::DB_READONLY_);
95 
97 
98  void readahead_for_query(const Xapian::Query& query) const;
99 
101 
104 
107 
109 
115 
116  // Return the max_wdf in the document
118 
127  void get_freqs(std::string_view term,
128  Xapian::doccount* termfreq_ptr,
129  Xapian::termcount* collfreq_ptr) const;
130 
142 
150  std::string get_value_lower_bound(Xapian::valueno slot) const;
151 
159  std::string get_value_upper_bound(Xapian::valueno slot) const;
160 
163 
166 
168  Xapian::termcount get_wdf_upper_bound(std::string_view term) const;
169 
172 
175 
176  bool term_exists(std::string_view term) const;
177 
179  bool has_positions() const;
180 
181  PostList* open_post_list(std::string_view term) const;
182 
183  LeafPostList* open_leaf_post_list(std::string_view term,
184  bool need_read_pos) const;
185 
196 
198 
205 
206  TermList* open_allterms(std::string_view prefix) const;
207 
209  std::string_view term) const;
210 
226  bool lazy) const;
227 
234  TermList* open_spelling_termlist(std::string_view word) const;
235 
242 
244  Xapian::doccount get_spelling_frequency(std::string_view word) const;
245 
253  void add_spelling(std::string_view word,
254  Xapian::termcount freqinc) const;
255 
266  Xapian::termcount remove_spelling(std::string_view word,
267  Xapian::termcount freqdec) const;
268 
273  TermList* open_synonym_termlist(std::string_view term) const;
274 
280  TermList* open_synonym_keylist(std::string_view prefix) const;
281 
287  void add_synonym(std::string_view term,
288  std::string_view synonym) const;
289 
294  void remove_synonym(std::string_view term,
295  std::string_view synonym) const;
296 
301  void clear_synonyms(std::string_view term) const;
302 
307  std::string get_metadata(std::string_view key) const;
308 
316  TermList* open_metadata_keylist(std::string_view prefix) const;
317 
322  void set_metadata(std::string_view key, std::string_view value);
323 
329  bool reopen();
330 
332  void close();
333 
349  void request_document(Xapian::docid did) const;
350 
352  Xapian::rev get_revision() const;
353 
364  std::string get_uuid() const;
365 
375  int get_backend_info(std::string* path) const;
376 
382  void get_used_docid_range(Xapian::docid& first, Xapian::docid& last) const;
383 
384  static
385  void compact(Xapian::Compactor* compactor,
386  const char* destdir,
387  int fd,
388  int source_backend,
389  const std::vector<const Xapian::Database::Internal*>& sources,
390  const std::vector<Xapian::docid>& offset,
392  unsigned flags,
393  Xapian::docid last_docid);
394 
395  bool has_uncommitted_changes() const {
396  return false;
397  }
398 
399  bool single_file() const {
400  return false;
401  }
402 
404  return postlist_table.cursor_get();
405  }
406 
408  std::string get_description() const;
409 };
410 
411 #endif // XAPIAN_INCLUDED_HONEY_DATABASE_H
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
Database using honey backend.
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
HoneyCursor * doclen_cursor
Xapian::doccount get_spelling_frequency(std::string_view word) const
Return the number of times word was added as a spelling.
void add_spelling(std::string_view word, Xapian::termcount freqinc) const
Add a word to the spelling dictionary.
HoneySpellingTable spelling_table
bool single_file() const
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
Honey::DocLenChunkReader doclen_chunk_reader
HoneyCursor * get_postlist_cursor() const
TermList * open_spelling_wordlist() const
Return a termlist which returns the words which are spelling correction targets.
std::string path
Path of the directory.
void add_synonym(std::string_view term, std::string_view synonym) const
Add a synonym for a term.
HoneyVersion version_file
Version file ("iamhoney").
HoneyPostListTable postlist_table
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
void remove_synonym(std::string_view term, std::string_view synonym) const
Remove a synonym for a term.
void request_document(Xapian::docid did) const
Request a document.
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a handle on a document.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
PositionList * open_position_list(Xapian::docid did, std::string_view term) const
HoneyTermListTable termlist_table
int get_backend_info(std::string *path) const
Get backend information about this database.
bool reopen()
Reopen the database to the latest available revision.
HoneyDatabase & operator=(const HoneyDatabase &)=delete
Don't allow assignment.
TermList * open_synonym_termlist(std::string_view term) const
Open a termlist returning synonyms for a term.
std::string get_description() const
Return a string describing this object.
Xapian::termcount remove_spelling(std::string_view word, Xapian::termcount freqdec) const
Remove a word from the spelling dictionary.
TermList * open_allterms(std::string_view prefix) const
Xapian::termcount get_doclength(Xapian::docid did) const
void close()
Close the database.
Xapian::termcount get_unique_terms_upper_bound() const
Get an upper bound on the number of unique terms in a document in this DB.
TermList * open_metadata_keylist(std::string_view prefix) const
Open a termlist returning each metadata key.
Xapian::termcount get_wdfdocmax(Xapian::docid did) const
Get the max wdf in document.
std::string get_metadata(std::string_view key) const
Get the metadata associated with a given key.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
Find lowest and highest docids actually in use.
HoneySynonymTable synonym_table
Xapian::docid get_lastdocid() const
Return the last used document id of this (sub) database.
bool has_uncommitted_changes() const
HoneyValueManager value_manager
HoneyPositionTable position_table
std::string get_uuid() const
Get a UUID for the database.
Xapian::termcount get_wdf_upper_bound(std::string_view term) const
Get an upper bound on the wdf of the given term.
PostList * open_post_list(std::string_view term) const
Return a PostList suitable for use in a PostingIterator.
Xapian::termcount get_unique_terms_lower_bound() const
Get a lower bound on the number of unique terms in a document in this DB.
Xapian::rev get_revision() const
Get the current revision of the database.
TermList * open_term_list_direct(Xapian::docid did) const
Like open_term_list() but without MultiTermList wrapper.
TermList * open_synonym_keylist(std::string_view prefix) const
Open a termlist returning each term which has synonyms.
void throw_termlist_table_close_exception() const
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
bool has_positions() const
Check whether this database contains any positional information.
static void compact(Xapian::Compactor *compactor, const char *destdir, int fd, int source_backend, const std::vector< const Xapian::Database::Internal * > &sources, const std::vector< Xapian::docid > &offset, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
HoneyDocDataTable docdata_table
TermList * open_term_list(Xapian::docid did) const
void set_metadata(std::string_view key, std::string_view value)
Set the metadata associated with a given key.
void readahead_for_query(const Xapian::Query &query) const
ValueList * open_value_list(Xapian::valueno slot) const
Open a value stream.
LeafPostList * open_leaf_post_list(std::string_view term, bool need_read_pos) const
Create a LeafPostList for use during a match.
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
HoneyDatabase(const HoneyDatabase &)=delete
Don't allow copying.
bool term_exists(std::string_view term) const
Xapian::doccount get_doccount() const
Xapian::totallength get_total_length() const
Return the total length of all documents in this database.
void clear_synonyms(std::string_view term) const
Clear all synonyms for a term.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in document.
TermList * open_spelling_termlist(std::string_view word) const
Create a termlist tree from trigrams of word.
PostList in a honey database with positions.
PostList in a honey database.
HoneyCursor * cursor_get() const
Definition: honey_table.cc:454
A TermList in a honey database.
Xapian::docid did
The document id that this TermList is for.
The HoneyVersion class manages the revision files.
Definition: honey_version.h:79
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
Compact a database, or merge and compact several.
Definition: compactor.h:39
compaction_level
Compaction level.
Definition: compactor.h:42
Virtual base class for Database internals.
Abstract base class for a document.
Abstract base class for postlists.
Definition: postlist.h:40
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
Class representing a query.
Definition: query.h:45
Abstract base class for termlists.
Definition: termlist.h:42
Abstract base class for value streams.
Definition: valuelist.h:31
Compact a database, or merge and compact several.
string term
Virtual base class for Database internals.
A PostList which iterates over all documents in a HoneyDatabase.
Subclass of HoneyTable which holds document data.
A position list in a honey database.
Subclass of HoneyTable which holds postlists.
Spelling correction data for a honey database.
Synonym data for a honey database.
Subclass of HoneyTable which holds termlists.
HoneyValueManager class.
HoneyVersion class.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:108
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:114