xapian-core  2.0.0
glass_database.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002-2024 Olly Betts
7  * Copyright 2008 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, see
21  * <https://www.gnu.org/licenses/>.
22  */
23 
24 #ifndef XAPIAN_INCLUDED_GLASS_DATABASE_H
25 #define XAPIAN_INCLUDED_GLASS_DATABASE_H
26 
27 #include "backends/backends.h"
29 #include "glass_changes.h"
30 #include "glass_docdata.h"
31 #include "glass_inverter.h"
32 #include "glass_positionlist.h"
33 #include "glass_postlist.h"
34 #include "glass_spelling.h"
35 #include "glass_synonym.h"
36 #include "glass_termlisttable.h"
37 #include "glass_values.h"
38 #include "glass_version.h"
39 #include "../flint_lock.h"
40 #include "glass_defs.h"
41 #include "backends/valuestats.h"
42 
43 #include "xapian/compactor.h"
44 #include "xapian/constants.h"
45 
46 #include <map>
47 #include <string_view>
48 
49 class GlassTermList;
51 class HoneyDatabase;
52 class RemoteConnection;
53 
58  friend class GlassWritableDatabase;
59  friend class GlassTermList;
60  friend class GlassPostList;
61  friend class GlassAllTermsList;
62  friend class GlassAllDocsPostList;
63  friend class GlassSpellingWordsList;
64  friend class GlassSynonymTermList;
65  friend class HoneyDatabase; // For compacting to convert.
66 
67  private:
70  std::string db_dir;
71 
74  bool readonly;
75 
81 
89 
93 
97 
100 
104 
108 
112 
115 
118 
122  bool database_exists();
123 
127  void create_and_open_tables(int flags, unsigned int blocksize);
128 
137  bool open_tables(int flags);
138 
148  void get_database_write_lock(int flags, bool creating);
149 
156 
167  void set_revision_number(int flags, glass_revision_number_t new_revision);
168 
172  bool reopen();
173 
176  void close();
177 
184  const std::string & msg);
185 
194  void apply();
195 
198  void cancel();
199 
202  void send_whole_database(RemoteConnection & conn, double end_time);
203 
206  void get_changeset_revisions(const string & path,
207  glass_revision_number_t * startrev,
208  glass_revision_number_t * endrev) const;
209 
210  public:
230  explicit GlassDatabase(std::string_view db_dir_,
231  int flags = Xapian::DB_READONLY_,
232  unsigned int block_size = 0u);
233 
234  explicit GlassDatabase(int fd);
235 
236  ~GlassDatabase();
237 
240  return postlist_table.cursor_get();
241  }
242 
251  void get_freqs(std::string_view term,
252  Xapian::doccount* termfreq_ptr,
253  Xapian::termcount* collfreq_ptr) const;
255  std::string get_value_lower_bound(Xapian::valueno slot) const;
256  std::string get_value_upper_bound(Xapian::valueno slot) const;
259  Xapian::termcount get_wdf_upper_bound(std::string_view term) const;
261  bool term_exists(std::string_view term) const;
262  bool has_positions() const;
263 
264  PostList* open_post_list(std::string_view tname) const;
265  LeafPostList* open_leaf_post_list(std::string_view term,
266  bool need_read_pos) const;
269  bool lazy) const;
270 
271  virtual void read_position_list(GlassRePositionList* pos_list,
272  Xapian::docid did,
273  std::string_view term) const;
275  std::string_view term) const;
277  std::string_view term) const;
280  TermList* open_allterms(std::string_view prefix) const;
281 
282  TermList* open_spelling_termlist(std::string_view word) const;
284  Xapian::doccount get_spelling_frequency(std::string_view word) const;
285 
286  TermList* open_synonym_termlist(std::string_view term) const;
287  TermList* open_synonym_keylist(std::string_view prefix) const;
288 
289  string get_metadata(std::string_view key) const;
290  TermList* open_metadata_keylist(std::string_view prefix) const;
291  void write_changesets_to_fd(int fd,
292  std::string_view start_revision,
293  bool need_whole_db,
294  Xapian::ReplicationInfo * info);
299  Xapian::rev get_revision() const;
300  string get_uuid() const;
301 
302  void request_document(Xapian::docid /*did*/) const;
303  void readahead_for_query(const Xapian::Query &query) const;
305 
306  [[noreturn]]
308 
309  int get_backend_info(string * path) const {
310  if (path) *path = db_dir;
311  return BACKEND_GLASS;
312  }
313 
314  bool single_file() const { return version_file.single_file(); }
315 
316  void get_used_docid_range(Xapian::docid & first,
317  Xapian::docid & last) const;
318 
320  virtual bool has_uncommitted_changes() const;
321 
322  bool locked() const;
323 
325 
326  static void compact(Xapian::Compactor * compactor,
327  const char * destdir,
328  int fd,
329  const std::vector<const Xapian::Database::Internal*>& sources,
330  const std::vector<Xapian::docid> & offset,
331  unsigned block_size,
333  unsigned flags,
334  Xapian::docid last_docid);
335 
336  std::string get_description() const;
337 };
338 
343 
344  mutable std::map<Xapian::valueno, ValueStats> value_stats;
345 
350 
353 
361 
365 
370  void check_flush_threshold();
371 
373  void flush_postlist_changes();
374 
376  void close();
377 
379  void apply();
380 
382 
385  void commit();
386 
388  void cancel();
389 
392  const Xapian::Document& document);
393  // Stop the default implementation of delete_document(term) and
394  // replace_document(term) from being hidden. This isn't really
395  // a problem as we only try to call them through the base class
396  // (where they aren't hidden) but some compilers generate a warning
397  // about the hiding.
400  void delete_document(Xapian::docid did);
401  void replace_document(Xapian::docid did, const Xapian::Document & document);
402 
404  bool lazy) const;
405 
407 
408  public:
420  GlassWritableDatabase(std::string_view dir, int flags, int block_size);
421 
423 
428  void get_freqs(std::string_view term,
429  Xapian::doccount* termfreq_ptr,
430  Xapian::termcount* collfreq_ptr) const;
432  std::string get_value_lower_bound(Xapian::valueno slot) const;
433  std::string get_value_upper_bound(Xapian::valueno slot) const;
434  bool term_exists(std::string_view term) const;
435  bool has_positions() const;
436 
437  PostList* open_post_list(std::string_view term) const;
438  LeafPostList* open_leaf_post_list(std::string_view term,
439  bool need_read_pos) const;
441 
443  Xapian::docid did,
444  std::string_view term) const;
446  std::string_view term) const;
448  std::string_view term) const;
449  TermList* open_allterms(std::string_view prefix) const;
450 
451  void add_spelling(std::string_view word, Xapian::termcount freqinc) const;
452  Xapian::termcount remove_spelling(std::string_view word,
453  Xapian::termcount freqdec) const;
455 
456  TermList* open_synonym_keylist(std::string_view prefix) const;
457  void add_synonym(std::string_view word, std::string_view synonym) const;
458  void remove_synonym(std::string_view word, std::string_view synonym) const;
459  void clear_synonyms(std::string_view word) const;
460 
461  void set_metadata(std::string_view key, std::string_view value);
464 
466  bool has_uncommitted_changes() const;
467 
469 };
470 
471 #ifdef DISABLE_GPL_LIBXAPIAN
472 # error GPL source we cannot relicense included in libxapian
473 #endif
474 
475 #endif /* XAPIAN_INCLUDED_GLASS_DATABASE_H */
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:62
BACKEND_* constants.
@ BACKEND_GLASS
Definition: backends.h:29
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:148
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree storage scheme.
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
Find lowest and highest docids actually in use.
string get_metadata(std::string_view key) const
Get the metadata associated with a given key.
TermList * open_term_list(Xapian::docid did) const
Xapian::totallength get_total_length() const
Return the total length of all documents in this database.
bool locked() const
Return true if the database is open for writing.
Xapian::termcount get_wdf_upper_bound(std::string_view term) const
Get an upper bound on the wdf of term term.
void get_database_write_lock(int flags, bool creating)
Get a write lock on the database, or throw an Xapian::DatabaseLockError on failure.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
LeafPostList * open_leaf_post_list(std::string_view term, bool need_read_pos) const
Create a LeafPostList for use during a match.
void apply()
Apply any outstanding changes to the tables.
PostList * open_post_list(std::string_view tname) const
Return a PostList suitable for use in a PostingIterator.
void set_revision_number(int flags, glass_revision_number_t new_revision)
Set the revision number in the tables.
ValueList * open_value_list(Xapian::valueno slot) const
Open a value stream.
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
GlassSpellingTable spelling_table
Table storing spelling correction data.
Xapian::Database::Internal * update_lock(int flags)
Lock a read-only database for writing or unlock a writable database.
std::string db_dir
Directory to store databases in.
void cancel()
Cancel any outstanding changes to the tables.
void get_changeset_revisions(const string &path, glass_revision_number_t *startrev, glass_revision_number_t *endrev) const
Get the revision stored in a changeset.
TermList * open_allterms(std::string_view prefix) const
PositionList * open_position_list(Xapian::docid did, std::string_view term) const
TermList * open_metadata_keylist(std::string_view prefix) const
Open a termlist returning each metadata key.
GlassTermListTable termlist_table
Table storing term lists.
Xapian::termcount get_doclength(Xapian::docid did) const
GlassValueManager value_manager
Value manager.
bool term_exists(std::string_view term) const
TermList * open_spelling_termlist(std::string_view word) const
Create a termlist tree from trigrams of word.
static void compact(Xapian::Compactor *compactor, const char *destdir, int fd, const std::vector< const Xapian::Database::Internal * > &sources, const std::vector< Xapian::docid > &offset, unsigned block_size, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
Xapian::docid get_lastdocid() const
Return the last used document id of this (sub) database.
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a handle on a document.
void close()
Close all the tables permanently.
Xapian::doccount get_doccount() const
Virtual methods of Database::Internal.
virtual void read_position_list(GlassRePositionList *pos_list, Xapian::docid did, std::string_view term) const
string get_uuid() const
Get a UUID for the database.
void send_whole_database(RemoteConnection &conn, double end_time)
Send a set of messages which transfer the whole database.
GlassCursor * get_postlist_cursor() const
Get a postlist table cursor (used by GlassValueList).
GlassChanges changes
Replication changesets.
FlintLock lock
Lock object.
GlassVersion version_file
The file describing the Glass database.
void create_and_open_tables(int flags, unsigned int blocksize)
Create new tables, and open them.
void readahead_for_query(const Xapian::Query &query) const
glass_revision_number_t get_next_revision_number() const
Get an object holding the next revision number which should be used in the tables.
Xapian::termcount get_wdfdocmax(Xapian::docid did) const
Get the max wdf in document.
bool readonly
Whether the database is readonly.
void request_document(Xapian::docid) const
Request a document.
virtual Xapian::termcount positionlist_count(Xapian::docid did, std::string_view term) const
TermList * open_spelling_wordlist() const
Return a termlist which returns the words which are spelling correction targets.
Xapian::termcount get_unique_terms_lower_bound() const
Get a lower bound on the unique terms size of a document in this DB.
void modifications_failed(glass_revision_number_t new_revision, const std::string &msg)
Called if a modification fails.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in document.
GlassDatabase(std::string_view db_dir_, int flags=Xapian::DB_READONLY_, unsigned int block_size=0u)
Create and open a glass database.
virtual bool has_uncommitted_changes() const
Return true if there are uncommitted changes.
std::string get_description() const
Return a string describing this object.
TermList * open_synonym_keylist(std::string_view prefix) const
Open a termlist returning each term which has synonyms.
GlassPositionListTable position_table
Table storing position lists.
bool reopen()
Re-open tables to recover from an overwritten condition, or just to get the most up-to-date version.
TermList * open_term_list_direct(Xapian::docid did) const
Like open_term_list() but without MultiTermList wrapper.
GlassDocDataTable docdata_table
Table storing document data.
bool database_exists()
Return true if a database exists at the path specified for this database.
Xapian::doccount get_spelling_frequency(std::string_view word) const
Return the number of times word was added as a spelling.
int get_backend_info(string *path) const
Get backend information about this database.
void write_changesets_to_fd(int fd, std::string_view start_revision, bool need_whole_db, Xapian::ReplicationInfo *info)
Write a set of changesets to a file descriptor.
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
TermList * open_synonym_termlist(std::string_view term) const
Open a termlist returning synonyms for a term.
GlassPostListTable postlist_table
Table storing posting lists.
bool single_file() const
friend class GlassWritableDatabase
GlassSynonymTable synonym_table
Table storing synonym data.
bool open_tables(int flags)
Open all tables at most recent revision.
void throw_termlist_table_close_exception() const
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
Xapian::rev get_revision() const
Get the revision number which the tables are opened at.
bool has_positions() const
Check whether this database contains any positional information.
A postlist in a glass database.
A reusable position list in a glass database.
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
A TermList in a glass database.
The GlassVersion class manages the revision files.
Definition: glass_version.h:96
bool single_file() const
A writable glass database.
void flush_postlist_changes()
Flush any unflushed postlist changes, but don't commit them.
Xapian::docid add_document_(Xapian::docid did, const Xapian::Document &document)
bool term_exists(std::string_view term) const
void remove_synonym(std::string_view word, std::string_view synonym) const
Remove a synonym for a term.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in document.
TermList * open_synonym_keylist(std::string_view prefix) const
Open a termlist returning each term which has synonyms.
std::map< Xapian::valueno, ValueStats > value_stats
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
TermList * open_spelling_wordlist() const
Return a termlist which returns the words which are spelling correction targets.
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
void close()
Close all the tables permanently.
bool has_uncommitted_changes() const
Return true if there are uncommitted changes.
ValueList * open_value_list(Xapian::valueno slot) const
Open a value stream.
void commit()
Implementation of virtual methods: see Database::Internal for details.
void replace_document(Xapian::docid did, const Xapian::Document &document)
void delete_document(Xapian::docid did)
void invalidate_doc_object(Xapian::Document::Internal *obj) const
Notify the database that document is no longer valid.
void add_synonym(std::string_view word, std::string_view synonym) const
Add a synonym for a term.
void apply()
Apply changes.
void cancel()
Cancel pending modifications to the database.
LeafPostList * open_leaf_post_list(std::string_view term, bool need_read_pos) const
Create a LeafPostList for use during a match.
void read_position_list(GlassRePositionList *pos_list, Xapian::docid did, std::string_view term) const
PostList * open_post_list(std::string_view term) const
Return a PostList suitable for use in a PostingIterator.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
Xapian::Document::Internal * modify_shortcut_document
A pointer to the last document which was returned by open_document(), or NULL if there is no such valid document.
void add_spelling(std::string_view word, Xapian::termcount freqinc) const
Add a word to the spelling dictionary.
Xapian::Database::Internal * update_lock(int flags)
Lock a read-only database for writing or unlock a writable database.
Xapian::termcount get_doclength(Xapian::docid did) const
Virtual methods of Database::Internal.
void check_flush_threshold()
Check if we should autoflush.
TermList * open_allterms(std::string_view prefix) const
Xapian::doccount flush_threshold
If change_count reaches this threshold we automatically flush.
Xapian::docid add_document(const Xapian::Document &document)
Xapian::docid modify_shortcut_docid
The document ID for the last document returned by open_document().
Xapian::termcount positionlist_count(Xapian::docid did, std::string_view term) const
PositionList * open_position_list(Xapian::docid did, std::string_view term) const
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a handle on a document.
bool has_positions() const
Check whether this database contains any positional information.
void set_metadata(std::string_view key, std::string_view value)
Set the metadata associated with a given key.
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
Xapian::termcount remove_spelling(std::string_view word, Xapian::termcount freqdec) const
Remove a word from the spelling dictionary.
Xapian::doccount change_count
The number of documents added, deleted, or replaced since the last flush.
void clear_synonyms(std::string_view word) const
Clear all synonyms for a term.
Database using honey backend.
std::string path
Path of the directory.
Class which "inverts the file".
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
A RemoteConnection object provides a bidirectional connection to another RemoteConnection object on a remote machine.
Compact a database, or merge and compact several.
Definition: compactor.h:39
compaction_level
Compaction level.
Definition: compactor.h:42
Virtual base class for Database internals.
virtual void replace_document(docid did, const Document &document)
virtual void delete_document(docid did)
Abstract base class for a document.
Class representing a document.
Definition: document.h:64
Abstract base class for postlists.
Definition: postlist.h:40
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
Class representing a query.
Definition: query.h:45
Abstract base class for termlists.
Definition: termlist.h:42
Abstract base class for value streams.
Definition: valuelist.h:31
Compact a database, or merge and compact several.
Constants in the Xapian namespace.
string term
Virtual base class for Database internals.
Glass changesets.
Definitions, types, etc for use inside glass.
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
Subclass of GlassTable which holds document data.
Inverter class which "inverts the file".
A position list in a glass database.
Postlists in glass databases.
Spelling correction data for a glass database.
Synonym data for a glass database.
Subclass of GlassTable which holds termlists.
GlassValueManager class.
GlassVersion class.
double end_time(double timeout)
Return the end time for a timeout in timeout seconds.
Definition: realtime.h:95
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:108
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:114
Information about the steps involved in performing a replication.
Definition: replication.h:32
Statistics about values.