xapian-core  1.4.27
chert_database.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
7  * Copyright 2008 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #ifndef OM_HGUARD_CHERT_DATABASE_H
26 #define OM_HGUARD_CHERT_DATABASE_H
27 
28 #include "backends/backends.h"
29 #include "backends/database.h"
30 #include "chert_dbstats.h"
31 #include "chert_positionlist.h"
32 #include "chert_postlist.h"
33 #include "chert_record.h"
34 #include "chert_spelling.h"
35 #include "chert_synonym.h"
36 #include "chert_termlisttable.h"
37 #include "chert_values.h"
38 #include "chert_version.h"
39 #include "../flint_lock.h"
40 #include "chert_types.h"
41 #include "backends/valuestats.h"
42 
43 #include "noreturn.h"
44 
45 #include "xapian/compactor.h"
46 #include "xapian/constants.h"
47 
48 #include <map>
49 #include <vector>
50 #include <string>
51 
52 class ChertTermList;
54 class RemoteConnection;
55 
60  friend class ChertWritableDatabase;
61  friend class ChertTermList;
62  friend class ChertPostList;
63  friend class ChertAllTermsList;
64  friend class ChertAllDocsPostList;
65  private:
68  std::string db_dir;
69 
72  bool readonly;
73 
79 
87 
91 
95 
98 
102 
106 
116 
119 
122  unsigned int max_changesets;
123 
126 
130  bool database_exists();
131 
135  void create_and_open_tables(unsigned int blocksize);
136 
145  bool open_tables_consistent();
146 
156  void get_database_write_lock(int flags, bool creating);
157 
164 
171 
182  void set_revision_number(chert_revision_number_t new_revision);
183 
187  bool reopen();
188 
191  void close();
192 
199  chert_revision_number_t new_revision,
200  const std::string & msg);
201 
210  void apply();
211 
214  void cancel();
215 
218  void send_whole_database(RemoteConnection & conn, double end_time);
219 
222  void get_changeset_revisions(const string & path,
223  chert_revision_number_t * startrev,
224  chert_revision_number_t * endrev) const;
225  public:
245  ChertDatabase(const string &db_dir_, int action = Xapian::DB_READONLY_,
246  unsigned int block_size = 0u);
247 
248  ~ChertDatabase();
249 
252  return postlist_table.cursor_get();
253  }
254 
261 
269  void get_freqs(const string & term,
270  Xapian::doccount * termfreq_ptr,
271  Xapian::termcount * collfreq_ptr) const;
273  std::string get_value_lower_bound(Xapian::valueno slot) const;
274  std::string get_value_upper_bound(Xapian::valueno slot) const;
277  Xapian::termcount get_wdf_upper_bound(const string & term) const;
278  bool term_exists(const string & tname) const;
279  bool has_positions() const;
280 
281  LeafPostList * open_post_list(const string & tname) const;
284 
285  PositionList * open_position_list(Xapian::docid did, const string & term) const;
287  TermList * open_allterms(const string & prefix) const;
288 
289  TermList * open_spelling_termlist(const string & word) const;
291  Xapian::doccount get_spelling_frequency(const string & word) const;
292 
293  TermList * open_synonym_termlist(const string & term) const;
294  TermList * open_synonym_keylist(const string & prefix) const;
295 
296  string get_metadata(const string & key) const;
297  TermList * open_metadata_keylist(const std::string &prefix) const;
298  void write_changesets_to_fd(int fd,
299  const string & start_revision,
300  bool need_whole_db,
301  Xapian::ReplicationInfo * info);
302  string get_revision_info() const;
303  string get_uuid() const;
304 
305  void request_document(Xapian::docid /*did*/) const;
308 
309  XAPIAN_NORETURN(void throw_termlist_table_close_exception() const);
310 
311  int get_backend_info(string * path) const {  // Report the backend type; optionally also report the database directory.
312  if (path) *path = db_dir;  // path may be null: db_dir is only copied out when a destination is supplied
313  return BACKEND_CHERT;  // constant identifying this backend as chert
314  }
315 
316  void get_used_docid_range(Xapian::docid & first,
317  Xapian::docid & last) const;
318 
319  bool locked() const;
320 
322  virtual bool has_uncommitted_changes() const;
323 
324  static void compact(Xapian::Compactor * compactor,
325  const char * destdir,
326  const std::vector<Xapian::Database::Internal *> & sources,
327  const std::vector<Xapian::docid> & offset,
328  size_t block_size,
330  unsigned flags,
331  Xapian::docid last_docid);
332 };
333 
338  mutable map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >
340 
342  mutable map<Xapian::docid, Xapian::termcount> doclens;
343 
345  mutable map<string, map<Xapian::docid,
346  pair<char, Xapian::termcount> > > mod_plists;
347 
348  mutable map<Xapian::valueno, ValueStats> value_stats;
349 
354 
357 
365 
368  mutable Xapian::docid modify_shortcut_docid;
369 
374  void check_flush_threshold();
375 
377  void flush_postlist_changes() const;
378 
380  void close();
381 
383  void apply();
384 
391  void add_freq_delta(const string & tname,
392  Xapian::termcount_diff tf_delta,
393  Xapian::termcount_diff cf_delta);
394 
401  void insert_mod_plist(Xapian::docid did,
402  const string & tname,
403  Xapian::termcount wdf);
404 
416  void update_mod_plist(Xapian::docid did,
417  const string & tname,
418  char type,
419  Xapian::termcount wdf);
420 
422 
425  void commit();
426 
428  void cancel();
429 
430  Xapian::docid add_document(const Xapian::Document & document);
431  Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document);
432  // Stop the default implementation of delete_document(term) and
433  // replace_document(term) from being hidden. This isn't really
434  // a problem as we only try to call them through the base class
435  // (where they aren't hidden) but some compilers generate a warning
436  // about the hiding.
437 #ifndef _MSC_VER
440 #endif
441  void delete_document(Xapian::docid did);
442  void replace_document(Xapian::docid did, const Xapian::Document & document);
443 
444  Xapian::Document::Internal * open_document(Xapian::docid did,
445  bool lazy) const;
446 
448 
449  public:
461  ChertWritableDatabase(const string &dir, int action, int block_size);
462 
464 
467  Xapian::termcount get_doclength(Xapian::docid did) const;
468  Xapian::termcount get_unique_terms(Xapian::docid did) const;
469  void get_freqs(const string & term,
470  Xapian::doccount * termfreq_ptr,
471  Xapian::termcount * collfreq_ptr) const;
473  std::string get_value_lower_bound(Xapian::valueno slot) const;
474  std::string get_value_upper_bound(Xapian::valueno slot) const;
475  bool term_exists(const string & tname) const;
476 
477  LeafPostList * open_post_list(const string & tname) const;
479  TermList * open_allterms(const string & prefix) const;
480 
481  void add_spelling(const string & word, Xapian::termcount freqinc) const;
482  void remove_spelling(const string & word, Xapian::termcount freqdec) const;
484 
485  TermList * open_synonym_keylist(const string & prefix) const;
486  void add_synonym(const string & word, const string & synonym) const;
487  void remove_synonym(const string & word, const string & synonym) const;
488  void clear_synonyms(const string & word) const;
489 
490  void set_metadata(const string & key, const string & value);
493 
495  bool has_uncommitted_changes() const;
496 };
497 
498 #endif /* OM_HGUARD_CHERT_DATABASE_H */
bool term_exists(const string &tname) const
Virtual methods of Database::Internal.
Subclass of ChertTable which holds termlists.
Xapian::docid get_lastdocid() const
Virtual methods of Database::Internal.
TermList * open_spelling_termlist(const string &word) const
Virtual methods of Database::Internal.
static void compact(Xapian::Compactor *compactor, const char *destdir, const std::vector< Xapian::Database::Internal *> &sources, const std::vector< Xapian::docid > &offset, size_t block_size, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
A TermList in a chert database.
Define the XAPIAN_NORETURN macro.
virtual void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: database.cc:186
A RemoteConnection object provides a bidirectional connection to another RemoteConnection object on a...
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: chert_cursor.h:66
Statistics about values.
Records in chert databases.
virtual bool has_uncommitted_changes() const
Return true if there are uncommitted changes.
Xapian::termcount get_wdf_upper_bound(const string &term) const
Virtual methods of Database::Internal.
std::string get_value_upper_bound(Xapian::valueno slot) const
Virtual methods of Database::Internal.
void get_database_write_lock(int flags, bool creating)
Get a write lock on the database, or throw an Xapian::DatabaseLockError if failure.
void readahead_for_query(const Xapian::Query &query)
Virtual methods of Database::Internal.
bool database_exists()
Return true if a database exists at the path specified for this database.
friend class ChertWritableDatabase
TermList * open_synonym_termlist(const string &term) const
Virtual methods of Database::Internal.
Xapian::termcount get_doclength_upper_bound() const
Virtual methods of Database::Internal.
void write_changesets_to_fd(int fd, const string &start_revision, bool need_whole_db, Xapian::ReplicationInfo *info)
Virtual methods of Database::Internal.
map< Xapian::docid, Xapian::termcount > doclens
Document lengths of new and modified documents which haven't been flushed yet.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
Find lowest and highest docids actually in use.
ChertRecordTable record_table
Table storing records.
virtual Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: database.cc:161
virtual void commit()
Commit pending modifications to the database.
Definition: database.cc:102
void cancel()
Cancel any outstanding changes to the tables.
Base class for databases.
Definition: database.h:57
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
A writable chert database.
Xapian::doccount get_doccount() const
Virtual methods of Database::Internal.
bool open_tables_consistent()
Open all tables at most recent consistent revision.
Constants in the Xapian namespace.
A record in a chert database.
Definition: chert_record.h:37
double end_time(double timeout)
Return the end time for a timeout in timeout seconds.
Definition: realtime.h:95
ChertSpellingTable spelling_table
Table storing spelling correction data.
virtual void clear_synonyms(const string &term) const
Clear all synonyms for a term.
Definition: database.cc:286
A document in the database, possibly plus modifications.
Definition: document.h:43
Compact a database, or merge and compact several.
TermList * open_allterms(const string &prefix) const
Virtual methods of Database::Internal.
chert_revision_number_t get_revision_number() const
Get an object holding the revision number which the tables are opened at.
Abstract base class for termlists.
Definition: termlist.h:39
void set_revision_number(chert_revision_number_t new_revision)
Set the revision number in the tables.
ChertVersion version_file
The file describing the Chert database.
virtual void set_metadata(const string &key, const string &value)
Set the metadata associated with a given key.
Definition: database.cc:306
ChertPositionListTable position_table
Table storing position lists.
TermList * open_synonym_keylist(const string &prefix) const
Virtual methods of Database::Internal.
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
Synonym data for a chert database.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:39
Chert class for database statistics.
void close()
Close all the tables permanently.
std::string term
The term name for this postlist (empty for an alldocs postlist).
Definition: leafpostlist.h:52
ChertPostListTable postlist_table
Table storing posting lists.
PositionList * open_position_list(Xapian::docid did, const string &term) const
Virtual methods of Database::Internal.
ChertSynonymTable synonym_table
Table storing synonym data.
void create_and_open_tables(unsigned int blocksize)
Create new tables, and open them.
Types used by chert backend and the Btree manager.
FlintLock lock
Lock object.
bool has_positions() const
Virtual methods of Database::Internal.
unsigned int chert_revision_number_t
A type used to store a revision number for a table.
Definition: chert_types.h:40
Xapian::doccount get_spelling_frequency(const string &word) const
Virtual methods of Database::Internal.
Xapian::docid modify_shortcut_docid
The document ID for the last document returned by open_document().
virtual void delete_document(Xapian::docid did)
Delete a document in the database.
Definition: database.cc:169
ValueList * open_value_list(Xapian::valueno slot) const
Virtual methods of Database::Internal.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Virtual methods of Database::Internal.
virtual void add_synonym(const string &term, const string &synonym) const
Add a synonym for a term.
Definition: database.cc:274
The ChertVersion class manages the "iamchert" file.
Definition: chert_version.h:34
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Virtual methods of Database::Internal.
int get_backend_info(string *path) const
Get backend information about this database.
void send_whole_database(RemoteConnection &conn, double end_time)
Send a set of messages which transfer the whole database.
bool reopen()
Re-open tables to recover from an overwritten condition, or just get most up-to-date version...
BACKEND_* constants.
std::string get_value_lower_bound(Xapian::valueno slot) const
Virtual methods of Database::Internal.
std::string db_dir
Directory to store databases in.
ChertValueManager value_manager
Value manager.
map< string, map< Xapian::docid, pair< char, Xapian::termcount > > > mod_plists
Modifications to posting lists.
string get_revision_info() const
Virtual methods of Database::Internal.
TermList * open_spelling_wordlist() const
Virtual methods of Database::Internal.
string get_uuid() const
Virtual methods of Database::Internal.
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Virtual methods of Database::Internal.
unsigned int max_changesets
The maximum number of changesets which should be kept in the database.
ChertDatabase(const string &db_dir_, int action=Xapian::DB_READONLY_, unsigned int block_size=0u)
Create and open a chert database.
bool locked() const
Return true if the database is open for writing.
void throw_termlist_table_close_exception() const
Compact a database, or merge and compact several.
Definition: compactor.h:42
ChertCursor * get_postlist_cursor() const
Get a postlist table cursor (used by ChertValueList).
virtual void remove_synonym(const string &term, const string &synonym) const
Remove a synonym for a term.
Definition: database.cc:280
Information about the steps involved in performing a replication.
Definition: replication.h:33
virtual void remove_spelling(const string &word, Xapian::termcount freqdec) const
Remove a word from the spelling dictionary.
Definition: database.cc:250
void open_tables(chert_revision_number_t revision)
Open tables at specified revision number.
ChertCursor * cursor_get() const
Get a cursor for reading from the table.
TermList * open_term_list(Xapian::docid did) const
Virtual methods of Database::Internal.
ChertValueManager class.
string get_metadata(const string &key) const
Virtual methods of Database::Internal.
A position list in a chert database.
map< Xapian::valueno, ValueStats > value_stats
Xapian::doccount flush_threshold
If change_count reaches this threshold we automatically flush.
void apply()
Apply any outstanding changes to the tables.
ChertVersion class.
void get_changeset_revisions(const string &path, chert_revision_number_t *startrev, chert_revision_number_t *endrev) const
Get the revision stored in a changeset.
Abstract base class for value streams.
Definition: valuelist.h:31
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
XAPIAN_TERMCOUNT_BASE_TYPE termcount_diff
A signed difference between two counts of terms.
Definition: types.h:79
ChertTermListTable termlist_table
Table storing term lists.
Spelling correction data for a chert database.
Xapian::doccount change_count
The number of documents added, deleted, or replaced since the last flush.
Xapian::termcount get_doclength_lower_bound() const
Virtual methods of Database::Internal.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::termcount get_doclength(Xapian::docid did) const
Virtual methods of Database::Internal.
virtual void add_spelling(const string &word, Xapian::termcount freqinc) const
Add a word to the spelling dictionary.
Definition: database.cc:244
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree s...
Postlists in chert databases.
Xapian::docid did
Document id we're currently at.
compaction_level
Compaction level.
Definition: compactor.h:48
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
virtual void invalidate_doc_object(Xapian::Document::Internal *obj) const
Notify the database that document is no longer valid.
Definition: database.cc:353
map< string, pair< Xapian::termcount_diff, Xapian::termcount_diff > > freq_deltas
Unflushed changes to term frequencies and collection frequencies.
A postlist in a chert database.
bool readonly
Whether the database is readonly.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
ChertDatabaseStats stats
Database statistics.
Class representing a query.
Definition: query.h:46
void modifications_failed(chert_revision_number_t old_revision, chert_revision_number_t new_revision, const std::string &msg)
Called if modifications fail.
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
void request_document(Xapian::docid) const
Virtual methods of Database::Internal.
Chert class for database statistics.
Definition: chert_dbstats.h:32
chert_revision_number_t get_next_revision_number() const
Get an object holding the next revision number which should be used in the tables.
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::Document::Internal * modify_shortcut_document
A pointer to the last document which was returned by open_document(), or NULL if there is no such val...
LeafPostList * open_post_list(const string &tname) const
Virtual methods of Database::Internal.
void get_freqs(const string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Virtual methods of Database::Internal.
TermList * open_metadata_keylist(const std::string &prefix) const
Virtual methods of Database::Internal.
Xapian::totallength get_total_length() const
Virtual methods of Database::Internal.