xapian-core  2.0.0
glass_spelling.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2010,2011,2014,2015,2016,2017,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_GLASS_SPELLING_H
22 #define XAPIAN_INCLUDED_GLASS_SPELLING_H
23 
24 #include <xapian/types.h>
25 
26 #include "glass_lazytable.h"
27 #include "api/termlist.h"
28 
29 #include <map>
30 #include <set>
31 #include <string>
32 #include <string_view>
33 #include <cstring> // For memcpy() and memcmp().
34 
35 namespace Glass {
36 
37 class RootInfo;
38 
/** Fixed-size 4 byte key identifying a spelling fragment.
 *
 *  When converted to std::string the key is 4 bytes long if its first byte
 *  is 'M' and 3 bytes long otherwise.  Ordering (operator<) always compares
 *  all 4 bytes, so the bytes are zero-initialised to keep the fourth byte
 *  well-defined even when a caller only fills in the first three.
 */
struct fragment {
    /// Raw key bytes; always fully initialised.
    char data[4] = {};

    /// Default constructor: produces an all-zero fragment.
    fragment() { }

    /// Explicit (not implicit) conversion from a 4 byte buffer.
    ///
    /// Exactly 4 bytes are copied from @a data_, which may be const.
    explicit fragment(const char data_[4]) { std::memcpy(data, data_, 4); }

    /// Unchecked access to byte @a i (valid indices are 0 to 3).
    char & operator[] (unsigned i) { return data[i]; }
    const char & operator[] (unsigned i) const { return data[i]; }

    /// Convert to a string: 4 bytes if data[0] is 'M', otherwise 3 bytes.
    operator std::string() const {
        return std::string(data, data[0] == 'M' ? 4 : 3);
    }

    /// Byte-wise ordering over all 4 bytes, so fragments can be map keys.
    bool operator<(const fragment &b) const {
        return std::memcmp(data, b.data, 4) < 0;
    }
};
59 
60 }
61 
62 using Glass::RootInfo;
63 
// Helpers declared here and defined outside this header.
// NOTE(review): judging by the names, these flip whether `word` is present
// in the pending termlist changes - confirm against the implementation.
65  void toggle_word(std::string_view word);
66  void toggle_fragment(Glass::fragment frag, std::string_view word);
67 
// Batched-up word frequency changes, keyed by word.  std::less<> is a
// transparent comparator, so lookups can be made with a std::string_view
// without building a temporary std::string.  A non-empty map makes
// is_modified() return true; cancel() clears it.
68  std::map<std::string, Xapian::termcount, std::less<>> wordfreq_changes;
69 
// Changes to make to the termlists, keyed by fragment.  cancel() clears it.
78  std::map<Glass::fragment, std::set<std::string>> termlist_deltas;
79 
82 
83  public:
/** Create a new GlassSpellingTable object.
 *
 *  @param dbdir     Database directory; the path handed to GlassLazyTable
 *                   is dbdir + "/spelling.".
 *  @param readonly  Forwarded unchanged to GlassLazyTable.
 */
92  GlassSpellingTable(const std::string & dbdir, bool readonly)
93  : GlassLazyTable("spelling", dbdir + "/spelling.", readonly) { }
94 
/** Create a new GlassSpellingTable object from a file descriptor.
 *
 *  @param fd        File descriptor, forwarded to GlassLazyTable.
 *  @param offset_   Offset, forwarded to GlassLazyTable.
 *                   NOTE(review): presumably where the table starts within
 *                   fd - confirm against GlassLazyTable.
 *  @param readonly  Forwarded unchanged to GlassLazyTable.
 */
95  GlassSpellingTable(int fd, off_t offset_, bool readonly)
96  : GlassLazyTable("spelling", fd, offset_, readonly) { }
97 
/// Merge in batched-up changes.
102  void merge_changes();
103 
// Word frequency maintenance; both are defined outside this header.
// NOTE(review): freqinc/freqdec look like the amounts to add to/subtract
// from the word's frequency, and the meaning of remove_word()'s return
// value is not visible here - confirm against the implementation.
104  void add_word(std::string_view word, Xapian::termcount freqinc);
105  Xapian::termcount remove_word(std::string_view word,
106  Xapian::termcount freqdec);
107 
// Returns a TermList pointer for `word`.
// NOTE(review): ownership of the returned pointer and the "no entries"
// result are not visible in this header - confirm before relying on them.
108  TermList* open_termlist(std::string_view word);
109 
// Const lookup of the frequency recorded for `word`.
110  Xapian::doccount get_word_frequency(std::string_view word) const;
111 
114  }
115 
123  bool is_modified() const {
124  return !wordfreq_changes.empty() || GlassTable::is_modified();
125  }
126 
129  merge_changes();
131  return wordfreq_upper_bound;
132  }
133 
134  void cancel(const RootInfo & root_info, glass_revision_number_t rev) {
135  // Discard batched-up changes.
136  wordfreq_changes.clear();
137  termlist_deltas.clear();
138 
139  GlassTable::cancel(root_info, rev);
140  }
141 
142  // @}
143 };
144 
/// The encoded data (a copy of the string passed to the constructor).
148  std::string data;
149 
/// Position in the data; starts at 0.
151  unsigned p;
152 
155 
158 
159  public:
/** Constructor.
 *
 *  @param data_  Encoded data; copied into this object.  The position
 *                starts at 0.
 */
161  explicit GlassSpellingTermList(const std::string & data_)
162  : data(data_), p(0) { }
163 
165 
/// Return the wdf for the term at the current position.
166  Xapian::termcount get_wdf() const;
167 
169 
/// Advance the current position to the next term in the termlist.
170  TermList * next();
171 
/// Skip forward to the specified term.
172  TermList* skip_to(std::string_view term);
173 
175 
177 };
178 
179 #endif // XAPIAN_INCLUDED_GLASS_SPELLING_H
std::map< Glass::fragment, std::set< std::string > > termlist_deltas
Changes to make to the termlists.
void cancel(const RootInfo &root_info, glass_revision_number_t rev)
Override methods of GlassTable.
Xapian::termcount remove_word(std::string_view word, Xapian::termcount freqdec)
Xapian::termcount wordfreq_upper_bound
Used to track an upper bound on wordfreq.
std::map< std::string, Xapian::termcount, std::less<> > wordfreq_changes
bool is_modified() const
Override methods of GlassTable.
void merge_changes()
Merge in batched-up changes.
TermList * open_termlist(std::string_view word)
void toggle_fragment(Glass::fragment frag, std::string_view word)
Xapian::termcount flush_db()
Returns updated wordfreq upper bound.
void toggle_word(std::string_view word)
void add_word(std::string_view word, Xapian::termcount freqinc)
GlassSpellingTable(int fd, off_t offset_, bool readonly)
Xapian::doccount get_word_frequency(std::string_view word) const
GlassSpellingTable(const std::string &dbdir, bool readonly)
Create a new GlassSpellingTable object.
void set_wordfreq_upper_bound(Xapian::termcount ub)
The list of words containing a particular trigram.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
std::string data
The encoded data.
GlassSpellingTermList(const GlassSpellingTermList &)
Copying is not allowed.
TermList * skip_to(std::string_view term)
Skip forward to the specified term.
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
TermList * next()
Advance the current position to the next term in the termlist.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
void operator=(const GlassSpellingTermList &)
Assignment is not allowed.
GlassSpellingTermList(const std::string &data_)
Constructor.
unsigned p
Position in the data.
PositionList * positionlist_begin() const
Return PositionList for the current position.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
void flush_db()
Flush any outstanding changes to the DB file of the table.
bool is_modified() const
Determine whether the object contains uncommitted modifications.
Definition: glass_table.h:706
void cancel(const RootInfo &root_info, glass_revision_number_t rev)
Cancel any outstanding changes.
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
Abstract base class for termlists.
Definition: termlist.h:42
string term
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
Subclass of GlassTable for deriving lazy tables from.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:108
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
bool operator<(const fragment &b) const
fragment(char data_[4])
char & operator[](unsigned i)
Abstract base class for termlists.
typedefs for Xapian