xapian-core  1.4.26
chert_spelling.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2010,2011 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_CHERT_SPELLING_H
22 #define XAPIAN_INCLUDED_CHERT_SPELLING_H
23 
24 #include <xapian/types.h>
25 
26 #include "chert_lazytable.h"
27 #include "api/termlist.h"
28 
29 #include <map>
30 #include <set>
31 #include <string>
32 #include <cstring> // For memcpy() and memcmp().
33 
/** A fixed-size, 4-byte key.
 *
 *  All four bytes take part in ordering (operator< compares 4 bytes), but
 *  conversion to std::string yields all four bytes only when the first byte
 *  is 'M'; otherwise only the first three are used.
 */
struct fragment {
    /// The raw key bytes.
    char data[4];

    /// Default constructor: zero-fills the key, so that comparing or
    /// converting a fragment which hasn't been assigned yet is well defined.
    fragment() : data() { }

    /// Construct from the four bytes starting at @a data_.
    ///
    /// This is explicit, so a character pointer is never silently converted
    /// to a fragment.  The parameter is const-qualified so that read-only
    /// buffers are accepted as well as writable ones.
    explicit fragment(const char data_[4]) { std::memcpy(data, data_, 4); }

    /// Mutable access to byte @a i (no bounds checking is performed).
    char & operator[] (unsigned i) { return data[i]; }

    /// Read-only access to byte @a i (no bounds checking is performed).
    const char & operator[] (unsigned i) const { return data[i]; }

    /// Convert to a string: 4 bytes if the first byte is 'M', otherwise 3.
    operator std::string () const {
        return std::string(data, data[0] == 'M' ? 4 : 3);
    }
};
50 
51 inline bool operator<(const fragment &a, const fragment &b) {
52  return std::memcmp(a.data, b.data, 4) < 0;
53 }
54 
// NOTE(review): no description of the toggle_* helpers is visible in this
// listing; going by their names they presumably flip the presence of `word`
// in the pending changes - confirm against the implementation.
56  void toggle_word(const std::string & word);
57  void toggle_fragment(fragment frag, const std::string & word);
58 
// Batched-up word frequency changes, keyed by word.  is_modified() reports
// true while this is non-empty and cancel() clears it.
59  std::map<std::string, Xapian::termcount> wordfreq_changes;
60 
// Changes to make to the termlists, keyed by fragment.  Cleared by cancel().
69  std::map<fragment, std::set<std::string> > termlist_deltas;
70 
71  public:
80  ChertSpellingTable(const std::string & dbdir, bool readonly)
81  : ChertLazyTable("spelling", dbdir + "/spelling.", readonly,
82  Z_DEFAULT_STRATEGY) { }
83 
84  // Merge in batched-up changes.
85  void merge_changes();
86 
// NOTE(review): presumably these raise/lower the stored frequency of `word`
// by freqinc/freqdec - the definitions are not visible here, so confirm.
87  void add_word(const std::string & word, Xapian::termcount freqinc);
88  void remove_word(const std::string & word, Xapian::termcount freqdec);
89 
// Returns a TermList pointer for `word`.
// NOTE(review): ownership of the returned object is not shown in this
// listing - confirm whether the caller is expected to delete it.
90  TermList * open_termlist(const std::string & word);
91 
// Look up the frequency recorded for `word` (const: does not modify the
// table object).
92  Xapian::doccount get_word_frequency(const std::string & word) const;
93 
101  bool is_modified() const {
102  return !wordfreq_changes.empty() || ChertTable::is_modified();
103  }
104 
// Merges the batched-up changes by calling merge_changes().
// NOTE(review): the listing's embedded numbering jumps from 106 to 108, so a
// statement appears to have been lost from this body - presumably the call
// to the base class ChertTable::flush_db().  Confirm against the original
// header before relying on this text.
105  void flush_db() {
106  merge_changes();
108  }
109 
// Cancel any outstanding changes held by this object by emptying both
// wordfreq_changes and termlist_deltas.
// NOTE(review): the listing's embedded numbering jumps from 114 to 116, so a
// statement appears to have been lost from this body - presumably the call
// to the base class ChertTable::cancel().  Confirm against the original
// header before relying on this text.
110  void cancel() {
111  // Discard batched-up changes.
112  wordfreq_changes.clear();
113  termlist_deltas.clear();
114 
116  }
117 
118  // @}
119 };
120 
// The encoded data.
124  std::string data;
125 
// Position in the data.
127  unsigned p;
128 
// The current term.
130  std::string current_term;
131 
134 
// Assignment operator taking a const reference and returning void; no
// definition is visible in this listing.
// NOTE(review): presumably declared only to disallow assignment - confirm.
136  void operator=(const ChertSpellingTermList &);
137 
138  public:
140  explicit ChertSpellingTermList(const std::string & data_)
141  : data(data_), p(0) { }
142 
// NOTE(review): the member functions below are only declared in this
// listing.  Given the include of "api/termlist.h", they presumably implement
// the TermList interface - confirm against that header.

// Approximate number of entries in this list.
143  Xapian::termcount get_approx_size() const;
144 
// Name of the term at the current position.
145  std::string get_termname() const;
146 
147  Xapian::termcount get_wdf() const;
148 
149  Xapian::doccount get_termfreq() const;
150 
151  Xapian::termcount get_collection_freq() const;
152 
// Advance to the next entry.
// NOTE(review): the meaning of the returned TermList pointer is not shown
// here - confirm.
153  TermList * next();
154 
// Advance to `term`.
// NOTE(review): the behaviour when `term` is absent is not shown here -
// confirm.
155  TermList * skip_to(const std::string & term);
156 
// True once iteration has moved past the last entry.
157  bool at_end() const;
158 
159  Xapian::termcount positionlist_count() const;
160 
161  Xapian::PositionIterator positionlist_begin() const;
162 };
163 
164 #endif // XAPIAN_INCLUDED_CHERT_SPELLING_H
The list of words containing a particular trigram.
std::map< std::string, Xapian::termcount > wordfreq_changes
typedefs for Xapian
char data[4]
fragment(char data_[4])
Abstract base class for termlists.
Definition: termlist.h:39
std::string current_term
The current term.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
ChertSpellingTable(const std::string &dbdir, bool readonly)
Create a new ChertSpellingTable object.
std::map< fragment, std::set< std::string > > termlist_deltas
Changes to make to the termlists.
void cancel()
Cancel any outstanding changes.
bool is_modified() const
Determine whether the object contains uncommitted modifications.
Definition: chert_table.h:653
Class for iterating over term positions.
std::string data
The encoded data.
bool operator<(const fragment &a, const fragment &b)
void flush_db()
Override methods of ChertTable.
void cancel()
Override methods of ChertTable.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Subclass of ChertTable for deriving lazy tables from.
Abstract base class for termlists.
ChertSpellingTermList(const std::string &data_)
Constructor.
void flush_db()
Flush any outstanding changes to the DB file of the table.
unsigned p
Position in the data.
char & operator[](unsigned i)
bool is_modified() const
Override methods of ChertTable.