00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef XAPIAN_INCLUDED_FLINT_SPELLING_H
00022 #define XAPIAN_INCLUDED_FLINT_SPELLING_H
00023
00024 #include <xapian/types.h>
00025
00026 #include "flint_table.h"
00027 #include "termlist.h"
00028
00029 #include <map>
00030 #include <set>
00031 #include <string>
00032 #include <cstring>
00033
/** Fixed-size 4 byte key.
 *
 *  All four bytes take part in ordering (see operator< on F_fragment), so
 *  every byte must hold a defined value before the object is compared.
 */
struct F_fragment {
    /// Raw bytes of the fragment.
    char data[4];

    /** Construct a fragment with all four bytes set to zero.
     *
     *  Zero-filling (rather than leaving the bytes indeterminate) means a
     *  default-constructed fragment can be compared or converted safely even
     *  if the caller only assigns some of the bytes afterwards.
     */
    F_fragment() { std::memset(data, 0, sizeof(data)); }

    /** Construct a fragment by copying exactly 4 bytes from @a data_.
     *
     *  @param data_	Buffer to copy from; it must be readable for at least
     *			4 bytes.  The buffer is only read, hence const.
     */
    F_fragment(const char data_[4]) { std::memcpy(data, data_, sizeof(data)); }

    /// Unchecked access to byte @a i (the caller must ensure i < 4).
    char & operator[] (unsigned i) { return data[i]; }

    /// Unchecked read-only access to byte @a i (the caller must ensure i < 4).
    const char & operator[] (unsigned i) const { return data[i]; }

    /** Convert to a std::string.
     *
     *  @return	All 4 bytes if the first byte is 'M', otherwise only the
     *		first 3 bytes.
     */
    operator std::string () const {
	const std::string::size_type len = (data[0] == 'M') ? 4 : 3;
	return std::string(data, len);
    }
};
00050
00051 inline bool operator<(const F_fragment &a, const F_fragment &b) {
00052 return std::memcmp(a.data, b.data, 4) < 0;
00053 }
00054
00055 class FlintSpellingTable : public FlintTable {
00056 void toggle_word(const std::string & word);
00057 void toggle_fragment(F_fragment frag, const std::string & word);
00058
00059 std::map<std::string, Xapian::termcount> wordfreq_changes;
00060
00069 std::map<F_fragment, std::set<std::string> > termlist_deltas;
00070
00071 public:
00080 FlintSpellingTable(const std::string & dbdir, bool readonly)
00081 : FlintTable("spelling", dbdir + "/spelling.", readonly, Z_DEFAULT_STRATEGY, true) { }
00082
00083
00084 void merge_changes();
00085
00086 void add_word(const std::string & word, Xapian::termcount freqinc);
00087 void remove_word(const std::string & word, Xapian::termcount freqdec);
00088
00089 TermList * open_termlist(const std::string & word);
00090
00091 Xapian::doccount get_word_frequency(const std::string & word) const;
00092
00100 bool is_modified() const {
00101 return !wordfreq_changes.empty() || FlintTable::is_modified();
00102 }
00103
00104 void create_and_open(unsigned int blocksize) {
00105
00106
00107 FlintTable::erase();
00108 FlintTable::set_block_size(blocksize);
00109 }
00110
00111 void flush_db() {
00112 merge_changes();
00113 FlintTable::flush_db();
00114 }
00115
00116 void cancel() {
00117
00118 wordfreq_changes.clear();
00119 termlist_deltas.clear();
00120
00121 FlintTable::cancel();
00122 }
00123
00124
00125 };
00126
00128 class FlintSpellingTermList : public TermList {
00130 std::string data;
00131
00133 unsigned p;
00134
00136 std::string current_term;
00137
00139 FlintSpellingTermList(const FlintSpellingTermList &);
00140
00142 void operator=(const FlintSpellingTermList &);
00143
00144 public:
00146 FlintSpellingTermList(const std::string & data_)
00147 : data(data_), p(0) { }
00148
00149 Xapian::termcount get_approx_size() const;
00150
00151 std::string get_termname() const;
00152
00153 Xapian::termcount get_wdf() const;
00154
00155 Xapian::doccount get_termfreq() const;
00156
00157 Xapian::termcount get_collection_freq() const;
00158
00159 TermList * next();
00160
00161 TermList * skip_to(const std::string & term);
00162
00163 bool at_end() const;
00164
00165 Xapian::termcount positionlist_count() const;
00166
00167 Xapian::PositionIterator positionlist_begin() const;
00168 };
00169
00170 #endif // XAPIAN_INCLUDED_FLINT_SPELLING_H