32 #include "../prefix_compressed_strings.h"
40 #include <string_view>
42 using namespace Honey;
48 for (
auto i : termlist_deltas) {
49 const string& key = i.first;
50 const set<string>& changes = i.second;
52 auto d = changes.begin();
53 if (d == changes.end())
continue;
58 if (get_exact_entry(key, current)) {
60 updated.reserve(current.size());
61 while (!in.
at_end() && d != changes.end()) {
62 const string& word = *in;
63 Assert(d != changes.end());
64 int cmp = word.compare(*d);
85 while (d != changes.end()) {
88 if (!updated.empty()) {
94 termlist_deltas.clear();
96 for (
auto j = wordfreq_changes.begin(); j != wordfreq_changes.end(); ++j) {
103 if (wordfreq > wordfreq_upper_bound)
104 wordfreq_upper_bound = wordfreq;
109 wordfreq_changes.clear();
115 auto i = termlist_deltas.find(frag);
116 if (i == termlist_deltas.end()) {
117 i = termlist_deltas.insert(make_pair(frag, set<string>())).first;
121 auto res = i->second.insert(word);
124 i->second.erase(res.first);
131 if (word.size() <= 1)
return;
133 auto i = wordfreq_changes.find(word);
134 if (i != wordfreq_changes.end()) {
137 i->second += freqinc;
148 const char*
p = data.data();
152 wordfreq_changes[word] = freq + freqinc;
155 wordfreq_changes[word] = freqinc;
165 if (word.size() <= 1)
return freqdec;
167 auto i = wordfreq_changes.find(word);
168 if (i != wordfreq_changes.end()) {
169 if (i->second == 0) {
174 if (freqdec < i->second) {
175 i->second -= freqdec;
178 freqdec -= i->second;
190 const char*
p = data.data();
194 if (freqdec < freq) {
195 wordfreq_changes[word] = freq - freqdec;
201 wordfreq_changes[word] = 0;
215 if (word.size() <= 4) {
224 buf[2] = word[word.size() - 1];
225 toggle_fragment(buf, word);
232 toggle_fragment(buf, word);
236 buf[1] = word[word.size() - 2];
237 buf[2] = word[word.size() - 1];
238 toggle_fragment(buf, word);
240 if (word.size() > 2) {
244 for (
size_t start = 0; start <= word.size() - 3; ++start) {
245 memcpy(buf.
data + 1, word.data() + start, 3);
248 if (done.insert(buf).second)
249 toggle_fragment(buf, word);
268 if (!wordfreq_changes.empty()) merge_changes();
270 vector<TermList*> termlists;
275 if (word.size() <= 4) {
283 buf[2] = word[word.size() - 1];
284 if (get_exact_entry(
string(buf), data))
292 if (get_exact_entry(
string(buf), data))
295 if (word.size() == 2) {
302 if (get_exact_entry(
string(buf), data))
305 if (get_exact_entry(
string(buf), data))
311 buf[1] = word[word.size() - 2];
312 buf[2] = word[word.size() - 1];
313 if (get_exact_entry(
string(buf), data))
316 if (word.size() > 2) {
319 for (
size_t start = 0; start <= word.size() - 3; ++start) {
320 memcpy(buf.
data + 1, word.data() + start, 3);
321 if (get_exact_entry(
string(buf), data))
325 if (word.size() == 3) {
332 if (get_exact_entry(
string(buf), data))
338 if (get_exact_entry(
string(buf), data))
347 for (
auto& t : termlists) {
357 auto i = wordfreq_changes.find(word);
358 if (i != wordfreq_changes.end()) {
367 const char*
p = data.data();
402 if (
p == data.size()) {
407 if (
rare(tail < 0)) {
409 keep = current_term.size() - tail;
410 }
else if (
usual(!current_term.empty())) {
414 if (
p == data.size() ||
419 if (
rare(keep + tail > current_term.size())) {
422 string tail_string(current_term, current_term.size() - tail);
423 current_term.replace(keep, string::npos, data.data() +
p + 1, add);
424 current_term += tail_string;
426 current_term.replace(keep, current_term.size() - tail - keep,
427 data.data() +
p + 1, add);
437 while (current_term <
term) {
449 "positionlist_count() "
458 "positionlist_begin() "
void merge_changes()
Merge in batched-up changes.
void toggle_word(const std::string &word)
Xapian::termcount remove_word(const std::string &word, Xapian::termcount freqdec)
void toggle_fragment(Honey::fragment frag, const std::string &word)
TermList * open_termlist(std::string_view word)
Xapian::doccount get_word_frequency(std::string_view word) const
void add_word(const std::string &word, Xapian::termcount freqinc)
The list of words containing a particular trigram.
TermList * next()
Advance the current position to the next term in the termlist.
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
PositionList * positionlist_begin() const
Return PositionList for the current position.
TermList * skip_to(std::string_view term)
Skip forward to the specified term.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
void append(const std::string &word)
DatabaseCorruptError indicates database corruption was detected.
Abstract base class for iterating term positions in a document.
Abstract base class for termlists.
virtual Xapian::termcount get_approx_size() const =0
Return approximate size of this termlist.
UnimplementedError indicates an attempt to use an unimplemented feature.
Hierarchy of classes which Xapian can throw as exceptions.
Collate statistics and calculate the term weights for the ESet.
Spelling correction data for a honey database.
const unsigned KEY_PREFIX_MIDDLE
const unsigned KEY_PREFIX_TAIL
std::string make_spelling_wordlist_key(std::string_view word)
const unsigned KEY_PREFIX_BOOKEND
const unsigned KEY_PREFIX_HEAD
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Various assertion macros.
#define AssertRel(A, REL, B)
Pack types into strings and unpack them again.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
void pack_uint_last(std::string &s, U value)
Append an encoded unsigned integer to a string as the last item.
bool operator()(const TermList *a, const TermList *b) const
Build tree to merge TermList objects.
TermList * make_termlist_merger(std::vector< TermList * > &termlists)