32 #include "../prefix_compressed_strings.h"
40 #include <string_view>
42 using namespace Glass;
48 for (
auto i : termlist_deltas) {
49 const string& key = i.first;
50 const set<string>& changes = i.second;
52 auto d = changes.begin();
53 if (d == changes.end())
continue;
58 if (get_exact_entry(key, current)) {
60 updated.reserve(current.size());
61 while (!in.
at_end() && d != changes.end()) {
62 const string & word = *in;
63 Assert(d != changes.end());
64 int cmp = word.compare(*d);
85 while (d != changes.end()) {
88 if (!updated.empty()) {
94 termlist_deltas.clear();
96 for (
auto&& j : wordfreq_changes) {
97 string key =
"W" + j.first;
103 if (wordfreq > wordfreq_upper_bound)
104 wordfreq_upper_bound = wordfreq;
109 wordfreq_changes.clear();
115 auto i = termlist_deltas.find(frag);
116 if (i == termlist_deltas.end()) {
117 i = termlist_deltas.insert(make_pair(frag, set<string>())).first;
121 auto res = i->second.emplace(word);
124 i->second.erase(res.first);
131 if (word.size() <= 1)
return;
133 auto i = wordfreq_changes.find(word);
134 if (i != wordfreq_changes.end()) {
137 i->second += freqinc;
144 string key =
"W"s.append(word);
146 if (get_exact_entry(key, data)) {
149 const char *
p = data.data();
153 wordfreq_changes.emplace(word, freq + freqinc);
156 wordfreq_changes.emplace(word, freqinc);
166 if (word.size() <= 1)
return freqdec;
168 auto i = wordfreq_changes.find(word);
169 if (i != wordfreq_changes.end()) {
170 if (i->second == 0) {
175 if (freqdec < i->second) {
176 i->second -= freqdec;
179 freqdec -= i->second;
184 string key =
"W"s.append(word);
186 if (!get_exact_entry(key, data)) {
192 const char *
p = data.data();
196 if (freqdec < freq) {
197 wordfreq_changes.emplace(word, freq - freqdec);
203 wordfreq_changes.emplace(word, 0);
221 toggle_fragment(buf, word);
225 buf[1] = word[word.size() - 2];
226 buf[2] = word[word.size() - 1];
228 toggle_fragment(buf, word);
230 if (word.size() <= 4) {
240 toggle_fragment(buf, word);
242 if (word.size() > 2) {
246 for (
size_t start = 0; start <= word.size() - 3; ++start) {
247 memcpy(buf.
data + 1, word.data() + start, 3);
250 if (done.insert(buf).second)
251 toggle_fragment(buf, word);
270 if (!wordfreq_changes.empty()) merge_changes();
272 vector<TermList*> termlists;
281 if (get_exact_entry(
string(buf), data))
286 buf[1] = word[word.size() - 2];
287 buf[2] = word[word.size() - 1];
288 if (get_exact_entry(
string(buf), data))
291 if (word.size() <= 4) {
300 if (get_exact_entry(
string(buf), data))
303 if (word.size() > 2) {
306 for (
size_t start = 0; start <= word.size() - 3; ++start) {
307 memcpy(buf.
data + 1, word.data() + start, 3);
308 if (get_exact_entry(
string(buf), data))
312 if (word.size() == 3) {
319 if (get_exact_entry(
string(buf), data))
325 if (get_exact_entry(
string(buf), data))
337 if (get_exact_entry(
string(buf), data))
340 if (get_exact_entry(
string(buf), data))
348 for (
auto& t : termlists) {
358 auto i = wordfreq_changes.find(word);
359 if (i != wordfreq_changes.end()) {
364 string key =
"W"s.append(word);
366 if (get_exact_entry(key, data)) {
369 const char *
p = data.data();
404 if (
p == data.size()) {
407 if (!current_term.empty()) {
411 if (
p == data.size() ||
414 current_term.append(data.data() +
p + 1, add);
422 while (current_term <
term) {
Xapian::termcount remove_word(std::string_view word, Xapian::termcount freqdec)
void merge_changes()
Merge in batched-up changes.
TermList * open_termlist(std::string_view word)
void toggle_fragment(Glass::fragment frag, std::string_view word)
void toggle_word(std::string_view word)
void add_word(std::string_view word, Xapian::termcount freqinc)
Xapian::doccount get_word_frequency(std::string_view word) const
The list of words containing a particular trigram.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
TermList * skip_to(std::string_view term)
Skip forward to the specified term.
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
TermList * next()
Advance the current position to the next term in the termlist.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
PositionList * positionlist_begin() const
Return PositionList for the current position.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
void append(const std::string &word)
DatabaseCorruptError indicates database corruption was detected.
Abstract base class for iterating term positions in a document.
Abstract base class for termlists.
virtual Xapian::termcount get_approx_size() const =0
Return approximate size of this termlist.
UnimplementedError indicates an attempt to use an unimplemented feature.
Hierarchy of classes which Xapian can throw as exceptions.
Collate statistics and calculate the term weights for the ESet.
Spelling correction data for a glass database.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Various assertion macros.
#define AssertRel(A, REL, B)
Pack types into strings and unpack them again.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
void pack_uint_last(std::string &s, U value)
Append an encoded unsigned integer to a string as the last item.
bool operator()(const TermList *a, const TermList *b) const
Build tree to merge TermList objects.
TermList * make_termlist_merger(std::vector< TermList * > &termlists)