xapian-core  2.0.0
honey_spellingwordslist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2017,2018,2024 Olly Betts
5  * Copyright (C) 2007 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
25 
26 #include "xapian/error.h"
27 #include "xapian/types.h"
28 
29 #include "debuglog.h"
30 #include "honey_database.h"
31 #include "pack.h"
32 #include "stringutils.h"
33 
34 using namespace std;
35 
37 {
38  LOGCALL_DTOR(DB, "HoneySpellingWordsList");
39  delete cursor;
40 }
41 
44 {
45  // This is an over-estimate, but we only use this value to build a balanced
46  // or-tree, and it'll do a decent enough job for that.
47  return database->spelling_table.get_approx_entry_count();
48 }
49 
52 {
53  LOGCALL(DB, Xapian::doccount, "HoneySpellingWordsList::get_termfreq", NO_ARGS);
54  Assert(cursor);
55  Assert(!cursor->after_end());
56  Assert(!cursor->current_key.empty());
57  AssertRel(static_cast<unsigned char>(cursor->current_key[0]), >=,
59  cursor->read_tag();
60 
61  Xapian::termcount freq;
62  const char* p = cursor->current_tag.data();
63  if (!unpack_uint_last(&p, p + cursor->current_tag.size(), &freq)) {
64  throw Xapian::DatabaseCorruptError("Bad spelling word freq");
65  }
66  RETURN(freq);
67 }
68 
69 TermList*
71 {
72  LOGCALL(DB, TermList*, "HoneySpellingWordsList::next", NO_ARGS);
73  Assert(cursor);
74 
75  if (cursor->after_end()) {
76  // This is the first action on a new HoneySpellingWordsList.
77  (void)cursor->find_entry_ge(string(1, char(Honey::KEY_PREFIX_WORD)));
78  } else {
79  cursor->next();
80  }
81  if (cursor->after_end()) {
82  // We've reached the end of the prefixed terms.
83  RETURN(this);
84  }
85  const string& key = cursor->current_key;
86  unsigned char first = key[0];
88  if (first > Honey::KEY_PREFIX_WORD) {
89  current_term = key;
90  } else {
91  current_term.assign(key, 1);
92  }
93  RETURN(NULL);
94 }
95 
96 TermList*
98 {
99  LOGCALL(DB, TermList*, "HoneySpellingWordsList::skip_to", term);
100  Assert(cursor);
101 
102  if (cursor->find_entry_ge(Honey::make_spelling_wordlist_key(term))) {
103  // Exact match.
104  current_term = term;
105  } else {
106  // The exact term we asked for isn't there, so check if the next term
107  // after it also has a W prefix.
108  if (cursor->after_end()) {
109  // We've reached the end of the prefixed terms.
110  RETURN(this);
111  }
112  const string& key = cursor->current_key;
113  unsigned char first = key[0];
114  AssertRel(first, >=, Honey::KEY_PREFIX_WORD);
115  if (first > Honey::KEY_PREFIX_WORD) {
116  current_term = key;
117  } else {
118  current_term.assign(key, 1);
119  }
120  }
121  RETURN(NULL);
122 }
Xapian::doccount get_termfreq() const
Returns the term frequency of the current term.
TermList * next()
Advance to the next term in the list.
TermList * skip_to(std::string_view term)
Advance to the first term which is >= term.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
Abstract base class for termlists.
Definition: termlist.h:42
string term
PositionList * p
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:481
Hierarchy of classes which Xapian can throw as exceptions.
Database using honey backend.
A termlist containing all words which are spelling targets.
const unsigned KEY_PREFIX_WORD
std::string make_spelling_wordlist_key(std::string_view word)
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
#define AssertRel(A, REL, B)
Definition: omassert.h:123
#define Assert(COND)
Definition: omassert.h:122
Pack types into strings and unpack them again.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
Definition: pack.h:118
Various handy string-related helpers.
typedefs for Xapian