xapian-core  2.0.0
glass_synonym.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2011,2017,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 #include "glass_synonym.h"
23 
24 #include "xapian/error.h"
25 
26 #include "glass_cursor.h"
27 #include "glass_database.h"
28 #include "debuglog.h"
29 #include "stringutils.h"
30 #include "api/vectortermlist.h"
31 
32 #include <set>
33 #include <string>
34 #include <string_view>
35 #include <vector>
36 
37 using namespace std;
38 
39 // We XOR the length values with this so that they are more likely to coincide
40 // with lower case ASCII letters, which are likely to be common. This means
41 // that zlib should do a better job of compressing tag values.
42 #define MAGIC_XOR_VALUE 96
43 
44 void
46 {
47  if (last_term.empty()) return;
48 
49  if (last_synonyms.empty()) {
50  del(last_term);
51  } else {
52  string tag;
53  for (const auto& synonym : last_synonyms) {
54  tag += uint8_t(synonym.size() ^ MAGIC_XOR_VALUE);
55  tag += synonym;
56  }
57  add(last_term, tag);
58  last_synonyms.clear();
59  }
60  last_term.resize(0);
61 }
62 
63 void
64 GlassSynonymTable::add_synonym(string_view term, string_view synonym)
65 {
66  if (last_term != term) {
67  merge_changes();
68  last_term = term;
69 
70  string tag;
71  if (get_exact_entry(term, tag)) {
72  const char * p = tag.data();
73  const char * end = p + tag.size();
74  while (p != end) {
75  size_t len;
76  if (p == end ||
77  (len = uint8_t(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
78  throw Xapian::DatabaseCorruptError("Bad synonym data");
79  ++p;
80  last_synonyms.insert(string(p, len));
81  p += len;
82  }
83  }
84  }
85 
86  last_synonyms.emplace(synonym);
87 }
88 
89 void
90 GlassSynonymTable::remove_synonym(string_view term, string_view synonym)
91 {
92  if (last_term != term) {
93  merge_changes();
94  last_term = term;
95 
96  string tag;
97  if (get_exact_entry(term, tag)) {
98  const char * p = tag.data();
99  const char * end = p + tag.size();
100  while (p != end) {
101  size_t len;
102  if (p == end ||
103  (len = uint8_t(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
104  throw Xapian::DatabaseCorruptError("Bad synonym data");
105  ++p;
106  last_synonyms.emplace(p, len);
107  p += len;
108  }
109  }
110  }
111 
112 #ifdef __cpp_lib_associative_heterogeneous_erasure // C++23
113  last_synonyms.erase(synonym);
114 #else
115  last_synonyms.erase(string(synonym));
116 #endif
117 }
118 
119 void
121 {
122  // We don't actually ever need to merge_changes() here, but it's quite
123  // likely that someone might clear_synonyms() and then add_synonym() for
124  // the same term. The alternative we could otherwise optimise for (modify
125  // synonyms for a term, then clear those for another, then modify those for
126  // the first term again) seems much less likely.
127  if (last_term == term) {
128  last_synonyms.clear();
129  } else {
130  merge_changes();
131  last_term = term;
132  }
133 }
134 
135 TermList*
137 {
138  vector<string> synonyms;
139 
140  if (last_term == term) {
141  if (last_synonyms.empty()) return NULL;
142 
143  synonyms.reserve(last_synonyms.size());
144  for (const auto& i : last_synonyms) {
145  synonyms.push_back(i);
146  }
147  } else {
148  string tag;
149  if (!get_exact_entry(term, tag)) return NULL;
150 
151  const char * p = tag.data();
152  const char * end = p + tag.size();
153  while (p != end) {
154  size_t len;
155  if (p == end ||
156  (len = uint8_t(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
157  throw Xapian::DatabaseCorruptError("Bad synonym data");
158  ++p;
159  synonyms.push_back(string(p, len));
160  p += len;
161  }
162  }
163 
164  return new VectorTermList(synonyms.begin(), synonyms.end());
165 }
166 
168 
170 {
171  LOGCALL_DTOR(DB, "GlassSynonymTermList");
172  delete cursor;
173 }
174 
177 {
178  // This is an over-estimate, but we only use this value to build a balanced
179  // or-tree, and it'll do a decent enough job for that.
180  return database->synonym_table.get_entry_count();
181 }
182 
185 {
186  throw Xapian::InvalidOperationError("GlassSynonymTermList::get_termfreq() not meaningful");
187 }
188 
189 TermList *
191 {
192  LOGCALL(DB, TermList *, "GlassSynonymTermList::next", NO_ARGS);
193  Assert(!cursor->after_end());
194 
195  if (!cursor->next() || !startswith(cursor->current_key, prefix)) {
196  // We've reached the end of the prefixed terms.
197  RETURN(this);
198  }
199  current_term = cursor->current_key;
200 
201  RETURN(NULL);
202 }
203 
204 TermList*
206 {
207  LOGCALL(DB, TermList *, "GlassSynonymTermList::skip_to", tname);
208  Assert(!cursor->after_end());
209 
210  if (cursor->find_entry_ge(tname)) {
211  // Exact match.
212  current_term = tname;
213  } else {
214  // The exact term we asked for isn't there, so check if the next
215  // term after it also has the right prefix.
216  if (cursor->after_end() || !startswith(cursor->current_key, prefix)) {
217  // We've reached the end of the prefixed terms.
218  RETURN(this);
219  }
220  current_term = cursor->current_key;
221  }
222  RETURN(NULL);
223 }
void clear_synonyms(std::string_view term)
Remove all synonyms for term.
void add_synonym(std::string_view term, std::string_view synonym)
Add a synonym for term.
TermList * open_termlist(std::string_view term)
Open synonym termlist for a term.
void remove_synonym(std::string_view term, std::string_view synonym)
Remove a synonym for term.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
TermList * skip_to(std::string_view tname)
Advance to the first term which is >= tname.
~GlassSynonymTermList()
Destructor.
TermList * next()
Advance to the next term in the list.
This class stores a list of terms.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:271
Abstract base class for termlists.
Definition: termlist.h:42
string term
PositionList * p
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:481
Hierarchy of classes which Xapian can throw as exceptions.
Interface to Btree cursors.
C++ class definition for glass database.
#define MAGIC_XOR_VALUE
Synonym data for a glass database.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
#define Assert(COND)
Definition: omassert.h:122
Various handy string-related helpers.
bool startswith(std::string_view s, char pfx)
Definition: stringutils.h:56
A vector-like container of terms which can be iterated.