xapian-core  1.4.25
chert_synonym.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2011 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 #include "chert_synonym.h"
23 
24 #include "xapian/error.h"
25 
26 #include "chert_cursor.h"
27 #include "debuglog.h"
28 #include "stringutils.h"
29 #include "api/vectortermlist.h"
30 
31 #include <set>
32 #include <string>
33 #include <vector>
34 
35 using namespace std;
36 
37 // We XOR the length values with this so that they are more likely to coincide
38 // with lower case ASCII letters, which are likely to be common. This means
39 // that zlib should do a better job of compressing tag values.
40 #define MAGIC_XOR_VALUE 96
41 
42 void
44 {
45  if (last_term.empty()) return;
46 
47  if (last_synonyms.empty()) {
48  del(last_term);
49  } else {
50  string tag;
51 
52  set<string>::const_iterator i;
53  for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
54  const string & synonym = *i;
55  tag += uint8_t(synonym.size() ^ MAGIC_XOR_VALUE);
56  tag += synonym;
57  }
58 
59  add(last_term, tag);
60  last_synonyms.clear();
61  }
62  last_term.resize(0);
63 }
64 
65 void
66 ChertSynonymTable::add_synonym(const string & term, const string & synonym)
67 {
68  if (last_term != term) {
69  merge_changes();
70  last_term = term;
71 
72  string tag;
73  if (get_exact_entry(term, tag)) {
74  const char * p = tag.data();
75  const char * end = p + tag.size();
76  while (p != end) {
77  size_t len;
78  if (p == end ||
79  (len = uint8_t(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
80  throw Xapian::DatabaseCorruptError("Bad synonym data");
81  ++p;
82  last_synonyms.insert(string(p, len));
83  p += len;
84  }
85  }
86  }
87 
88  last_synonyms.insert(synonym);
89 }
90 
91 void
92 ChertSynonymTable::remove_synonym(const string & term, const string & synonym)
93 {
94  if (last_term != term) {
95  merge_changes();
96  last_term = term;
97 
98  string tag;
99  if (get_exact_entry(term, tag)) {
100  const char * p = tag.data();
101  const char * end = p + tag.size();
102  while (p != end) {
103  size_t len;
104  if (p == end ||
105  (len = uint8_t(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
106  throw Xapian::DatabaseCorruptError("Bad synonym data");
107  ++p;
108  last_synonyms.insert(string(p, len));
109  p += len;
110  }
111  }
112  }
113 
114  last_synonyms.erase(synonym);
115 }
116 
117 void
119 {
120  // We don't actually ever need to merge_changes() here, but it's quite
121  // likely that someone might clear_synonyms() and then add_synonym() for
122  // the same term. The alternative we could otherwise optimise for (modify
123  // synonyms for a term, then clear those for another, then modify those for
124  // the first term again) seems much less likely.
125  if (last_term == term) {
126  last_synonyms.clear();
127  } else {
128  merge_changes();
129  last_term = term;
130  }
131 }
132 
133 TermList *
135 {
136  vector<string> synonyms;
137 
138  if (last_term == term) {
139  if (last_synonyms.empty()) return NULL;
140 
141  synonyms.reserve(last_synonyms.size());
142  set<string>::const_iterator i;
143  for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
144  synonyms.push_back(*i);
145  }
146  } else {
147  string tag;
148  if (!get_exact_entry(term, tag)) return NULL;
149 
150  const char * p = tag.data();
151  const char * end = p + tag.size();
152  while (p != end) {
153  size_t len;
154  if (p == end ||
155  (len = uint8_t(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
156  throw Xapian::DatabaseCorruptError("Bad synonym data");
157  ++p;
158  synonyms.push_back(string(p, len));
159  p += len;
160  }
161  }
162 
163  return new VectorTermList(synonyms.begin(), synonyms.end());
164 }
165 
167 
169 {
170  LOGCALL_DTOR(DB, "ChertSynonymTermList");
171  delete cursor;
172 }
173 
174 string
176 {
177  LOGCALL(DB, string, "ChertSynonymTermList::get_termname", NO_ARGS);
178  Assert(cursor);
179  Assert(!cursor->current_key.empty());
180  Assert(!at_end());
181  RETURN(cursor->current_key);
182 }
183 
186 {
187  throw Xapian::InvalidOperationError("ChertSynonymTermList::get_termfreq() not meaningful");
188 }
189 
190 TermList *
192 {
193  LOGCALL(DB, TermList *, "ChertSynonymTermList::next", NO_ARGS);
194  Assert(!at_end());
195 
196  cursor->next();
197  if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
198  // We've reached the end of the prefixed terms.
199  cursor->to_end();
200  }
201 
202  RETURN(NULL);
203 }
204 
205 TermList *
206 ChertSynonymTermList::skip_to(const string &tname)
207 {
208  LOGCALL(DB, TermList *, "ChertSynonymTermList::skip_to", tname);
209  Assert(!at_end());
210 
211  if (!cursor->find_entry_ge(tname)) {
212  // The exact term we asked for isn't there, so check if the next
213  // term after it also has the right prefix.
214  if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
215  // We've reached the end of the prefixed terms.
216  cursor->to_end();
217  }
218  }
219  RETURN(NULL);
220 }
221 
222 bool
224 {
225  LOGCALL(DB, bool, "ChertSynonymTermList::at_end", NO_ARGS);
226  RETURN(cursor->after_end());
227 }
#define RETURN(A)
Definition: debuglog.h:493
#define Assert(COND)
Definition: omassert.h:122
~ChertSynonymTermList()
Destructor.
void clear_synonyms(const std::string &term)
Remove all synonyms for term.
TermList * skip_to(const string &tname)
Advance to the first term which is >= tname.
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
Synonym data for a chert database.
TermList * open_termlist(const std::string &term)
Open synonym termlist for a term.
A vector-like container of terms which can be iterated.
string get_termname() const
Returns the current termname.
#define MAGIC_XOR_VALUE
Hierarchy of classes which Xapian can throw as exceptions.
void add_synonym(const std::string &term, const std::string &synonym)
Add a synonym for term.
TermList * next()
Advance to the next term in the list.
Interface to Btree cursors.
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
bool at_end() const
True if we're off the end of the list.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
This class stores a list of terms.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Various handy helpers which std::string really should provide.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
void remove_synonym(const std::string &term, const std::string &synonym)
Remove a synonym for term.