00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022 #include "flint_synonym.h"
00023
00024 #include "xapian/error.h"
00025
00026 #include "debuglog.h"
00027 #include "flint_cursor.h"
00028 #include "flint_utils.h"
00029 #include "stringutils.h"
00030 #include "vectortermlist.h"
00031
00032 #include <set>
00033 #include <string>
00034 #include <vector>
00035
00036 using namespace std;
00037
00038
00039
00040
// Every synonym length byte stored in a tag is XORed with this value, both
// when a tag is built and when it is parsed back in this file.  Written in
// hex so the two bits it flips (0x20 and 0x40) are easy to see.
#define MAGIC_XOR_VALUE 0x60
00042
00043 void
00044 FlintSynonymTable::merge_changes()
00045 {
00046 if (last_term.empty()) return;
00047
00048 if (last_synonyms.empty()) {
00049 del(last_term);
00050 } else {
00051 string tag;
00052
00053 set<string>::const_iterator i;
00054 for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
00055 const string & synonym = *i;
00056 tag += byte(synonym.size() ^ MAGIC_XOR_VALUE);
00057 tag += synonym;
00058 }
00059
00060 add(last_term, tag);
00061 last_synonyms.clear();
00062 }
00063 last_term.resize(0);
00064 }
00065
00066 void
00067 FlintSynonymTable::add_synonym(const string & term, const string & synonym)
00068 {
00069 if (last_term != term) {
00070 merge_changes();
00071 last_term = term;
00072
00073 string tag;
00074 if (get_exact_entry(term, tag)) {
00075 const char * p = tag.data();
00076 const char * end = p + tag.size();
00077 while (p != end) {
00078 size_t len;
00079 if (p == end ||
00080 (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00081 throw Xapian::DatabaseCorruptError("Bad synonym data");
00082 ++p;
00083 last_synonyms.insert(string(p, len));
00084 p += len;
00085 }
00086 }
00087 }
00088
00089 last_synonyms.insert(synonym);
00090 }
00091
00092 void
00093 FlintSynonymTable::remove_synonym(const string & term, const string & synonym)
00094 {
00095 if (last_term != term) {
00096 merge_changes();
00097 last_term = term;
00098
00099 string tag;
00100 if (get_exact_entry(term, tag)) {
00101 const char * p = tag.data();
00102 const char * end = p + tag.size();
00103 while (p != end) {
00104 size_t len;
00105 if (p == end ||
00106 (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00107 throw Xapian::DatabaseCorruptError("Bad synonym data");
00108 ++p;
00109 last_synonyms.insert(string(p, len));
00110 p += len;
00111 }
00112 }
00113 }
00114
00115 last_synonyms.erase(synonym);
00116 }
00117
00118 void
00119 FlintSynonymTable::clear_synonyms(const string & term)
00120 {
00121
00122
00123
00124
00125
00126 if (last_term == term) {
00127 last_synonyms.clear();
00128 } else {
00129 merge_changes();
00130 last_term = term;
00131 }
00132 }
00133
00134 TermList *
00135 FlintSynonymTable::open_termlist(const string & term)
00136 {
00137 vector<string> synonyms;
00138
00139 if (last_term == term) {
00140 if (last_synonyms.empty()) return NULL;
00141
00142 synonyms.reserve(last_synonyms.size());
00143 set<string>::const_iterator i;
00144 for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
00145 synonyms.push_back(*i);
00146 }
00147 } else {
00148 string tag;
00149 if (!get_exact_entry(term, tag)) return NULL;
00150
00151 const char * p = tag.data();
00152 const char * end = p + tag.size();
00153 while (p != end) {
00154 size_t len;
00155 if (p == end ||
00156 (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
00157 throw Xapian::DatabaseCorruptError("Bad synonym data");
00158 ++p;
00159 synonyms.push_back(string(p, len));
00160 p += len;
00161 }
00162 }
00163
00164 return new VectorTermList(synonyms.begin(), synonyms.end());
00165 }
00166
00168
00169 FlintSynonymTermList::~FlintSynonymTermList()
00170 {
00171 LOGCALL_DTOR(DB, "FlintSynonymTermList");
00172 delete cursor;
00173 }
00174
00175 string
00176 FlintSynonymTermList::get_termname() const
00177 {
00178 LOGCALL(DB, string, "FlintSynonymTermList::get_termname", NO_ARGS);
00179 Assert(cursor);
00180 Assert(!cursor->current_key.empty());
00181 Assert(!at_end());
00182 RETURN(cursor->current_key);
00183 }
00184
00185 Xapian::doccount
00186 FlintSynonymTermList::get_termfreq() const
00187 {
00188 throw Xapian::InvalidOperationError("FlintSynonymTermList::get_termfreq() not meaningful");
00189 }
00190
00191 Xapian::termcount
00192 FlintSynonymTermList::get_collection_freq() const
00193 {
00194 throw Xapian::InvalidOperationError("FlintSynonymTermList::get_collection_freq() not meaningful");
00195 }
00196
00197 TermList *
00198 FlintSynonymTermList::next()
00199 {
00200 LOGCALL(DB, TermList *, "FlintSynonymTermList::next", NO_ARGS);
00201 Assert(!at_end());
00202
00203 cursor->next();
00204 if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
00205
00206 cursor->to_end();
00207 }
00208
00209 RETURN(NULL);
00210 }
00211
00212 TermList *
00213 FlintSynonymTermList::skip_to(const string &tname)
00214 {
00215 LOGCALL(DB, TermList *, "FlintSynonymTermList::skip_to", tname);
00216 Assert(!at_end());
00217
00218 if (!cursor->find_entry_ge(tname)) {
00219
00220
00221 if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
00222
00223 cursor->to_end();
00224 }
00225 }
00226 RETURN(NULL);
00227 }
00228
00229 bool
00230 FlintSynonymTermList::at_end() const
00231 {
00232 LOGCALL(DB, bool, "FlintSynonymTermList::at_end", NO_ARGS);
00233 RETURN(cursor->after_end());
00234 }