00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include <xapian/document.h>
00024 #include <xapian/error.h>
00025 #include <xapian/positioniterator.h>
00026 #include <xapian/termiterator.h>
00027 #include <xapian/valueiterator.h>
00028
00029 #include "omassert.h"
00030 #include "omenquireinternal.h"
00031 #include "serialise.h"
00032 #include "serialise-double.h"
00033 #include "utils.h"
00034 #include "weightinternal.h"
00035
00036 #include <string>
00037 #include <cstring>
00038
00039 using namespace std;
00040
00041 size_t
00042 decode_length(const char ** p, const char *end, bool check_remaining)
00043 {
00044 if (*p == end) {
00045 throw Xapian::NetworkError("Bad encoded length: no data");
00046 }
00047
00048 size_t len = static_cast<unsigned char>(*(*p)++);
00049 if (len == 0xff) {
00050 len = 0;
00051 unsigned char ch;
00052 int shift = 0;
00053 do {
00054 if (*p == end || shift > 28)
00055 throw Xapian::NetworkError("Bad encoded length: insufficient data");
00056 ch = *(*p)++;
00057 len |= size_t(ch & 0x7f) << shift;
00058 shift += 7;
00059 } while ((ch & 0x80) == 0);
00060 len += 255;
00061 }
00062 if (check_remaining && len > size_t(end - *p)) {
00063 throw Xapian::NetworkError("Bad encoded length: length greater than data");
00064 }
00065 return len;
00066 }
00067
00068 string
00069 serialise_error(const Xapian::Error &e)
00070 {
00071 string result;
00072 result += encode_length(strlen(e.get_type()));
00073 result += e.get_type();
00074 result += encode_length(e.get_context().length());
00075 result += e.get_context();
00076 result += encode_length(e.get_msg().length());
00077 result += e.get_msg();
00078
00079 const char * err = e.get_error_string();
00080 if (err) result += err;
00081 return result;
00082 }
00083
00084 void
00085 unserialise_error(const string &serialised_error, const string &prefix,
00086 const string &new_context)
00087 {
00088
00089 const char * p = serialised_error.c_str();
00090 const char * end = p + serialised_error.size();
00091 size_t len;
00092 len = decode_length(&p, end, true);
00093 if (len == 7 && memcmp(p, "UNKNOWN", 7) == 0) {
00094 throw Xapian::InternalError("UNKNOWN");
00095 }
00096 string type(p, len);
00097 p += len;
00098
00099 len = decode_length(&p, end, true);
00100 string context(p, len);
00101 p += len;
00102
00103 len = decode_length(&p, end, true);
00104 string msg(prefix);
00105 msg.append(p, len);
00106 p += len;
00107
00108 const char * error_string = (p == end) ? NULL : p;
00109
00110 if (!context.empty() && !new_context.empty()) {
00111 msg += "; context was: ";
00112 msg += context;
00113 context = new_context;
00114 }
00115
00116 #include <xapian/errordispatch.h>
00117
00118 string newmsg = "Unknown remote exception type ";
00119 newmsg += type;
00120 newmsg += ": ";
00121 newmsg += msg;
00122 throw Xapian::InternalError(newmsg, context);
00123 }
00124
00125 string
00126 serialise_stats(const Xapian::Weight::Internal &stats)
00127 {
00128 string result;
00129
00130 result += encode_length(stats.total_length);
00131 result += encode_length(stats.collection_size);
00132 result += encode_length(stats.rset_size);
00133
00134 result += encode_length(stats.termfreqs.size());
00135 map<string, TermFreqs>::const_iterator i;
00136 for (i = stats.termfreqs.begin(); i != stats.termfreqs.end(); ++i) {
00137 result += encode_length(i->first.size());
00138 result += i->first;
00139 result += encode_length(i->second.termfreq);
00140 if (stats.rset_size != 0)
00141 result += encode_length(i->second.reltermfreq);
00142 }
00143
00144 return result;
00145 }
00146
00147 Xapian::Weight::Internal
00148 unserialise_stats(const string &s)
00149 {
00150 const char * p = s.data();
00151 const char * p_end = p + s.size();
00152
00153 Xapian::Weight::Internal stat;
00154
00155 stat.total_length = decode_length(&p, p_end, false);
00156 stat.collection_size = decode_length(&p, p_end, false);
00157 stat.rset_size = decode_length(&p, p_end, false);
00158
00159 size_t n = decode_length(&p, p_end, false);
00160 while (n--) {
00161 size_t len = decode_length(&p, p_end, true);
00162 string term(p, len);
00163 p += len;
00164 Xapian::doccount termfreq(decode_length(&p, p_end, false));
00165 if (stat.rset_size == 0) {
00166 stat.termfreqs.insert(make_pair(term, TermFreqs(termfreq, 0)));
00167 } else {
00168 Xapian::doccount reltermfreq(decode_length(&p, p_end, false));
00169 stat.termfreqs.insert(make_pair(term,
00170 TermFreqs(termfreq, reltermfreq)));
00171 }
00172 }
00173
00174 return stat;
00175 }
00176
00177 string
00178 serialise_mset(const Xapian::MSet &mset)
00179 {
00180 string result;
00181
00182 result += encode_length(mset.get_firstitem());
00183 result += encode_length(mset.get_matches_lower_bound());
00184 result += encode_length(mset.get_matches_estimated());
00185 result += encode_length(mset.get_matches_upper_bound());
00186 result += encode_length(mset.get_uncollapsed_matches_lower_bound());
00187 result += encode_length(mset.get_uncollapsed_matches_estimated());
00188 result += encode_length(mset.get_uncollapsed_matches_upper_bound());
00189 result += serialise_double(mset.get_max_possible());
00190 result += serialise_double(mset.get_max_attained());
00191
00192 result += serialise_double(mset.internal->percent_factor);
00193
00194 result += encode_length(mset.size());
00195 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
00196 result += serialise_double(i.get_weight());
00197 result += encode_length(*i);
00198 result += encode_length(i.get_collapse_key().size());
00199 result += i.get_collapse_key();
00200 result += encode_length(i.get_collapse_count());
00201 }
00202
00203 const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
00204 = mset.internal->termfreqandwts;
00205
00206 map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
00207 for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
00208 result += encode_length(j->first.size());
00209 result += j->first;
00210 result += encode_length(j->second.termfreq);
00211 result += serialise_double(j->second.termweight);
00212 }
00213
00214 return result;
00215 }
00216
00217 Xapian::MSet
00218 unserialise_mset(const char * p, const char * p_end)
00219 {
00220 Xapian::doccount firstitem = decode_length(&p, p_end, false);
00221 Xapian::doccount matches_lower_bound = decode_length(&p, p_end, false);
00222 Xapian::doccount matches_estimated = decode_length(&p, p_end, false);
00223 Xapian::doccount matches_upper_bound = decode_length(&p, p_end, false);
00224 Xapian::doccount uncollapsed_lower_bound = decode_length(&p, p_end, false);
00225 Xapian::doccount uncollapsed_estimated = decode_length(&p, p_end, false);
00226 Xapian::doccount uncollapsed_upper_bound = decode_length(&p, p_end, false);
00227 Xapian::weight max_possible = unserialise_double(&p, p_end);
00228 Xapian::weight max_attained = unserialise_double(&p, p_end);
00229
00230 double percent_factor = unserialise_double(&p, p_end);
00231
00232 vector<Xapian::Internal::MSetItem> items;
00233 size_t msize = decode_length(&p, p_end, false);
00234 while (msize-- > 0) {
00235 Xapian::weight wt = unserialise_double(&p, p_end);
00236 Xapian::docid did = decode_length(&p, p_end, false);
00237 size_t len = decode_length(&p, p_end, true);
00238 string key(p, len);
00239 p += len;
00240 Xapian::doccount collapse_cnt = decode_length(&p, p_end, false);
00241 items.push_back(Xapian::Internal::MSetItem(wt, did, key, collapse_cnt));
00242 }
00243
00244 map<string, Xapian::MSet::Internal::TermFreqAndWeight> terminfo;
00245 while (p != p_end) {
00246 Xapian::MSet::Internal::TermFreqAndWeight tfaw;
00247 size_t len = decode_length(&p, p_end, true);
00248 string term(p, len);
00249 p += len;
00250 tfaw.termfreq = decode_length(&p, p_end, false);
00251 tfaw.termweight = unserialise_double(&p, p_end);
00252 terminfo.insert(make_pair(term, tfaw));
00253 }
00254
00255 return Xapian::MSet(new Xapian::MSet::Internal(
00256 firstitem,
00257 matches_upper_bound,
00258 matches_lower_bound,
00259 matches_estimated,
00260 uncollapsed_upper_bound,
00261 uncollapsed_lower_bound,
00262 uncollapsed_estimated,
00263 max_possible, max_attained,
00264 items, terminfo, percent_factor));
00265 }
00266
00267 string
00268 serialise_rset(const Xapian::RSet &rset)
00269 {
00270 const set<Xapian::docid> & items = rset.internal->get_items();
00271 string result;
00272 set<Xapian::docid>::const_iterator i;
00273 Xapian::docid lastdid = 0;
00274 for (i = items.begin(); i != items.end(); ++i) {
00275 Xapian::docid did = *i;
00276 result += encode_length(did - lastdid - 1);
00277 lastdid = did;
00278 }
00279 return result;
00280 }
00281
00282 Xapian::RSet
00283 unserialise_rset(const string &s)
00284 {
00285 Xapian::RSet rset;
00286
00287 const char * p = s.data();
00288 const char * p_end = p + s.size();
00289
00290 Xapian::docid did = 0;
00291 while (p != p_end) {
00292 did += decode_length(&p, p_end, false) + 1;
00293 rset.add_document(did);
00294 }
00295
00296 return rset;
00297 }
00298
00299 string
00300 serialise_document(const Xapian::Document &doc)
00301 {
00302 string result;
00303
00304 size_t n = doc.values_count();
00305 result += encode_length(n);
00306 Xapian::ValueIterator value;
00307 for (value = doc.values_begin(); value != doc.values_end(); ++value) {
00308 result += encode_length(value.get_valueno());
00309 result += encode_length((*value).size());
00310 result += *value;
00311 --n;
00312 }
00313 Assert(n == 0);
00314
00315 n = doc.termlist_count();
00316 result += encode_length(n);
00317 Xapian::TermIterator term;
00318 for (term = doc.termlist_begin(); term != doc.termlist_end(); ++term) {
00319 result += encode_length((*term).size());
00320 result += *term;
00321 result += encode_length(term.get_wdf());
00322
00323 size_t x = term.positionlist_count();
00324 result += encode_length(x);
00325 Xapian::PositionIterator pos;
00326 Xapian::termpos oldpos = 0;
00327 for (pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos) {
00328 Xapian::termpos diff = *pos - oldpos;
00329 string delta = encode_length(diff);
00330 result += delta;
00331 oldpos = *pos;
00332 --x;
00333 }
00334 Assert(x == 0);
00335 --n;
00336 }
00337 Assert(n == 0);
00338
00339 result += doc.get_data();
00340 return result;
00341 }
00342
00343 Xapian::Document
00344 unserialise_document(const string &s)
00345 {
00346 Xapian::Document doc;
00347 const char * p = s.data();
00348 const char * p_end = p + s.size();
00349
00350 size_t n_values = decode_length(&p, p_end, false);
00351 while (n_values--) {
00352 Xapian::valueno slot = decode_length(&p, p_end, false);
00353 size_t len = decode_length(&p, p_end, true);
00354 doc.add_value(slot, string(p, len));
00355 p += len;
00356 }
00357
00358 size_t n_terms = decode_length(&p, p_end, false);
00359 while (n_terms--) {
00360 size_t len = decode_length(&p, p_end, true);
00361 string term(p, len);
00362 p += len;
00363
00364
00365 Xapian::termcount wdf = decode_length(&p, p_end, false);
00366 doc.add_term(term, wdf);
00367
00368 size_t n_pos = decode_length(&p, p_end, false);
00369 Xapian::termpos pos = 0;
00370 while (n_pos--) {
00371 pos += decode_length(&p, p_end, false);
00372 doc.add_posting(term, pos, 0);
00373 }
00374 }
00375
00376 doc.set_data(string(p, p_end - p));
00377 return doc;
00378 }