00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include <xapian/document.h>
00024 #include <xapian/error.h>
00025 #include <xapian/positioniterator.h>
00026 #include <xapian/termiterator.h>
00027 #include <xapian/valueiterator.h>
00028
00029 #include "omassert.h"
00030 #include "omenquireinternal.h"
00031 #include "serialise.h"
00032 #include "serialise-double.h"
00033 #include "stats.h"
00034 #include "utils.h"
00035
00036 #include <string>
00037 #include <string.h>
00038
00039 using namespace std;
00040
/** Encode an unsigned length as a variable-length byte string.
 *
 *  Values below 255 are stored as a single byte holding the value.
 *  Larger values are stored as a 0xff marker byte followed by (len - 255)
 *  split into 7-bit groups, least significant group first; the final group
 *  has its top bit set to mark the end of the encoding.
 *
 *  @param len  The value to encode.
 *  @return     The encoded bytes.
 */
std::string
encode_length(size_t len)
{
    // Short form: the value fits in one byte without colliding with the
    // 0xff marker.
    if (len < 255) {
        return std::string(1, static_cast<char>(static_cast<unsigned char>(len)));
    }

    std::string encoded(1, '\xff');
    size_t remaining = len - 255;
    // Emit every group except the last with the top bit clear.
    for (; remaining > 0x7f; remaining >>= 7) {
        encoded.push_back(static_cast<char>(remaining & 0x7f));
    }
    // The last group carries the terminator bit.
    encoded.push_back(static_cast<char>(remaining | 0x80));
    return encoded;
}
00062
00063 size_t
00064 decode_length(const char ** p, const char *end, bool check_remaining)
00065 {
00066 if (*p == end) {
00067 throw Xapian::NetworkError("Bad encoded length: no data");
00068 }
00069
00070 size_t len = static_cast<unsigned char>(*(*p)++);
00071 if (len == 0xff) {
00072 len = 0;
00073 unsigned char ch;
00074 int shift = 0;
00075 do {
00076 if (*p == end || shift > 28)
00077 throw Xapian::NetworkError("Bad encoded length: insufficient data");
00078 ch = *(*p)++;
00079 len |= size_t(ch & 0x7f) << shift;
00080 shift += 7;
00081 } while ((ch & 0x80) == 0);
00082 len += 255;
00083 }
00084 if (check_remaining && len > size_t(end - *p)) {
00085 throw Xapian::NetworkError("Bad encoded length: length greater than data");
00086 }
00087 return len;
00088 }
00089
00090 string
00091 serialise_error(const Xapian::Error &e)
00092 {
00093 string result;
00094 result += encode_length(strlen(e.get_type()));
00095 result += e.get_type();
00096 result += encode_length(e.get_context().length());
00097 result += e.get_context();
00098 result += encode_length(e.get_msg().length());
00099 result += e.get_msg();
00100
00101 const char * err = e.get_error_string();
00102 if (err) result += err;
00103 return result;
00104 }
00105
/** Decode a serialised error and throw it; this function never returns
 *  normally from the code visible here.
 *
 *  The input is read as three length-prefixed fields (type name, context,
 *  message) followed by an optional trailing error string.
 *
 *  @param serialised_error  The serialised error data.
 *  @param prefix            Text placed in front of the decoded message.
 *  @param new_context       If this and the decoded context are both
 *                           non-empty, the decoded context is appended to
 *                           the message and replaced by this value.
 *
 *  @exception Xapian::InternalError  if the type is "UNKNOWN", or if
 *             control reaches the end of the function.
 *  @exception Xapian::NetworkError   may be thrown by decode_length() on
 *             malformed input.
 */
void
unserialise_error(const string &serialised_error, const string &prefix,
                  const string &new_context)
{

    // c_str() is used so the buffer is NUL-terminated, which the trailing
    // error string (read below as a plain C string) relies on.
    const char * p = serialised_error.c_str();
    const char * end = p + serialised_error.size();
    size_t len;
    // check_remaining is true for each field, so decode_length() throws
    // unless at least len bytes remain.
    len = decode_length(&p, end, true);
    if (len == 7 && memcmp(p, "UNKNOWN", 7) == 0) {
        throw Xapian::InternalError("UNKNOWN");
    }
    string type(p, len);
    p += len;

    len = decode_length(&p, end, true);
    string context(p, len);
    p += len;

    len = decode_length(&p, end, true);
    // The message is built as prefix + decoded message.
    string msg(prefix);
    msg.append(p, len);
    p += len;

    // Whatever follows the message is the error string; NULL if nothing
    // is left.
    const char * error_string = (p == end) ? NULL : p;

    if (!context.empty() && !new_context.empty()) {
        msg += "; context was: ";
        msg += context;
        context = new_context;
    }

    // This header is textually expanded here, inside the function body.
    // NOTE(review): its contents aren't visible in this file - presumably it
    // dispatches on `type` and throws the matching exception class using
    // `msg`, `context` and `error_string`; confirm against the header
    // before renaming any of these locals or reordering the code above.
#include <xapian/errordispatch.h>

    // Reached only if the included code above did not throw.
    string newmsg = "Unknown remote exception type ";
    newmsg += type;
    newmsg += ": ";
    newmsg += msg;
    throw Xapian::InternalError(newmsg, context);
}
00146
00147 string serialise_stats(const Stats &stats)
00148 {
00149 string result;
00150
00151 result += encode_length(stats.collection_size);
00152 result += encode_length(stats.rset_size);
00153 result += serialise_double(stats.average_length);
00154
00155 map<string, Xapian::doccount>::const_iterator i;
00156
00157 result += encode_length(stats.termfreq.size());
00158 for (i = stats.termfreq.begin(); i != stats.termfreq.end(); ++i) {
00159 result += encode_length(i->first.size());
00160 result += i->first;
00161 result += encode_length(i->second);
00162 }
00163
00164 for (i = stats.reltermfreq.begin(); i != stats.reltermfreq.end(); ++i) {
00165 result += encode_length(i->first.size());
00166 result += i->first;
00167 result += encode_length(i->second);
00168 }
00169
00170 return result;
00171 }
00172
00173 Stats
00174 unserialise_stats(const string &s)
00175 {
00176 const char * p = s.data();
00177 const char * p_end = p + s.size();
00178
00179 Stats stat;
00180
00181 stat.collection_size = decode_length(&p, p_end, false);
00182 stat.rset_size = decode_length(&p, p_end, false);
00183 stat.average_length = unserialise_double(&p, p_end);
00184
00185 size_t n = decode_length(&p, p_end, false);
00186 while (n--) {
00187 size_t len = decode_length(&p, p_end, true);
00188 string term(p, len);
00189 p += len;
00190 stat.termfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
00191 }
00192
00193 while (p != p_end) {
00194 size_t len = decode_length(&p, p_end, true);
00195 string term(p, len);
00196 p += len;
00197 stat.reltermfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
00198 }
00199
00200 return stat;
00201 }
00202
00203 string
00204 serialise_mset_pre_30_5(const Xapian::MSet &mset)
00205 {
00206 string result;
00207
00208 result += encode_length(mset.get_firstitem());
00209 result += encode_length(mset.get_matches_lower_bound());
00210 result += encode_length(mset.get_matches_estimated());
00211 result += encode_length(mset.get_matches_upper_bound());
00212 result += serialise_double(mset.get_max_possible());
00213 result += serialise_double(mset.get_max_attained());
00214 result += encode_length(mset.size());
00215 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
00216 result += serialise_double(i.get_weight());
00217 result += encode_length(*i);
00218 result += encode_length(i.get_collapse_key().size());
00219 result += i.get_collapse_key();
00220 result += encode_length(i.get_collapse_count());
00221 }
00222
00223 const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
00224 = mset.internal->termfreqandwts;
00225
00226 map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
00227 for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
00228 result += encode_length(j->first.size());
00229 result += j->first;
00230 result += encode_length(j->second.termfreq);
00231 result += serialise_double(j->second.termweight);
00232 }
00233
00234 return result;
00235 }
00236
00237 string
00238 serialise_mset(const Xapian::MSet &mset)
00239 {
00240 string result;
00241
00242 result += encode_length(mset.get_firstitem());
00243 result += encode_length(mset.get_matches_lower_bound());
00244 result += encode_length(mset.get_matches_estimated());
00245 result += encode_length(mset.get_matches_upper_bound());
00246 result += serialise_double(mset.get_max_possible());
00247 result += serialise_double(mset.get_max_attained());
00248
00249 result += serialise_double(mset.internal->percent_factor);
00250
00251 result += encode_length(mset.size());
00252 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
00253 result += serialise_double(i.get_weight());
00254 result += encode_length(*i);
00255 result += encode_length(i.get_collapse_key().size());
00256 result += i.get_collapse_key();
00257 result += encode_length(i.get_collapse_count());
00258 }
00259
00260 const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
00261 = mset.internal->termfreqandwts;
00262
00263 map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
00264 for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
00265 result += encode_length(j->first.size());
00266 result += j->first;
00267 result += encode_length(j->second.termfreq);
00268 result += serialise_double(j->second.termweight);
00269 }
00270
00271 return result;
00272 }
00273
00274 Xapian::MSet
00275 unserialise_mset(const string &s)
00276 {
00277 const char * p = s.data();
00278 const char * p_end = p + s.size();
00279
00280 Xapian::doccount firstitem = decode_length(&p, p_end, false);
00281 Xapian::doccount matches_lower_bound = decode_length(&p, p_end, false);
00282 Xapian::doccount matches_estimated = decode_length(&p, p_end, false);
00283 Xapian::doccount matches_upper_bound = decode_length(&p, p_end, false);
00284 Xapian::weight max_possible = unserialise_double(&p, p_end);
00285 Xapian::weight max_attained = unserialise_double(&p, p_end);
00286
00287 double percent_factor = unserialise_double(&p, p_end);
00288
00289 vector<Xapian::Internal::MSetItem> items;
00290 size_t msize = decode_length(&p, p_end, false);
00291 while (msize-- > 0) {
00292 Xapian::weight wt = unserialise_double(&p, p_end);
00293 Xapian::docid did = decode_length(&p, p_end, false);
00294 size_t len = decode_length(&p, p_end, true);
00295 string key(p, len);
00296 p += len;
00297 items.push_back(Xapian::Internal::MSetItem(wt, did, key,
00298 decode_length(&p, p_end, false)));
00299 }
00300
00301 map<string, Xapian::MSet::Internal::TermFreqAndWeight> terminfo;
00302 while (p != p_end) {
00303 Xapian::MSet::Internal::TermFreqAndWeight tfaw;
00304 size_t len = decode_length(&p, p_end, true);
00305 string term(p, len);
00306 p += len;
00307 tfaw.termfreq = decode_length(&p, p_end, false);
00308 tfaw.termweight = unserialise_double(&p, p_end);
00309 terminfo.insert(make_pair(term, tfaw));
00310 }
00311
00312 return Xapian::MSet(new Xapian::MSet::Internal(
00313 firstitem,
00314 matches_upper_bound,
00315 matches_lower_bound,
00316 matches_estimated,
00317 max_possible, max_attained,
00318 items, terminfo, percent_factor));
00319 }
00320
00321 string
00322 serialise_rset(const Xapian::RSet &rset)
00323 {
00324 const set<Xapian::docid> & items = rset.internal->get_items();
00325 string result;
00326 set<Xapian::docid>::const_iterator i;
00327 Xapian::docid lastdid = 0;
00328 for (i = items.begin(); i != items.end(); ++i) {
00329 Xapian::docid did = *i;
00330 result += encode_length(did - lastdid - 1);
00331 lastdid = did;
00332 }
00333 return result;
00334 }
00335
00336 Xapian::RSet
00337 unserialise_rset(const string &s)
00338 {
00339 Xapian::RSet rset;
00340
00341 const char * p = s.data();
00342 const char * p_end = p + s.size();
00343
00344 Xapian::docid did = 0;
00345 while (p != p_end) {
00346 did += decode_length(&p, p_end, false) + 1;
00347 rset.add_document(did);
00348 }
00349
00350 return rset;
00351 }
00352
00353 string
00354 serialise_document(const Xapian::Document &doc)
00355 {
00356 string result;
00357
00358 size_t n = doc.values_count();
00359 result += encode_length(n);
00360 Xapian::ValueIterator value;
00361 for (value = doc.values_begin(); value != doc.values_end(); ++value) {
00362 result += encode_length(value.get_valueno());
00363 result += encode_length((*value).size());
00364 result += *value;
00365 --n;
00366 }
00367 Assert(n == 0);
00368
00369 n = doc.termlist_count();
00370 result += encode_length(n);
00371 Xapian::TermIterator term;
00372 for (term = doc.termlist_begin(); term != doc.termlist_end(); ++term) {
00373 result += encode_length((*term).size());
00374 result += *term;
00375 result += encode_length(term.get_wdf());
00376
00377 size_t x = term.positionlist_count();
00378 result += encode_length(x);
00379 Xapian::PositionIterator pos;
00380 Xapian::termpos oldpos = 0;
00381 for (pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos) {
00382 Xapian::termpos diff = *pos - oldpos;
00383 string delta = encode_length(diff);
00384 result += delta;
00385 oldpos = *pos;
00386 --x;
00387 }
00388 Assert(x == 0);
00389 --n;
00390 }
00391 Assert(n == 0);
00392
00393 result += doc.get_data();
00394 return result;
00395 }
00396
00397 Xapian::Document
00398 unserialise_document(const string &s)
00399 {
00400 Xapian::Document doc;
00401 const char * p = s.data();
00402 const char * p_end = p + s.size();
00403
00404 size_t n_values = decode_length(&p, p_end, false);
00405 while (n_values--) {
00406 Xapian::valueno valno = decode_length(&p, p_end, false);
00407 size_t len = decode_length(&p, p_end, true);
00408 doc.add_value(valno, string(p, len));
00409 p += len;
00410 }
00411
00412 size_t n_terms = decode_length(&p, p_end, false);
00413 while (n_terms--) {
00414 size_t len = decode_length(&p, p_end, true);
00415 string term(p, len);
00416 p += len;
00417
00418
00419 Xapian::termcount wdf = decode_length(&p, p_end, false);
00420 doc.add_term(term, wdf);
00421
00422 size_t n_pos = decode_length(&p, p_end, false);
00423 Xapian::termpos pos = 0;
00424 while (n_pos--) {
00425 pos += decode_length(&p, p_end, false);
00426 doc.add_posting(term, pos, 0);
00427 }
00428 }
00429
00430 doc.set_data(string(p, p_end - p));
00431 return doc;
00432 }