00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include <xapian/document.h>
00024 #include <xapian/error.h>
00025 #include <xapian/termiterator.h>
00026
00027 #include "flint_termlisttable.h"
00028 #include "flint_utils.h"
00029 #include "debuglog.h"
00030 #include "omassert.h"
00031 #include "str.h"
00032 #include "stringutils.h"
00033
00034 #include <string>
00035
00036 using namespace std;
00037
00038 void
00039 FlintTermListTable::set_termlist(Xapian::docid did,
00040 const Xapian::Document & doc,
00041 flint_doclen_t doclen)
00042 {
00043 LOGCALL_VOID(DB, "FlintTermListTable::set_termlist", did | doc | doclen);
00044
00045 Xapian::doccount termlist_size = doc.termlist_count();
00046 if (termlist_size == 0) {
00047
00048 Assert(doclen == 0);
00049 Assert(doc.termlist_begin() == doc.termlist_end());
00050 add(flint_docid_to_key(did), string());
00051 return;
00052 }
00053
00054 string tag = F_pack_uint(doclen);
00055
00056 Xapian::TermIterator t = doc.termlist_begin();
00057 if (t != doc.termlist_end()) {
00058 tag += F_pack_uint(termlist_size);
00059 string prev_term = *t;
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069 if (prev_term.size() == '0') tag += '0';
00070
00071 tag += char(prev_term.size());
00072 tag += prev_term;
00073 tag += F_pack_uint(t.get_wdf());
00074 --termlist_size;
00075
00076 while (++t != doc.termlist_end()) {
00077 const string & term = *t;
00078
00079
00080
00081 size_t reuse = common_prefix_length(prev_term, term);
00082
00083
00084
00085
00086
00087
00088
00089
00090 size_t packed = 0;
00091 Xapian::termcount wdf = t.get_wdf();
00092
00093
00094
00095 if (wdf < 127)
00096 packed = (wdf + 1) * (prev_term.size() + 1) + reuse;
00097
00098 if (packed && packed < 256) {
00099
00100 tag += char(packed);
00101 tag += char(term.size() - reuse);
00102 tag.append(term.data() + reuse, term.size() - reuse);
00103 } else {
00104 tag += char(reuse);
00105 tag += char(term.size() - reuse);
00106 tag.append(term.data() + reuse, term.size() - reuse);
00107
00108
00109 tag += F_pack_uint(wdf);
00110 }
00111
00112 prev_term = *t;
00113 --termlist_size;
00114 }
00115 }
00116 Assert(termlist_size == 0);
00117 add(flint_docid_to_key(did), tag);
00118 }
00119
00120 flint_doclen_t
00121 FlintTermListTable::get_doclength(Xapian::docid did) const
00122 {
00123 LOGCALL(DB, flint_doclen_t, "FlintTermListTable::get_doclength", did);
00124
00125 string tag;
00126 if (!get_exact_entry(flint_docid_to_key(did), tag))
00127 throw Xapian::DocNotFoundError("No termlist found for document " +
00128 str(did));
00129
00130 if (tag.empty()) RETURN(0);
00131
00132 const char * pos = tag.data();
00133 const char * end = pos + tag.size();
00134
00135 flint_doclen_t doclen;
00136 if (!F_unpack_uint(&pos, end, &doclen)) {
00137 const char *msg;
00138 if (pos == 0) {
00139 msg = "Too little data for doclen in termlist";
00140 } else {
00141 msg = "Overflowed value for doclen in termlist";
00142 }
00143 throw Xapian::DatabaseCorruptError(msg);
00144 }
00145
00146 RETURN(doclen);
00147 }