00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024 #include "flint_termlist.h"
00025
00026 #include "xapian/error.h"
00027
00028 #include "expandweight.h"
00029 #include "flint_positionlist.h"
00030 #include "flint_utils.h"
00031 #include "debuglog.h"
00032 #include "omassert.h"
00033 #include "str.h"
00034
00035 using namespace std;
00036
00037 FlintTermList::FlintTermList(Xapian::Internal::RefCntPtr<const FlintDatabase> db_,
00038 Xapian::docid did_)
00039 : db(db_), did(did_), current_wdf(0), current_termfreq(0)
00040 {
00041 LOGCALL_CTOR(DB, "FlintTermList", db_ | did_);
00042
00043 if (!db->termlist_table.get_exact_entry(flint_docid_to_key(did), data))
00044 throw Xapian::DocNotFoundError("No termlist for document " + str(did));
00045
00046 pos = data.data();
00047 end = pos + data.size();
00048
00049 if (pos == end) {
00050 doclen = 0;
00051 termlist_size = 0;
00052 return;
00053 }
00054
00055
00056 if (!F_unpack_uint(&pos, end, &doclen)) {
00057 const char *msg;
00058 if (pos == 0) {
00059 msg = "Too little data for doclen in termlist";
00060 } else {
00061 msg = "Overflowed value for doclen in termlist";
00062 }
00063 throw Xapian::DatabaseCorruptError(msg);
00064 }
00065
00066
00067 if (!F_unpack_uint(&pos, end, &termlist_size)) {
00068 const char *msg;
00069 if (pos == 0) {
00070 msg = "Too little data for list size in termlist";
00071 } else {
00072 msg = "Overflowed value for list size in termlist";
00073 }
00074 throw Xapian::DatabaseCorruptError(msg);
00075 }
00076
00077
00078
00079 if (pos != end && *pos == '0') ++pos;
00080 }
00081
00082 flint_doclen_t
00083 FlintTermList::get_doclength() const
00084 {
00085 LOGCALL(DB, flint_doclen_t, "FlintTermList::get_doclength", NO_ARGS);
00086 RETURN(doclen);
00087 }
00088
00089 Xapian::termcount
00090 FlintTermList::get_approx_size() const
00091 {
00092 LOGCALL(DB, Xapian::termcount, "FlintTermList::get_approx_size", NO_ARGS);
00093 RETURN(termlist_size);
00094 }
00095
00096 void
00097 FlintTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
00098 {
00099 LOGCALL_VOID(DB, "FlintTermList::accumulate_stats", stats);
00100 Assert(!at_end());
00101 stats.accumulate(current_wdf, doclen, get_termfreq(), db->get_doccount());
00102 }
00103
00104 string
00105 FlintTermList::get_termname() const
00106 {
00107 LOGCALL(DB, string, "FlintTermList::get_termname", NO_ARGS);
00108 RETURN(current_term);
00109 }
00110
00111 Xapian::termcount
00112 FlintTermList::get_wdf() const
00113 {
00114 LOGCALL(DB, Xapian::termcount, "FlintTermList::get_wdf", NO_ARGS);
00115 RETURN(current_wdf);
00116 }
00117
00118 Xapian::doccount
00119 FlintTermList::get_termfreq() const
00120 {
00121 LOGCALL(DB, Xapian::doccount, "FlintTermList::get_termfreq", NO_ARGS);
00122 if (current_termfreq == 0)
00123 current_termfreq = db->get_termfreq(current_term);
00124 RETURN(current_termfreq);
00125 }
00126
00127 TermList *
00128 FlintTermList::next()
00129 {
00130 LOGCALL(DB, TermList *, "FlintTermList::next", NO_ARGS);
00131 Assert(!at_end());
00132 if (pos == end) {
00133 pos = NULL;
00134 RETURN(NULL);
00135 }
00136
00137
00138 current_termfreq = 0;
00139
00140 bool wdf_in_reuse = false;
00141 if (!current_term.empty()) {
00142
00143 size_t len = static_cast<unsigned char>(*pos++);
00144 if (len > current_term.size()) {
00145
00146 wdf_in_reuse = true;
00147 size_t divisor = current_term.size() + 1;
00148 current_wdf = len / divisor - 1;
00149 len %= divisor;
00150 }
00151 current_term.resize(len);
00152 }
00153
00154
00155 size_t append_len = static_cast<unsigned char>(*pos++);
00156 current_term.append(pos, append_len);
00157 pos += append_len;
00158
00159
00160 if (!wdf_in_reuse && !F_unpack_uint(&pos, end, ¤t_wdf)) {
00161 const char *msg;
00162 if (pos == 0) {
00163 msg = "Too little data for wdf in termlist";
00164 } else {
00165 msg = "Overflowed value for wdf in termlist";
00166 }
00167 throw Xapian::DatabaseCorruptError(msg);
00168 }
00169
00170 RETURN(NULL);
00171 }
00172
00173 TermList *
00174 FlintTermList::skip_to(const string & term)
00175 {
00176 LOGCALL(API, TermList *, "FlintTermList::skip_to", term);
00177 while (pos != NULL && current_term < term) {
00178 (void)FlintTermList::next();
00179 }
00180 RETURN(NULL);
00181 }
00182
00183 bool
00184 FlintTermList::at_end() const
00185 {
00186 LOGCALL(DB, bool, "FlintTermList::at_end", NO_ARGS);
00187 RETURN(pos == NULL);
00188 }
00189
00190 Xapian::termcount
00191 FlintTermList::positionlist_count() const
00192 {
00193 LOGCALL(DB, Xapian::termcount, "FlintTermList::positionlist_count", NO_ARGS);
00194 RETURN(db->position_table.positionlist_count(did, current_term));
00195 }
00196
00197 Xapian::PositionIterator
00198 FlintTermList::positionlist_begin() const
00199 {
00200 LOGCALL(DB, Xapian::PositionIterator, "FlintTermList::positionlist_begin", NO_ARGS);
00201 return Xapian::PositionIterator(
00202 new FlintPositionList(&db->position_table, did, current_term));
00203 }