00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024 #include "chert_termlist.h"
00025
00026 #include "xapian/error.h"
00027
00028 #include "expandweight.h"
00029 #include "chert_positionlist.h"
00030 #include "debuglog.h"
00031 #include "omassert.h"
00032 #include "pack.h"
00033 #include "str.h"
00034
00035 using namespace std;
00036
00037 ChertTermList::ChertTermList(Xapian::Internal::RefCntPtr<const ChertDatabase> db_,
00038 Xapian::docid did_)
00039 : db(db_), did(did_), current_wdf(0), current_termfreq(0)
00040 {
00041 LOGCALL_CTOR(DB, "ChertTermList", db_ | did_);
00042
00043 if (!db->termlist_table.get_exact_entry(ChertTermListTable::make_key(did),
00044 data))
00045 throw Xapian::DocNotFoundError("No termlist for document " + str(did));
00046
00047 pos = data.data();
00048 end = pos + data.size();
00049
00050 if (pos == end) {
00051 doclen = 0;
00052 termlist_size = 0;
00053 return;
00054 }
00055
00056
00057 if (!unpack_uint(&pos, end, &doclen)) {
00058 const char *msg;
00059 if (pos == 0) {
00060 msg = "Too little data for doclen in termlist";
00061 } else {
00062 msg = "Overflowed value for doclen in termlist";
00063 }
00064 throw Xapian::DatabaseCorruptError(msg);
00065 }
00066
00067
00068 if (!unpack_uint(&pos, end, &termlist_size)) {
00069 const char *msg;
00070 if (pos == 0) {
00071 msg = "Too little data for list size in termlist";
00072 } else {
00073 msg = "Overflowed value for list size in termlist";
00074 }
00075 throw Xapian::DatabaseCorruptError(msg);
00076 }
00077 }
00078
00079 chert_doclen_t
00080 ChertTermList::get_doclength() const
00081 {
00082 LOGCALL(DB, chert_doclen_t, "ChertTermList::get_doclength", NO_ARGS);
00083 RETURN(doclen);
00084 }
00085
00086 Xapian::termcount
00087 ChertTermList::get_approx_size() const
00088 {
00089 LOGCALL(DB, Xapian::termcount, "ChertTermList::get_approx_size", NO_ARGS);
00090 RETURN(termlist_size);
00091 }
00092
00093 void
00094 ChertTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
00095 {
00096 LOGCALL_VOID(DB, "ChertTermList::accumulate_stats", stats);
00097 Assert(!at_end());
00098 stats.accumulate(current_wdf, doclen, get_termfreq(), db->get_doccount());
00099 }
00100
00101 string
00102 ChertTermList::get_termname() const
00103 {
00104 LOGCALL(DB, string, "ChertTermList::get_termname", NO_ARGS);
00105 RETURN(current_term);
00106 }
00107
00108 Xapian::termcount
00109 ChertTermList::get_wdf() const
00110 {
00111 LOGCALL(DB, Xapian::termcount, "ChertTermList::get_wdf", NO_ARGS);
00112 RETURN(current_wdf);
00113 }
00114
00115 Xapian::doccount
00116 ChertTermList::get_termfreq() const
00117 {
00118 LOGCALL(DB, Xapian::doccount, "ChertTermList::get_termfreq", NO_ARGS);
00119 if (current_termfreq == 0)
00120 current_termfreq = db->get_termfreq(current_term);
00121 RETURN(current_termfreq);
00122 }
00123
00124 TermList *
00125 ChertTermList::next()
00126 {
00127 LOGCALL(DB, TermList *, "ChertTermList::next", NO_ARGS);
00128 Assert(!at_end());
00129 if (pos == end) {
00130 pos = NULL;
00131 RETURN(NULL);
00132 }
00133
00134
00135 current_termfreq = 0;
00136
00137 bool wdf_in_reuse = false;
00138 if (!current_term.empty()) {
00139
00140 size_t len = static_cast<unsigned char>(*pos++);
00141 if (len > current_term.size()) {
00142
00143 wdf_in_reuse = true;
00144 size_t divisor = current_term.size() + 1;
00145 current_wdf = len / divisor - 1;
00146 len %= divisor;
00147 }
00148 current_term.resize(len);
00149 }
00150
00151
00152 size_t append_len = static_cast<unsigned char>(*pos++);
00153 current_term.append(pos, append_len);
00154 pos += append_len;
00155
00156
00157 if (!wdf_in_reuse && !unpack_uint(&pos, end, ¤t_wdf)) {
00158 const char *msg;
00159 if (pos == 0) {
00160 msg = "Too little data for wdf in termlist";
00161 } else {
00162 msg = "Overflowed value for wdf in termlist";
00163 }
00164 throw Xapian::DatabaseCorruptError(msg);
00165 }
00166
00167 RETURN(NULL);
00168 }
00169
00170 TermList *
00171 ChertTermList::skip_to(const string & term)
00172 {
00173 LOGCALL(API, TermList *, "ChertTermList::skip_to", term);
00174 while (pos != NULL && current_term < term) {
00175 (void)ChertTermList::next();
00176 }
00177 RETURN(NULL);
00178 }
00179
00180 bool
00181 ChertTermList::at_end() const
00182 {
00183 LOGCALL(DB, bool, "ChertTermList::at_end", NO_ARGS);
00184 RETURN(pos == NULL);
00185 }
00186
00187 Xapian::termcount
00188 ChertTermList::positionlist_count() const
00189 {
00190 LOGCALL(DB, Xapian::termcount, "ChertTermList::positionlist_count", NO_ARGS);
00191 RETURN(db->position_table.positionlist_count(did, current_term));
00192 }
00193
00194 Xapian::PositionIterator
00195 ChertTermList::positionlist_begin() const
00196 {
00197 LOGCALL(DB, Xapian::PositionIterator, "ChertTermList::positionlist_begin", NO_ARGS);
00198 return Xapian::PositionIterator(
00199 new ChertPositionList(&db->position_table, did, current_term));
00200 }