00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "dbcheck.h"
00025
00026 #include "utils.h"
00027 #include "testsuite.h"
00028
00029 using namespace std;
00030
00031 string
00032 positions_to_string(Xapian::PositionIterator & it,
00033 const Xapian::PositionIterator & end,
00034 Xapian::termcount * count)
00035 {
00036 string result;
00037 bool need_comma = false;
00038 Xapian::termcount c = 0;
00039 while (it != end) {
00040 if (need_comma)
00041 result += ", ";
00042 result += om_tostring(*it);
00043 need_comma = true;
00044 ++it;
00045 ++c;
00046 }
00047 if (count) {
00048 *count = c;
00049 }
00050 return result;
00051 }
00052
00053 string
00054 postlist_to_string(const Xapian::Database & db, const string & tname)
00055 {
00056 string result;
00057 bool need_comma = false;
00058
00059 for (Xapian::PostingIterator p = db.postlist_begin(tname);
00060 p != db.postlist_end(tname);
00061 ++p) {
00062 if (need_comma)
00063 result += ", ";
00064
00065 Xapian::PositionIterator it(p.positionlist_begin());
00066 string posrepr = positions_to_string(it, p.positionlist_end());
00067 if (!posrepr.empty()) {
00068 posrepr = ", pos=[" + posrepr + "]";
00069 }
00070
00071 result += "(" + om_tostring(*p) +
00072 ", doclen=" + om_tostring(p.get_doclength()) +
00073 ", wdf=" + om_tostring(p.get_wdf()) +
00074 posrepr + ")";
00075 need_comma = true;
00076 }
00077 return result;
00078 }
00079
00080 string
00081 docterms_to_string(const Xapian::Database & db, Xapian::docid did)
00082 {
00083 string result;
00084 bool need_comma = false;
00085
00086 for (Xapian::TermIterator t = db.termlist_begin(did);
00087 t != db.termlist_end(did);
00088 ++t) {
00089 Xapian::PositionIterator it(t.positionlist_begin());
00090 string posrepr = positions_to_string(it, t.positionlist_end());
00091 if (!posrepr.empty()) {
00092 posrepr = ", pos=[" + posrepr + "]";
00093 }
00094 if (need_comma)
00095 result += ", ";
00096 result += "Term(" + *t + ", wdf=" + om_tostring(t.get_wdf()) + posrepr + ")";
00097 need_comma = true;
00098 }
00099 return result;
00100 }
00101
00102 string
00103 docstats_to_string(const Xapian::Database & db, Xapian::docid did)
00104 {
00105 string result;
00106
00107 result += "len=" + om_tostring(db.get_doclength(did));
00108
00109 return result;
00110 }
00111
00112 string
00113 termstats_to_string(const Xapian::Database & db, const string & term)
00114 {
00115 string result;
00116
00117 result += "tf=" + om_tostring(db.get_termfreq(term));
00118 result += ",cf=" + om_tostring(db.get_collection_freq(term));
00119
00120 return result;
00121 }
00122
00123 string
00124 dbstats_to_string(const Xapian::Database & db)
00125 {
00126 string result;
00127
00128 result += "dc=" + om_tostring(db.get_doccount());
00129 result += ",al=" + om_tostring(db.get_avlength());
00130 result += ",ld=" + om_tostring(db.get_lastdocid());
00131
00132 return result;
00133 }
00134
/** Cross-check the statistics, term lists and posting lists of a database.
 *
 *  The function first compares the document count and last document id with
 *  the expected values.  It then walks the documents, checking each term
 *  list and recording a textual representation of every posting, and
 *  finally walks all terms, checking that the real posting lists and the
 *  term/collection frequencies agree with what was recorded.
 *
 *  All checks are expressed with the TEST, TEST_EQUAL and TEST_EQUAL_DOUBLE
 *  macros, which are not defined in this file.
 *
 *  @param db                  Database to check.
 *  @param expected_doccount   Value db.get_doccount() is required to return.
 *  @param expected_lastdocid  Value db.get_lastdocid() is required to return.
 */
00135 void
00136 dbcheck(const Xapian::Database & db,
00137 Xapian::doccount expected_doccount,
00138 Xapian::docid expected_lastdocid)
00139 {
00140 TEST_EQUAL(db.get_doccount(), expected_doccount);
00141 TEST_EQUAL(db.get_lastdocid(), expected_lastdocid);
00142
00143
00144
// Sum of the document lengths seen below; used for the average length check
// at the end of the function.
00145 unsigned long totlen = 0;
00146
00147
00148
00149
// Maps each term to the posting list representation assembled from the
// per-document term lists; compared with the real posting lists further down.
00150 map<string, string> posting_reprs;
00151
// Walk the posting list of the empty term.
// NOTE(review): presumably this enumerates every document in the database -
// confirm against the Xapian API documentation.
00152 for (Xapian::PostingIterator dociter = db.postlist_begin(string());
00153 dociter != db.postlist_end(string());
00154 ++dociter) {
00155 Xapian::docid did = *dociter;
00156 TEST_EQUAL(dociter.get_wdf(), 1);
00157 Xapian::Document doc(db.get_document(did));
00158 Xapian::termcount doclen(db.get_doclength(did));
00159 totlen += doclen;
00160
00161 Xapian::termcount found_termcount = 0;
00162 Xapian::termcount wdf_sum = 0;
// Walk the term list obtained from the Document object (t) and the one
// obtained directly from the database (t2) in step; they must agree.
00163 Xapian::TermIterator t, t2;
00164 for (t = doc.termlist_begin(), t2 = db.termlist_begin(did);
00165 t != doc.termlist_end();
00166 ++t, ++t2) {
00167 TEST(t2 != db.termlist_end(did));
00168
00169 ++found_termcount;
00170 wdf_sum += t.get_wdf();
00171
00172 TEST_EQUAL(*t, *t2);
00173 TEST_EQUAL(t.get_wdf(), t2.get_wdf());
00174 TEST_EQUAL(db.get_termfreq(*t), t.get_termfreq());
00175 TEST_EQUAL(db.get_termfreq(*t), t2.get_termfreq());
00176
00177
// The position lists from both iterators must render identically and contain
// the same number of entries.
00178 Xapian::termcount tc1, tc2;
00179 Xapian::PositionIterator it1(t.positionlist_begin());
00180 string posrepr = positions_to_string(it1, t.positionlist_end(), &tc1);
00181 Xapian::PositionIterator it2(t2.positionlist_begin());
00182 string posrepr2 = positions_to_string(it2, t2.positionlist_end(), &tc2);
00183 TEST_EQUAL(posrepr, posrepr2);
00184 TEST_EQUAL(tc1, tc2);
00185 try {
00186 TEST_EQUAL(tc1, t.positionlist_count());
00187 } catch (const Xapian::UnimplementedError &) {
// An UnimplementedError from the count check is tolerated and ignored.
00188
00189 }
00190
00191
// Build "(did,wdf/doclen)" for this posting, with ",[positions]" before the
// closing bracket when there are positions.
00192 if (!posrepr.empty()) {
00193 posrepr = ",[" + posrepr + "]";
00194 }
00195 string posting_repr = "(" + om_tostring(did) + "," +
00196 om_tostring(t.get_wdf()) + "/" + om_tostring(doclen) +
00197 posrepr + ")";
00198
00199
// Record the posting under its term, appending (comma-separated) to anything
// already recorded for that term from earlier documents.
00200 map<string, string>::iterator i = posting_reprs.find(*t);
00201 if (i == posting_reprs.end()) {
00202 posting_reprs[*t] = posting_repr;
00203 } else {
00204 i->second += "," + posting_repr;
00205 }
00206 }
// Both term lists must end together, the number of terms found must match
// the document's reported term count, and the wdf values must sum to the
// document length.
00207 TEST(t2 == db.termlist_end(did));
00208 Xapian::termcount expected_termcount = doc.termlist_count();
00209 TEST_EQUAL(expected_termcount, found_termcount);
00210 TEST_EQUAL(doclen, wdf_sum);
00211 }
00212
// Walk all terms of the database and the map in step: each term must have a
// map entry with the same key, in the same order.
00213 Xapian::TermIterator t;
00214 map<string, string>::const_iterator i;
00215 for (t = db.allterms_begin(), i = posting_reprs.begin();
00216 t != db.allterms_end();
00217 ++t, ++i) {
00218 TEST(db.term_exists(*t));
00219 TEST(i != posting_reprs.end());
00220 TEST_EQUAL(i->first, *t);
00221
// Rebuild the representation from the term's actual posting list, counting
// the postings (tf_count) and summing their wdf values (cf_count) on the way.
00222 Xapian::doccount tf_count = 0;
00223 Xapian::termcount cf_count = 0;
00224 string posting_repr;
00225 bool need_comma = false;
00226 for (Xapian::PostingIterator p = db.postlist_begin(*t);
00227 p != db.postlist_end(*t);
00228 ++p) {
00229 if (need_comma) {
00230 posting_repr += ",";
00231 }
00232
00233 ++tf_count;
00234 cf_count += p.get_wdf();
00235
00236 Xapian::PositionIterator it(p.positionlist_begin());
00237 string posrepr = positions_to_string(it, p.positionlist_end());
00238 if (!posrepr.empty()) {
00239 posrepr = ",[" + posrepr + "]";
00240 }
00241 posting_repr += "(" + om_tostring(*p) + "," +
00242 om_tostring(p.get_wdf()) + "/" + om_tostring(p.get_doclength()) +
00243 posrepr + ")";
00244 need_comma = true;
00245 }
00246
// The rebuilt representation must equal the one recorded from the term
// lists, and the counts must match the frequencies the database reports.
00247 TEST_EQUAL(posting_repr, i->second);
00248 TEST_EQUAL(tf_count, t.get_termfreq());
00249 TEST_EQUAL(tf_count, db.get_termfreq(*t));
00250 TEST_EQUAL(cf_count, db.get_collection_freq(*t));
00251 }
// Every map entry must have been matched by a term from the database.
00252 TEST(i == posting_reprs.end());
00253
// When no documents are expected the average length must be 0; otherwise it
// must match the average computed from the lengths summed above.
00254 if (expected_doccount == 0) {
00255 TEST_EQUAL(0, db.get_avlength());
00256 } else {
00257 TEST_EQUAL_DOUBLE(double(totlen) / expected_doccount,
00258 db.get_avlength());
00259 }
00260 }