00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <config.h>
00023
00024 #include "dbcheck.h"
00025
00026 #include "str.h"
00027 #include "testsuite.h"
00028
00029 using namespace std;
00030
00031 string
00032 positions_to_string(Xapian::PositionIterator & it,
00033 const Xapian::PositionIterator & end,
00034 Xapian::termcount * count)
00035 {
00036 string result;
00037 bool need_comma = false;
00038 Xapian::termcount c = 0;
00039 while (it != end) {
00040 if (need_comma)
00041 result += ", ";
00042 result += str(*it);
00043 need_comma = true;
00044 ++it;
00045 ++c;
00046 }
00047 if (count) {
00048 *count = c;
00049 }
00050 return result;
00051 }
00052
00053 string
00054 postlist_to_string(const Xapian::Database & db, const string & tname)
00055 {
00056 string result;
00057 bool need_comma = false;
00058
00059 for (Xapian::PostingIterator p = db.postlist_begin(tname);
00060 p != db.postlist_end(tname);
00061 ++p) {
00062 if (need_comma)
00063 result += ", ";
00064
00065 Xapian::PositionIterator it(p.positionlist_begin());
00066 string posrepr = positions_to_string(it, p.positionlist_end());
00067 if (!posrepr.empty()) {
00068 posrepr = ", pos=[" + posrepr + "]";
00069 }
00070
00071 result += "(" + str(*p) +
00072 ", doclen=" + str(p.get_doclength()) +
00073 ", wdf=" + str(p.get_wdf()) +
00074 posrepr + ")";
00075 need_comma = true;
00076 }
00077 return result;
00078 }
00079
00080 string
00081 docterms_to_string(const Xapian::Database & db, Xapian::docid did)
00082 {
00083 string result;
00084 bool need_comma = false;
00085
00086 for (Xapian::TermIterator t = db.termlist_begin(did);
00087 t != db.termlist_end(did);
00088 ++t) {
00089 Xapian::PositionIterator it(t.positionlist_begin());
00090 string posrepr = positions_to_string(it, t.positionlist_end());
00091 if (!posrepr.empty()) {
00092 posrepr = ", pos=[" + posrepr + "]";
00093 }
00094 if (need_comma)
00095 result += ", ";
00096 result += "Term(" + *t + ", wdf=" + str(t.get_wdf()) + posrepr + ")";
00097 need_comma = true;
00098 }
00099 return result;
00100 }
00101
00102 string
00103 docstats_to_string(const Xapian::Database & db, Xapian::docid did)
00104 {
00105 string result;
00106
00107 result += "len=" + str(db.get_doclength(did));
00108
00109 return result;
00110 }
00111
00112 string
00113 termstats_to_string(const Xapian::Database & db, const string & term)
00114 {
00115 string result;
00116
00117 result += "tf=" + str(db.get_termfreq(term));
00118 result += ",cf=" + str(db.get_collection_freq(term));
00119
00120 return result;
00121 }
00122
00123 string
00124 dbstats_to_string(const Xapian::Database & db)
00125 {
00126 string result;
00127
00128 result += "dc=" + str(db.get_doccount());
00129 result += ",al=" + str(db.get_avlength());
00130 result += ",ld=" + str(db.get_lastdocid());
00131
00132 return result;
00133 }
00134
00135 void
00136 dbcheck(const Xapian::Database & db,
00137 Xapian::doccount expected_doccount,
00138 Xapian::docid expected_lastdocid)
00139 {
00140 TEST_EQUAL(db.get_doccount(), expected_doccount);
00141 TEST_EQUAL(db.get_lastdocid(), expected_lastdocid);
00142
00143
00144
00145 unsigned long totlen = 0;
00146
00147
00148
00149
00150 map<string, string> posting_reprs;
00151 map<Xapian::valueno, string> value_reprs;
00152
00153 Xapian::termcount doclen_lower_bound = Xapian::termcount(-1);
00154 Xapian::termcount doclen_upper_bound = 0;
00155
00156 for (Xapian::PostingIterator dociter = db.postlist_begin(string());
00157 dociter != db.postlist_end(string());
00158 ++dociter) {
00159 Xapian::docid did = *dociter;
00160 TEST_EQUAL(dociter.get_wdf(), 1);
00161 Xapian::Document doc(db.get_document(did));
00162 Xapian::termcount doclen(db.get_doclength(did));
00163 if (doclen < doclen_lower_bound)
00164 doclen_lower_bound = doclen;
00165 if (doclen > doclen_upper_bound)
00166 doclen_upper_bound = doclen;
00167 totlen += doclen;
00168
00169 Xapian::termcount found_termcount = 0;
00170 Xapian::termcount wdf_sum = 0;
00171 Xapian::TermIterator t, t2;
00172 for (t = doc.termlist_begin(), t2 = db.termlist_begin(did);
00173 t != doc.termlist_end();
00174 ++t, ++t2) {
00175 TEST(t2 != db.termlist_end(did));
00176
00177 ++found_termcount;
00178 wdf_sum += t.get_wdf();
00179
00180 TEST_EQUAL(*t, *t2);
00181 TEST_EQUAL(t.get_wdf(), t2.get_wdf());
00182 TEST_EQUAL(db.get_termfreq(*t), t.get_termfreq());
00183 TEST_EQUAL(db.get_termfreq(*t), t2.get_termfreq());
00184
00185
00186 Xapian::termcount tc1, tc2;
00187 Xapian::PositionIterator it1(t.positionlist_begin());
00188 string posrepr = positions_to_string(it1, t.positionlist_end(), &tc1);
00189 Xapian::PositionIterator it2(t2.positionlist_begin());
00190 string posrepr2 = positions_to_string(it2, t2.positionlist_end(), &tc2);
00191 TEST_EQUAL(posrepr, posrepr2);
00192 TEST_EQUAL(tc1, tc2);
00193 try {
00194 TEST_EQUAL(tc1, t.positionlist_count());
00195 } catch (const Xapian::UnimplementedError &) {
00196
00197 }
00198
00199
00200 if (!posrepr.empty()) {
00201 posrepr = ",[" + posrepr + "]";
00202 }
00203 string posting_repr = "(" + str(did) + "," +
00204 str(t.get_wdf()) + "/" + str(doclen) +
00205 posrepr + ")";
00206
00207
00208 map<string, string>::iterator i = posting_reprs.find(*t);
00209 if (i == posting_reprs.end()) {
00210 posting_reprs[*t] = posting_repr;
00211 } else {
00212 i->second += "," + posting_repr;
00213 }
00214 }
00215
00216 Xapian::termcount vcount = 0;
00217 for (Xapian::ValueIterator v = doc.values_begin();
00218 v != doc.values_end();
00219 ++v, ++vcount) {
00220 TEST((*v).size() != 0);
00221 string value_repr = "(" + str(did) + "," + *v + ")";
00222
00223
00224 map<Xapian::valueno, string>::iterator i;
00225 i = value_reprs.find(v.get_valueno());
00226 if (i == value_reprs.end()) {
00227 value_reprs[v.get_valueno()] = value_repr;
00228 } else {
00229 i->second += "," + value_repr;
00230 }
00231 }
00232 TEST_EQUAL(vcount, doc.values_count());
00233 TEST(t2 == db.termlist_end(did));
00234 Xapian::termcount expected_termcount = doc.termlist_count();
00235 TEST_EQUAL(expected_termcount, found_termcount);
00236 TEST_EQUAL(doclen, wdf_sum);
00237 }
00238
00239 TEST_REL(doclen_lower_bound, >=, db.get_doclength_lower_bound());
00240 TEST_REL(doclen_upper_bound, <=, db.get_doclength_upper_bound());
00241
00242 Xapian::TermIterator t;
00243 map<string, string>::const_iterator i;
00244 for (t = db.allterms_begin(), i = posting_reprs.begin();
00245 t != db.allterms_end();
00246 ++t, ++i) {
00247 TEST(db.term_exists(*t));
00248 TEST(i != posting_reprs.end());
00249 TEST_EQUAL(i->first, *t);
00250
00251 Xapian::doccount tf_count = 0;
00252 Xapian::termcount cf_count = 0;
00253 Xapian::termcount wdf_upper_bound = 0;
00254 string posting_repr;
00255 bool need_comma = false;
00256 for (Xapian::PostingIterator p = db.postlist_begin(*t);
00257 p != db.postlist_end(*t);
00258 ++p) {
00259 if (need_comma) {
00260 posting_repr += ",";
00261 }
00262
00263 ++tf_count;
00264 cf_count += p.get_wdf();
00265
00266 Xapian::PositionIterator it(p.positionlist_begin());
00267 string posrepr = positions_to_string(it, p.positionlist_end());
00268 if (!posrepr.empty()) {
00269 posrepr = ",[" + posrepr + "]";
00270 }
00271 posting_repr += "(" + str(*p) + "," +
00272 str(p.get_wdf()) + "/" + str(p.get_doclength()) +
00273 posrepr + ")";
00274 if (wdf_upper_bound < p.get_wdf())
00275 wdf_upper_bound = p.get_wdf();
00276 need_comma = true;
00277 }
00278
00279 TEST_EQUAL(posting_repr, i->second);
00280 TEST_EQUAL(tf_count, t.get_termfreq());
00281 TEST_EQUAL(tf_count, db.get_termfreq(*t));
00282 TEST_EQUAL(cf_count, db.get_collection_freq(*t));
00283 TEST_REL(wdf_upper_bound, <=, db.get_wdf_upper_bound(*t));
00284 }
00285 TEST(i == posting_reprs.end());
00286
00287 map<Xapian::valueno, string>::const_iterator j;
00288 for (j = value_reprs.begin(); j != value_reprs.end(); ++j) {
00289 string value_repr;
00290 string value_lower_bound;
00291 string value_upper_bound;
00292 bool first = true;
00293 for (Xapian::ValueIterator v = db.valuestream_begin(j->first);
00294 v != db.valuestream_end(j->first); ++v) {
00295 if (first) {
00296 value_lower_bound = *v;
00297 value_upper_bound = *v;
00298 first = false;
00299 } else {
00300 value_repr += ",";
00301 if (*v > value_upper_bound) {
00302 value_upper_bound = *v;
00303 }
00304 if (*v < value_lower_bound) {
00305 value_lower_bound = *v;
00306 }
00307 }
00308 value_repr += "(" + str(v.get_docid()) + "," + *v + ")";
00309 }
00310 TEST_EQUAL(value_repr, j->second);
00311 try {
00312 TEST_REL(value_upper_bound, <=, db.get_value_upper_bound(j->first));
00313 TEST_REL(value_lower_bound, >=, db.get_value_lower_bound(j->first));
00314 } catch (const Xapian::UnimplementedError &) {
00315
00316
00317 }
00318 }
00319
00320 if (expected_doccount == 0) {
00321 TEST_EQUAL(0, db.get_avlength());
00322 } else {
00323 TEST_EQUAL_DOUBLE(double(totlen) / expected_doccount,
00324 db.get_avlength());
00325 }
00326 }