00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include "api_opsynonym.h"
00026
00027 #include <map>
00028 #include <set>
00029 #include <vector>
00030
00031 #include <xapian.h>
00032
00033 #include "backendmanager.h"
00034 #include "testsuite.h"
00035 #include "testutils.h"
00036
00037 #include "apitest.h"
00038
00039 using namespace std;
00040
00041
00042
00043
00044
00045 DEFINE_TESTCASE(synonym1, backend) {
00046 Xapian::Database db(get_database("etext"));
00047
00048 TEST_REL(db.get_doclength_upper_bound(), >, 0);
00049
00050 Xapian::doccount lots = 214;
00051
00052
00053
00054 vector<vector<Xapian::Query> > subqueries_list;
00055
00056
00057
00058
00059 vector<int> subqueries_sameweight_count;
00060 vector<int> subqueries_diffweight_count;
00061
00062 vector<Xapian::Query> subqueries;
00063 subqueries.push_back(Xapian::Query("date"));
00064 subqueries_list.push_back(subqueries);
00065
00066 subqueries_sameweight_count.push_back(33);
00067 subqueries_diffweight_count.push_back(0);
00068
00069
00070 subqueries.clear();
00071 subqueries.push_back(Xapian::Query("sky"));
00072 subqueries.push_back(Xapian::Query("date"));
00073 subqueries_list.push_back(subqueries);
00074
00075 subqueries_sameweight_count.push_back(0);
00076 subqueries_diffweight_count.push_back(34);
00077
00078
00079
00080 subqueries.clear();
00081 subqueries.push_back(Xapian::Query("gutenberg"));
00082 subqueries.push_back(Xapian::Query("blockhead"));
00083 subqueries_list.push_back(subqueries);
00084
00085 subqueries_sameweight_count.push_back(0);
00086 subqueries_diffweight_count.push_back(18);
00087
00088 subqueries.clear();
00089 subqueries.push_back(Xapian::Query("date"));
00090 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
00091 Xapian::Query("sky"),
00092 Xapian::Query("glove")));
00093 subqueries_list.push_back(subqueries);
00094
00095 subqueries_sameweight_count.push_back(0);
00096 subqueries_diffweight_count.push_back(34);
00097
00098 subqueries.clear();
00099 subqueries.push_back(Xapian::Query("date"));
00100 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
00101 Xapian::Query("sky"),
00102 Xapian::Query("date")));
00103 subqueries_list.push_back(subqueries);
00104
00105 subqueries_sameweight_count.push_back(0);
00106 subqueries_diffweight_count.push_back(34);
00107
00108 subqueries.clear();
00109 subqueries.push_back(Xapian::Query("date"));
00110 subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_MAYBE,
00111 Xapian::Query("sky"),
00112 Xapian::Query("date")));
00113 subqueries_list.push_back(subqueries);
00114
00115 subqueries_sameweight_count.push_back(0);
00116 subqueries_diffweight_count.push_back(34);
00117
00118 subqueries.clear();
00119 subqueries.push_back(Xapian::Query("date"));
00120 subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_NOT,
00121 Xapian::Query("sky"),
00122 Xapian::Query("date")));
00123 subqueries_list.push_back(subqueries);
00124
00125 subqueries_sameweight_count.push_back(0);
00126 subqueries_diffweight_count.push_back(34);
00127
00128 subqueries.clear();
00129 subqueries.push_back(Xapian::Query("date"));
00130 subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND,
00131 Xapian::Query("sky"),
00132 Xapian::Query("date")));
00133 subqueries_list.push_back(subqueries);
00134
00135
00136
00137
00138
00139 subqueries_sameweight_count.push_back(32);
00140 subqueries_diffweight_count.push_back(1);
00141
00142 subqueries.clear();
00143 subqueries.push_back(Xapian::Query("date"));
00144 subqueries.push_back(Xapian::Query(Xapian::Query::OP_XOR,
00145 Xapian::Query("sky"),
00146 Xapian::Query("date")));
00147 subqueries_list.push_back(subqueries);
00148
00149 subqueries_sameweight_count.push_back(0);
00150 subqueries_diffweight_count.push_back(34);
00151
00152 subqueries.clear();
00153 subqueries.push_back(Xapian::Query("date"));
00154 subqueries.push_back(Xapian::Query(Xapian::Query::OP_SYNONYM,
00155 Xapian::Query("sky"),
00156 Xapian::Query("date")));
00157 subqueries_list.push_back(subqueries);
00158
00159
00160
00161
00162
00163
00164
00165
00166 subqueries_sameweight_count.push_back(1);
00167 subqueries_diffweight_count.push_back(33);
00168
00169 subqueries.clear();
00170 subqueries.push_back(Xapian::Query("sky"));
00171 subqueries.push_back(Xapian::Query("date"));
00172 subqueries.push_back(Xapian::Query("stein"));
00173 subqueries.push_back(Xapian::Query("ally"));
00174 subqueries_list.push_back(subqueries);
00175
00176 subqueries_sameweight_count.push_back(0);
00177 subqueries_diffweight_count.push_back(35);
00178
00179 subqueries.clear();
00180 subqueries.push_back(Xapian::Query("attitud"));
00181 subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
00182 Xapian::Query("german"),
00183 Xapian::Query("adventur")));
00184 subqueries_list.push_back(subqueries);
00185
00186
00187
00188
00189
00190
00191 subqueries_sameweight_count.push_back(1);
00192 subqueries_diffweight_count.push_back(3);
00193
00194 subqueries.clear();
00195 subqueries.push_back(Xapian::Query("attitud"));
00196 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
00197 Xapian::Query("german"),
00198 Xapian::Query(Xapian::Query::OP_SYNONYM,
00199 Xapian::Query("sky"),
00200 Xapian::Query("date"))));
00201 subqueries_list.push_back(subqueries);
00202
00203 subqueries_sameweight_count.push_back(0);
00204 subqueries_diffweight_count.push_back(54);
00205
00206 for (vector<vector<Xapian::Query> >::size_type subqgroup = 0;
00207 subqgroup != subqueries_list.size(); ++subqgroup)
00208 {
00209 vector<Xapian::Query> * qlist = &(subqueries_list[subqgroup]);
00210
00211
00212 Xapian::Enquire enquire(db);
00213
00214
00215 Xapian::Query orquery(Xapian::Query::OP_OR, qlist->begin(), qlist->end());
00216 enquire.set_query(orquery);
00217 Xapian::MSet ormset = enquire.get_mset(0, lots);
00218
00219
00220 Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end());
00221 enquire.set_query(synquery);
00222 Xapian::MSet synmset = enquire.get_mset(0, lots);
00223
00224 tout << "Comparing " << orquery << " with " << synquery << '\n';
00225
00226
00227 TEST_NOT_EQUAL(synmset.size(), 0);
00228
00229 TEST_EQUAL(synmset.size(), ormset.size());
00230 map<Xapian::docid, Xapian::weight> values_or;
00231 map<Xapian::docid, Xapian::weight> values_synonym;
00232 for (Xapian::doccount i = 0; i < synmset.size(); ++i) {
00233 values_or[*ormset[i]] = ormset[i].get_weight();
00234 values_synonym[*synmset[i]] = synmset[i].get_weight();
00235 }
00236 TEST_EQUAL(values_or.size(), values_synonym.size());
00237
00238
00239
00240 int same_weight = 0;
00241 int different_weight = 0;
00242 for (map<Xapian::docid, Xapian::weight>::const_iterator
00243 j = values_or.begin(); j != values_or.end(); ++j) {
00244 Xapian::docid did = j->first;
00245
00246
00247 TEST(values_synonym.find(did) != values_synonym.end());
00248 if (values_or[did] == values_synonym[did]) {
00249 ++same_weight;
00250 } else {
00251 ++different_weight;
00252 }
00253 }
00254
00255 int expected_same = subqueries_sameweight_count[subqgroup];
00256 int expected_diff = subqueries_diffweight_count[subqgroup];
00257
00258 TEST_EQUAL(different_weight, expected_diff);
00259 TEST_EQUAL(same_weight, expected_same);
00260
00261
00262
00263
00264 Xapian::MSet mset_top = enquire.get_mset(0, 1);
00265 TEST_EQUAL(mset_top.size(), 1);
00266 TEST(mset_range_is_same(mset_top, 0, synmset, 0, 1));
00267 }
00268 return true;
00269 }
00270
00271
00272 DEFINE_TESTCASE(synonym2, backend) {
00273 Xapian::Query query;
00274 vector<Xapian::Query> subqueries;
00275 subqueries.push_back(Xapian::Query("file"));
00276 subqueries.push_back(Xapian::Query("the"));
00277 subqueries.push_back(Xapian::Query("next"));
00278 subqueries.push_back(Xapian::Query("reader"));
00279 query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end());
00280 subqueries.clear();
00281 subqueries.push_back(query);
00282 subqueries.push_back(Xapian::Query("gutenberg"));
00283 query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end());
00284
00285 tout << query << '\n';
00286
00287 Xapian::Database db(get_database("etext"));
00288 Xapian::Enquire enquire(db);
00289 enquire.set_query(query);
00290 Xapian::MSet mset = enquire.get_mset(0, 10);
00291 tout << mset << '\n';
00292
00293
00294 double maxposs = mset.get_max_possible();
00295 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0);
00296 enquire.set_query(query);
00297 mset = enquire.get_mset(0, 10);
00298 double maxposs2 = mset.get_max_possible();
00299
00300 TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2);
00301
00302 return true;
00303 }
00304
00305 static void
00306 check_msets_contain_same_docs(const Xapian::MSet & mset1,
00307 const Xapian::MSet & mset2)
00308 {
00309 TEST_EQUAL(mset1.size(), mset2.size());
00310
00311 set<Xapian::docid> docids;
00312 for (Xapian::doccount i = 0; i < mset1.size(); ++i) {
00313 docids.insert(*mset1[i]);
00314 }
00315
00316
00317 for (Xapian::doccount j = 0; j < mset2.size(); ++j) {
00318
00319
00320
00321 TEST(docids.erase(*mset2[j]));
00322 }
00323 }
00324
00325
00326 DEFINE_TESTCASE(synonym3, backend) {
00327 Xapian::Query query = Xapian::Query(Xapian::Query::OP_SYNONYM,
00328 Xapian::Query("sky"),
00329 Xapian::Query("date"));
00330
00331 Xapian::Database db(get_database("etext"));
00332 Xapian::Enquire enquire(db);
00333 enquire.set_query(query);
00334 Xapian::MSet mset_orig = enquire.get_mset(0, db.get_doccount());
00335
00336 tout << query << '\n';
00337 tout << mset_orig << '\n';
00338
00339
00340
00341 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 0.0);
00342 enquire.set_query(query);
00343 Xapian::MSet mset_zero = enquire.get_mset(0, db.get_doccount());
00344
00345 tout << query << '\n';
00346 tout << mset_zero << '\n';
00347
00348
00349 TEST_NOT_EQUAL(mset_zero.size(), 0);
00350
00351
00352 check_msets_contain_same_docs(mset_orig, mset_zero);
00353 for (Xapian::doccount i = 0; i < mset_orig.size(); ++i) {
00354 TEST_NOT_EQUAL(mset_orig[i].get_weight(), 0.0);
00355 TEST_EQUAL(mset_zero[i].get_weight(), 0.0);
00356 }
00357
00358 return true;
00359 }
00360
00361
00362 DEFINE_TESTCASE(synonym4, backend) {
00363 Xapian::Database db(get_database("etext"));
00364 Xapian::Enquire enquire(db);
00365 Xapian::Query syn_query = Xapian::Query(Xapian::Query::OP_SYNONYM,
00366 Xapian::Query("gutenberg"),
00367 Xapian::Query("blockhead"));
00368 Xapian::Query or_query = Xapian::Query(Xapian::Query::OP_OR,
00369 Xapian::Query("gutenberg"),
00370 Xapian::Query("blockhead"));
00371 Xapian::Query date_query = Xapian::Query("date");
00372
00373
00374 static const Xapian::Query::op operators[] = {
00375 Xapian::Query::OP_AND_MAYBE,
00376 Xapian::Query::OP_AND_NOT,
00377 Xapian::Query::OP_AND,
00378 Xapian::Query::OP_XOR,
00379 Xapian::Query::OP_OR,
00380 Xapian::Query::OP_SYNONYM
00381 };
00382 const Xapian::Query::op * end;
00383 end = operators + sizeof(operators) / sizeof(operators[0]);
00384 for (const Xapian::Query::op * i = operators; i != end; ++i) {
00385 tout.str(string());
00386 Xapian::Query query1(*i, syn_query, date_query);
00387 Xapian::Query query2(*i, or_query, date_query);
00388
00389 enquire.set_query(query1);
00390 tout << "query1:" << query1 << '\n';
00391 Xapian::MSet mset1 = enquire.get_mset(0, db.get_doccount());
00392 tout << "mset1:" << mset1 << '\n';
00393 enquire.set_query(query2);
00394 tout << "query2:" << query2 << '\n';
00395 Xapian::MSet mset2 = enquire.get_mset(0, db.get_doccount());
00396 tout << "mset2:" << mset2 << '\n';
00397
00398 TEST_NOT_EQUAL(mset1.size(), 0);
00399 if (*i != Xapian::Query::OP_XOR) {
00400 TEST_EQUAL(mset1[0].get_percent(), 100);
00401 } else {
00402 TEST(mset1[0].get_percent() != 100);
00403 }
00404 check_msets_contain_same_docs(mset1, mset2);
00405 }
00406
00407 return true;
00408 }