xapian-core  1.4.25
api_weight.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2012,2013,2016,2017,2019 Olly Betts
5  * Copyright (C) 2013 Aarsh Shah
6  * Copyright (C) 2016 Vivek Pal
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include <config.h>
24 
25 #include "api_weight.h"
26 #include <cmath>
27 
28 #include <xapian.h>
29 
30 #include "apitest.h"
31 #include "testutils.h"
32 
33 using namespace std;
34 
35 // Test exception for junk after serialised weight.
36 DEFINE_TESTCASE(tradweight3, !backend) {
37  Xapian::TradWeight wt(42);
38  try {
40  Xapian::TradWeight * t2 = t.unserialise(wt.serialise() + "X");
41  // Make sure we actually use the weight.
42  bool empty = t2->name().empty();
43  delete t2;
44  if (empty)
45  FAIL_TEST("Serialised TradWeight with junk appended unserialised to empty name!");
46  FAIL_TEST("Serialised TradWeight with junk appended unserialised OK");
47  } catch (const Xapian::SerialisationError &e) {
48  // Regression test for error in exception message fixed in 1.2.11 and
49  // 1.3.1.
50  TEST(e.get_msg().find("BM25") == string::npos);
51  TEST(e.get_msg().find("Trad") != string::npos);
52  }
53 }
54 
55 // Test Exception for junk after serialised weight.
56 DEFINE_TESTCASE(unigramlmweight3, !backend) {
58  try {
60  Xapian::LMWeight * t2 = t.unserialise(wt.serialise() + "X");
61  // Make sure we actually use the weight.
62  bool empty = t2->name().empty();
63  delete t2;
64  if (empty)
65  FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
66  FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
67  } catch (const Xapian::SerialisationError &e) {
68  TEST(e.get_msg().find("LM") != string::npos);
69  }
70 }
71 
72 // Test exception for junk after serialised weight.
73 DEFINE_TESTCASE(bm25weight3, !backend) {
74  Xapian::BM25Weight wt(2.0, 0.5, 1.3, 0.6, 0.01);
75  try {
77  Xapian::BM25Weight * b2 = b.unserialise(wt.serialise() + "X");
78  // Make sure we actually use the weight.
79  bool empty = b2->name().empty();
80  delete b2;
81  if (empty)
82  FAIL_TEST("Serialised BM25Weight with junk appended unserialised to empty name!");
83  FAIL_TEST("Serialised BM25Weight with junk appended unserialised OK");
84  } catch (const Xapian::SerialisationError &e) {
85  TEST(e.get_msg().find("BM25") != string::npos);
86  }
87 }
88 
89 // Test parameter combinations which should be unaffected by doclength.
90 DEFINE_TESTCASE(bm25weight4, backend) {
91  Xapian::Database db = get_database("apitest_simpledata");
92  Xapian::Enquire enquire(db);
93  enquire.set_query(Xapian::Query("paragraph"));
94  Xapian::MSet mset;
95 
96  enquire.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
97  mset = enquire.get_mset(0, 10);
98  TEST_EQUAL(mset.size(), 5);
99  // Expect: wdf has an effect on weight, but doclen doesn't.
100  TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
101  TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
102  TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
103  TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
104 
105  enquire.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
106  mset = enquire.get_mset(0, 10);
107  TEST_EQUAL(mset.size(), 5);
108  // Expect: neither wdf nor doclen affects weight.
109  TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
110 }
111 
113 // Regression test for bug fixed in 1.2.17 and 1.3.2.
114 DEFINE_TESTCASE(bm25weight5, backend) {
115  Xapian::Database db = get_database("apitest_simpledata");
116  Xapian::Enquire enquire(db);
117  enquire.set_query(Xapian::Query("paragraph"));
118  Xapian::MSet mset;
119 
120  enquire.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));
121  mset = enquire.get_mset(0, 10);
122  TEST_EQUAL(mset.size(), 5);
123  // Expect: wdf has no effect on weight; shorter docs rank higher.
124  mset_expect_order(mset, 3, 5, 1, 4, 2);
125  TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[1].get_weight());
126  TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
127  TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
128  TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
129 }
130 
131 // Test exception for junk after serialised weight.
132 DEFINE_TESTCASE(bm25plusweight1, !backend) {
133  Xapian::BM25PlusWeight wt(2.0, 0.1, 1.3, 0.6, 0.01, 0.5);
134  try {
136  Xapian::BM25PlusWeight * b2 = b.unserialise(wt.serialise() + "X");
137  // Make sure we actually use the weight.
138  bool empty = b2->name().empty();
139  delete b2;
140  if (empty)
141  FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised to empty name!");
142  FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised OK");
143  } catch (const Xapian::SerialisationError &e) {
144  TEST(e.get_msg().find("BM25Plus") != string::npos);
145  }
146 }
147 
148 // Test parameter combinations which should be unaffected by doclength.
149 DEFINE_TESTCASE(bm25plusweight2, backend) {
150  Xapian::Database db = get_database("apitest_simpledata");
151  Xapian::Enquire enquire(db);
152  enquire.set_query(Xapian::Query("paragraph"));
153  Xapian::MSet mset;
154 
155  enquire.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0, 0.5, 1));
156  mset = enquire.get_mset(0, 10);
157  TEST_EQUAL(mset.size(), 5);
158  // Expect: wdf has an effect on weight, but doclen doesn't.
159  TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
160  TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
161  TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
162  TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
163 
164  enquire.set_weighting_scheme(Xapian::BM25PlusWeight(0, 0, 1, 1, 0.5, 1));
165  mset = enquire.get_mset(0, 10);
166  TEST_EQUAL(mset.size(), 5);
167  // Expect: neither wdf nor doclen affects weight.
168  TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
169 }
170 
171 // Regression test for a mistake corrected in the BM25+ implementation.
172 DEFINE_TESTCASE(bm25plusweight3, backend) {
173  Xapian::Database db = get_database("apitest_simpledata");
174  Xapian::Enquire enquire(db);
175  enquire.set_query(Xapian::Query("paragraph"));
176  Xapian::MSet mset;
177 
178  enquire.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0.5, 0.5, 1));
179  mset = enquire.get_mset(0, 10);
180  TEST_EQUAL(mset.size(), 5);
181 
182  // The value of each doc weight calculated manually from the BM25+ formulae
183  // by using the respective document statistics.
184  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 0.7920796567487473);
185  TEST_EQUAL_DOUBLE(mset[1].get_weight(), 0.7846980783848447);
186  TEST_EQUAL_DOUBLE(mset[2].get_weight(), 0.7558817623365934);
187  TEST_EQUAL_DOUBLE(mset[3].get_weight(), 0.7210119356168847);
188  TEST_EQUAL_DOUBLE(mset[4].get_weight(), 0.7210119356168847);
189 }
190 
191 // Test exception for junk after serialised weight.
192 DEFINE_TESTCASE(inl2weight1, !backend) {
193  Xapian::InL2Weight wt(2.0);
194  try {
196  Xapian::InL2Weight * b2 = b.unserialise(wt.serialise() + "X");
197  // Make sure we actually use the weight.
198  bool empty = b2->name().empty();
199  delete b2;
200  if (empty)
201  FAIL_TEST("Serialised inl2weight with junk appended unserialised to empty name!");
202  FAIL_TEST("Serialised inl2weight with junk appended unserialised OK");
203  } catch (const Xapian::SerialisationError &e) {
204  TEST(e.get_msg().find("InL2") != string::npos);
205  }
206 }
207 
208 // Test for invalid values of c.
209 DEFINE_TESTCASE(inl2weight2, !backend) {
210  // InvalidArgumentError should be thrown if the parameter c is invalid.
212  Xapian::InL2Weight wt(-2.0));
213 
215  Xapian::InL2Weight wt2(0.0));
216 
217  /* Parameter c should be set to 1.0 by constructor if none is given. */
218  Xapian::InL2Weight weight2;
220 }
221 
222 // Feature tests for Inl2Weight
223 DEFINE_TESTCASE(inl2weight3, backend) {
224  Xapian::Database db = get_database("apitest_simpledata");
225  Xapian::Enquire enquire(db);
226  Xapian::Query query("banana");
227 
228  enquire.set_query(query);
230 
231  Xapian::MSet mset1;
232  mset1 = enquire.get_mset(0, 10);
233  TEST_EQUAL(mset1.size(), 1);
234  mset_expect_order(mset1, 6);
235 
236  /* The value has been calculated in the python interpreter by looking at the
237  * database statistics. */
238  TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.559711143842063);
239 
240  // Test with OP_SCALE_WEIGHT.
243 
244  Xapian::MSet mset2;
245  mset2 = enquire.get_mset(0, 10);
246  TEST_EQUAL(mset2.size(), 1);
247  TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
248  TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
249 }
250 
251 // Test exception for junk after serialised weight.
252 DEFINE_TESTCASE(ifb2weight1, !backend) {
253  Xapian::IfB2Weight wt(2.0);
254  try {
256  Xapian::IfB2Weight * b2 = b.unserialise(wt.serialise() + "X");
257  // Make sure we actually use the weight.
258  bool empty = b2->name().empty();
259  delete b2;
260  if (empty)
261  FAIL_TEST("Serialised IfB2Weight with junk appended unserialised to empty name!");
262  FAIL_TEST("Serialised IfB2Weight with junk appended unserialised OK");
263  } catch (const Xapian::SerialisationError &e) {
264  TEST(e.get_msg().find("IfB2") != string::npos);
265  }
266 }
267 
268 // Test for invalid values of c.
269 DEFINE_TESTCASE(ifb2weight2, !backend) {
270  // InvalidArgumentError should be thrown if the parameter c is invalid.
272  Xapian::IfB2Weight wt(-2.0));
273 
275  Xapian::IfB2Weight wt2(0.0));
276 
277  /* Parameter c should be set to 1.0 by constructor if none is given. */
278  Xapian::IfB2Weight weight2;
280 }
281 
282 // Feature test
283 DEFINE_TESTCASE(ifb2weight3, backend) {
284  Xapian::Database db = get_database("apitest_simpledata");
285  Xapian::Enquire enquire(db);
286  Xapian::Query query("banana");
287 
288  enquire.set_query(query);
290 
291  Xapian::MSet mset1;
292  mset1 = enquire.get_mset(0, 10);
293  TEST_EQUAL(mset1.size(), 1);
294 
295  /* The value of the weight has been manually calculated using the statistics
296  * of the test database. */
297  TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.119422287684126);
298 
299  // Test with OP_SCALE_WEIGHT.
302 
303  Xapian::MSet mset2;
304  mset2 = enquire.get_mset(0, 10);
305  TEST_EQUAL(mset2.size(), 1);
306  TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
307  TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
308 }
309 
310 // Test exception for junk after serialised weight.
311 DEFINE_TESTCASE(ineb2weight1, !backend) {
312  Xapian::IneB2Weight wt(2.0);
313  try {
315  Xapian::IneB2Weight * b2 = b.unserialise(wt.serialise() + "X");
316  // Make sure we actually use the weight.
317  bool empty = b2->name().empty();
318  delete b2;
319  if (empty)
320  FAIL_TEST("Serialised ineb2weight with junk appended unserialised to empty name!");
321  FAIL_TEST("Serialised ineb2weight with junk appended unserialised OK");
322  } catch (const Xapian::SerialisationError &e) {
323  TEST(e.get_msg().find("IneB2") != string::npos);
324  }
325 }
326 
327 // Test for invalid values of c.
328 DEFINE_TESTCASE(ineb2weight2, !backend) {
329  // InvalidArgumentError should be thrown if parameter c is invalid.
331  Xapian::IneB2Weight wt(-2.0));
332 
334  Xapian::IneB2Weight wt2(0.0));
335 
336  /* Parameter c should be set to 1.0 by constructor if none is given. */
337  Xapian::IneB2Weight weight2;
339 }
340 
341 // Feature test.
342 DEFINE_TESTCASE(ineb2weight3, backend) {
343  Xapian::Database db = get_database("apitest_simpledata");
344  Xapian::Enquire enquire(db);
345  Xapian::Query query("paragraph");
346  enquire.set_query(query);
348 
349  Xapian::MSet mset1;
350  mset1 = enquire.get_mset(0, 10);
351  TEST_EQUAL(mset1.size(), 5);
352 
353  // The third document in the database is 4th in the ranking.
354  /* The weight value has been manually calculated by using the statistics
355  * of the test database. */
356  TEST_EQUAL_DOUBLE(mset1[4].get_weight(), 0.61709730297692400036);
357 
358  // Test with OP_SCALE_WEIGHT.
361 
362  Xapian::MSet mset2;
363  mset2 = enquire.get_mset(0, 10);
364  TEST_EQUAL(mset2.size(), 5);
365 
366  TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
367  for (int i = 0; i < 5; ++i) {
368  TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
369  }
370 }
371 
372 // Test exception for junk after serialised weight.
373 DEFINE_TESTCASE(bb2weight1, !backend) {
374  Xapian::BB2Weight wt(2.0);
375  try {
377  Xapian::BB2Weight * b2 = b.unserialise(wt.serialise() + "X");
378  // Make sure we actually use the weight.
379  bool empty = b2->name().empty();
380  delete b2;
381  if (empty)
382  FAIL_TEST("Serialised BB2Weight with junk appended unserialised to empty name!");
383  FAIL_TEST("Serialised BB2Weight with junk appended unserialised OK");
384  } catch (const Xapian::SerialisationError &e) {
385  TEST(e.get_msg().find("BB2") != string::npos);
386  }
387 }
388 
389 // Test for invalid values of c.
390 DEFINE_TESTCASE(bb2weight2, !backend) {
391  // InvalidArgumentError should be thrown if the parameter c is invalid.
393  Xapian::BB2Weight wt(-2.0));
394 
396  Xapian::BB2Weight wt2(0.0));
397 
398  /* Parameter c should be set to 1.0 by constructor if none is given. */
399  Xapian::BB2Weight weight2;
400  TEST_EQUAL(weight2.serialise(), Xapian::BB2Weight(1.0).serialise());
401 }
402 
403 // Feature test
404 DEFINE_TESTCASE(bb2weight3, backend) {
405  Xapian::Database db = get_database("apitest_simpledata");
406  Xapian::Enquire enquire(db);
407  Xapian::Query query("paragraph");
408 
409  enquire.set_query(query);
411 
412  Xapian::MSet mset1;
413  mset1 = enquire.get_mset(0, 10);
414  TEST_EQUAL(mset1.size(), 5);
415  /* The third document in the database has the highest weight and is the
416  * first in the mset. */
417  // Value calculated manually by using the statistics of the test database.
418  TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.6823696969784483);
419 
420  // Test with OP_SCALE_WEIGHT.
423 
424  Xapian::MSet mset2;
425  mset2 = enquire.get_mset(0, 10);
426  TEST_EQUAL(mset2.size(), 5);
427 
428  TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
429  for (int i = 0; i < 5; ++i) {
430  TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
431  }
432 
433  // Test with OP_SCALE_WEIGHT and a small factor (regression test, as we
434  // were applying the factor to the upper bound twice).
435  enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 1.0 / 1024));
437 
438  Xapian::MSet mset3;
439  mset3 = enquire.get_mset(0, 10);
440  TEST_EQUAL(mset3.size(), 5);
441 
442  for (int i = 0; i < 5; ++i) {
443  TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight() * 1024);
444  }
445 }
446 
447 // Regression test: we used to calculate log2(0) when there was only one doc.
448 DEFINE_TESTCASE(bb2weight4, backend) {
449  Xapian::Database db = get_database("apitest_onedoc");
450  Xapian::Enquire enquire(db);
451  Xapian::Query query("word");
452 
453  enquire.set_query(query);
455 
456  Xapian::MSet mset1;
457  mset1 = enquire.get_mset(0, 10);
458  TEST_EQUAL(mset1.size(), 1);
459  TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.431020621347435);
460 }
461 
462 // Feature test.
463 DEFINE_TESTCASE(dlhweight1, backend) {
464  Xapian::Database db = get_database("apitest_simpledata");
465  Xapian::Enquire enquire(db);
466  Xapian::Query query("a");
467 
468  enquire.set_query(query);
470 
471  Xapian::MSet mset1;
472  mset1 = enquire.get_mset(0, 10);
473  TEST_EQUAL(mset1.size(), 3);
474  mset_expect_order(mset1, 3, 1, 2);
475  // Weights calculated manually using stats from the database.
476  TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.0046477754371292362);
477  TEST_EQUAL_DOUBLE(mset1[1].get_weight(), 0.97621929514640352757);
478  // The following weight would be negative but gets clamped to 0.
479  TEST_EQUAL_DOUBLE(mset1[2].get_weight(), 0.0);
480 
481  // Test with OP_SCALE_WEIGHT.
484 
485  Xapian::MSet mset2;
486  mset2 = enquire.get_mset(0, 10);
487  TEST_EQUAL(mset2.size(), 3);
488 
489  TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
490  for (Xapian::doccount i = 0; i < mset2.size(); ++i) {
491  TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
492  }
493 }
494 
495 // Test exception for junk after serialised weight.
496 DEFINE_TESTCASE(dlhweight2, !backend) {
498  try {
500  Xapian::DLHWeight * t2 = t.unserialise(wt.serialise() + "X");
501  // Make sure we actually use the weight.
502  bool empty = t2->name().empty();
503  delete t2;
504  if (empty)
505  FAIL_TEST("Serialised DLHWeight with junk appended unserialised to empty name!");
506  FAIL_TEST("Serialised DLHWeight with junk appended unserialised OK");
507  } catch (const Xapian::SerialisationError &e) {
508  TEST(e.get_msg().find("DLH") != string::npos);
509  }
510 }
511 
512 static void
514 {
515  Xapian::Document doc;
516  doc.add_term("solo", 37);
517  db.add_document(doc);
518 }
519 
520 // Test wdf == doclen.
521 DEFINE_TESTCASE(dlhweight3, backend) {
522  Xapian::Database db = get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db);
523  Xapian::Enquire enquire(db);
524  Xapian::Query query("solo");
525 
526  enquire.set_query(query);
528 
529  Xapian::MSet mset1;
530  mset1 = enquire.get_mset(0, 10);
531  TEST_EQUAL(mset1.size(), 1);
532  // Weight gets clamped to zero.
533  TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
534 }
535 
536 // Test exception for junk after serialised weight.
537 DEFINE_TESTCASE(pl2weight1, !backend) {
538  Xapian::PL2Weight wt(2.0);
539  try {
541  Xapian::PL2Weight * b2 = b.unserialise(wt.serialise() + "X");
542  // Make sure we actually use the weight.
543  bool empty = b2->name().empty();
544  delete b2;
545  if (empty)
546  FAIL_TEST("Serialised PL2Weight with junk appended unserialised to empty name!");
547  FAIL_TEST("Serialised PL2Weight with junk appended unserialised OK");
548  } catch (const Xapian::SerialisationError &e) {
549  TEST(e.get_msg().find("PL2") != string::npos);
550  }
551 }
552 
553 // Test for invalid values of c.
554 DEFINE_TESTCASE(pl2weight2, !backend) {
555  // InvalidArgumentError should be thrown if parameter c is invalid.
557  Xapian::PL2Weight wt(-2.0));
558 
559  /* Parameter c should be set to 1.0 by constructor if none is given. */
560  Xapian::PL2Weight weight2;
561  TEST_EQUAL(weight2.serialise(), Xapian::PL2Weight(1.0).serialise());
562 }
563 
564 // Feature Test.
565 DEFINE_TESTCASE(pl2weight3, backend) {
566  Xapian::Database db = get_database("apitest_simpledata");
567  Xapian::Enquire enquire(db);
568  Xapian::Query query("paragraph");
569  enquire.set_query(query);
570  Xapian::MSet mset;
571 
573  mset = enquire.get_mset(0, 10);
574  TEST_EQUAL(mset.size(), 5);
575  // Expected weight difference calculated in extended precision using stats
576  // from the test database.
577  TEST_EQUAL_DOUBLE(mset[2].get_weight(),
578  mset[3].get_weight() + 0.0086861771701328694);
579 
580  // Test with OP_SCALE_WEIGHT.
583 
584  Xapian::MSet mset2;
585  mset2 = enquire.get_mset(0, 10);
586  TEST_EQUAL(mset2.size(), 5);
587  TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
588  for (int i = 0; i < 5; ++i) {
589  TEST_EQUAL_DOUBLE(15.0 * mset[i].get_weight(), mset2[i].get_weight());
590  }
591 }
592 
593 // Test exception for junk after serialised weight.
594 DEFINE_TESTCASE(pl2plusweight1, !backend) {
595  Xapian::PL2PlusWeight wt(2.0, 0.9);
596  try {
598  Xapian::PL2PlusWeight * b2 = b.unserialise(wt.serialise() + "X");
599  // Make sure we actually use the weight.
600  bool empty = b2->name().empty();
601  delete b2;
602  if (empty)
603  FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised to empty name!");
604  FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised OK");
605  } catch (const Xapian::SerialisationError &e) {
606  TEST(e.get_msg().find("PL2Plus") != string::npos);
607  }
608 }
609 
610 // Test for invalid values of parameters, c and delta.
611 DEFINE_TESTCASE(pl2plusweight2, !backend) {
612  // InvalidArgumentError should be thrown if parameter c is invalid.
614  Xapian::PL2PlusWeight wt(-2.0, 0.9));
615 
616  // InvalidArgumentError should be thrown if parameter delta is invalid.
618  Xapian::PL2PlusWeight wt(1.0, -1.9));
619 }
620 
621 // Test for default values of parameters, c and delta.
622 DEFINE_TESTCASE(pl2plusweight3, !backend) {
623  Xapian::PL2PlusWeight weight2;
624 
625  /* Parameter c should be set to 1.0 by constructor if none is given. */
626  TEST_EQUAL(weight2.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
627 
628  /* Parameter delta should be set to 0.8 by constructor if none is given. */
629  TEST_EQUAL(weight2.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
630 }
631 
632 // Feature Test 1 for PL2PlusWeight.
633 DEFINE_TESTCASE(pl2plusweight4, backend) {
634  Xapian::Database db = get_database("apitest_simpledata");
635  Xapian::Enquire enquire(db);
636  enquire.set_query(Xapian::Query("paragraph"));
637  Xapian::MSet mset;
638 
639  enquire.set_weighting_scheme(Xapian::PL2PlusWeight(2.0, 0.8));
640  mset = enquire.get_mset(0, 10);
641  TEST_EQUAL(mset.size(), 5);
642  // Expected weight difference calculated in extended precision using stats
643  // from the test database.
644  TEST_EQUAL_DOUBLE(mset[2].get_weight(),
645  mset[3].get_weight() + 0.0086861771701328694);
646 }
647 
648 // Feature Test 2 for PL2PlusWeight
649 DEFINE_TESTCASE(pl2plusweight5, backend) {
650  Xapian::Database db = get_database("apitest_simpledata");
651  Xapian::Enquire enquire(db);
652  Xapian::Query query("word");
653  enquire.set_query(query);
654  Xapian::MSet mset;
655 
656  enquire.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
657  mset = enquire.get_mset(0, 10);
658  // Expect MSet contains two documents having query "word".
659  TEST_EQUAL(mset.size(), 2);
660  // Expect Document 2 has higher weight than document 4 because
661  // "word" appears more no. of times in document 2 than document 4.
662  mset_expect_order(mset, 2, 4);
663 
664  // Test with OP_SCALE_WEIGHT.
666  enquire.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
667 
668  Xapian::MSet mset2;
669  mset2 = enquire.get_mset(0, 10);
670  TEST_EQUAL(mset2.size(), mset.size());
671  TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
672  for (Xapian::doccount i = 0; i < mset.size(); ++i) {
673  TEST_EQUAL_DOUBLE(15.0 * mset[i].get_weight(), mset2[i].get_weight());
674  }
675 }
676 
677 // Feature test
678 DEFINE_TESTCASE(dphweight1, backend) {
679  Xapian::Database db = get_database("apitest_simpledata");
680  Xapian::Enquire enquire(db);
681  Xapian::Query query("paragraph");
682 
683  enquire.set_query(query);
685 
686  Xapian::MSet mset1;
687  mset1 = enquire.get_mset(0, 10);
688  TEST_EQUAL(mset1.size(), 5);
689  /* The weight has been calculated manually by using the statistics of the
690  * test database. */
691  TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
692 
693  // Test with OP_SCALE_WEIGHT.
696 
697  Xapian::MSet mset2;
698  mset2 = enquire.get_mset(0, 10);
699  TEST_EQUAL(mset2.size(), 5);
700  TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
701  for (int i = 0; i < 5; ++i) {
702  TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
703  }
704 }
705 
706 // Test exception for junk after serialised weight.
707 DEFINE_TESTCASE(dphweight2, !backend) {
709  try {
711  Xapian::DPHWeight * t2 = t.unserialise(wt.serialise() + "X");
712  // Make sure we actually use the weight.
713  bool empty = t2->name().empty();
714  delete t2;
715  if (empty)
716  FAIL_TEST("Serialised DPHWeight with junk appended unserialised to empty name!");
717  FAIL_TEST("Serialised DPHWeight with junk appended unserialised OK");
718  } catch (const Xapian::SerialisationError &e) {
719  TEST(e.get_msg().find("DPH") != string::npos);
720  }
721 }
722 
723 // Test wdf == doclen.
724 DEFINE_TESTCASE(dphweight3, backend) {
725  Xapian::Database db = get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db);
726  Xapian::Enquire enquire(db);
727  Xapian::Query query("solo");
728 
729  enquire.set_query(query);
731 
732  Xapian::MSet mset1;
733  mset1 = enquire.get_mset(0, 10);
734  TEST_EQUAL(mset1.size(), 1);
735  // Weight gets clamped to zero.
736  TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
737 }
738 
739 // Test for various cases of normalization string.
740 DEFINE_TESTCASE(tfidfweight1, !backend) {
741  // InvalidArgumentError should be thrown if normalization string is invalid
743  Xapian::TfIdfWeight b("JOHN_LENNON"));
744 
746  Xapian::TfIdfWeight b("LOL"));
747 
748  /* Normalization string should be set to "ntn" by constructor if none is
749  given. */
750  Xapian::TfIdfWeight weight2;
751  TEST_EQUAL(weight2.serialise(), Xapian::TfIdfWeight("ntn").serialise());
752 }
753 
754 // Test exception for junk after serialised weight.
755 DEFINE_TESTCASE(tfidfweight2, !backend) {
756  Xapian::TfIdfWeight wt("ntn");
757  try {
759  Xapian::TfIdfWeight * b2 = b.unserialise(wt.serialise() + "X");
760  // Make sure we actually use the weight.
761  bool empty = b2->name().empty();
762  delete b2;
763  if (empty)
764  FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised to empty name!");
765  FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised OK");
766  } catch (const Xapian::SerialisationError &e) {
767  TEST(e.get_msg().find("TfIdf") != string::npos);
768  }
769 }
770 
771 // Feature tests for various normalization functions.
772 DEFINE_TESTCASE(tfidfweight3, backend) {
773  Xapian::Database db = get_database("apitest_simpledata");
774  Xapian::Enquire enquire(db);
775  Xapian::Query query("word");
776  Xapian::MSet mset;
777 
778  // Check for "ntn" when termfreq != N
779  enquire.set_query(query);
781  mset = enquire.get_mset(0, 10);
782  TEST_EQUAL(mset.size(), 2);
783  // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
784  mset_expect_order(mset, 2, 4);
785  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * log(6.0 / 2));
786 
787  // Check that wqf is taken into account.
788  enquire.set_query(Xapian::Query("word", 2));
790  Xapian::MSet mset2 = enquire.get_mset(0, 10);
791  TEST_EQUAL(mset2.size(), 2);
792  // wqf is 2, so weights should be doubled.
793  TEST_EQUAL_DOUBLE(mset[0].get_weight() * 2, mset2[0].get_weight());
794  TEST_EQUAL_DOUBLE(mset[1].get_weight() * 2, mset2[1].get_weight());
795 
796  // Test with OP_SCALE_WEIGHT.
799  mset2 = enquire.get_mset(0, 10);
800  TEST_EQUAL(mset2.size(), 2);
801  // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
802  mset_expect_order(mset2, 2, 4);
803  TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
804  TEST_EQUAL_DOUBLE(15 * mset[0].get_weight(), mset2[0].get_weight());
805 
806  // check for "nfn" when termfreq != N
807  enquire.set_query(query);
809  mset = enquire.get_mset(0, 10);
810  TEST_EQUAL(mset.size(), 2);
811  mset_expect_order(mset, 2, 4);
812  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 / 2);
813 
814  // check for "nsn" when termfreq != N
815  enquire.set_query(query);
817  mset = enquire.get_mset(0, 10);
818  TEST_EQUAL(mset.size(), 2);
819  mset_expect_order(mset, 2, 4);
820  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * pow(log(6.0 / 2), 2.0));
821 
822  // Check for "bnn" and for both branches of 'b'.
823  enquire.set_query(Xapian::Query("test"));
825  mset = enquire.get_mset(0, 10);
826  TEST_EQUAL(mset.size(), 1);
827  mset_expect_order(mset, 1);
828  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1.0);
829 
830  // Check for "lnn" and for both branches of 'l'.
831  enquire.set_query(Xapian::Query("word"));
833  mset = enquire.get_mset(0, 10);
834  TEST_EQUAL(mset.size(), 2);
835  mset_expect_order(mset, 2, 4);
836  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
837  TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1.0); // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1
838 
839  // Check for "snn"
840  enquire.set_query(Xapian::Query("paragraph"));
841  enquire.set_weighting_scheme(Xapian::TfIdfWeight("snn")); // idf=1 and tfn=tf*tf
842  mset = enquire.get_mset(0, 10);
843  TEST_EQUAL(mset.size(), 5);
844  mset_expect_order(mset, 2, 1, 4, 3, 5);
845  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 9.0);
846  TEST_EQUAL_DOUBLE(mset[4].get_weight(), 1.0);
847 
848  // Check for "ntn" when termfreq=N
849  enquire.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "t"
851  mset = enquire.get_mset(0, 10);
852  TEST_EQUAL(mset.size(), 6);
853  mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
854  for (int i = 0; i < 6; ++i) {
855  TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
856  }
857 
858  // Check for "npn" and for both branches of 'p'
859  enquire.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "p"
861  mset = enquire.get_mset(0, 10);
862  TEST_EQUAL(mset.size(), 6);
863  mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
864  for (int i = 0; i < 6; ++i) {
865  TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
866  }
867 
868  // Check for "Lnn".
869  enquire.set_query(Xapian::Query("word"));
871  mset = enquire.get_mset(0, 10);
872  TEST_EQUAL(mset.size(), 2);
873  mset_expect_order(mset, 2, 4);
874  TEST_EQUAL_DOUBLE(mset[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
875  TEST_EQUAL_DOUBLE(mset[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
876 
877  enquire.set_query(Xapian::Query("word"));
879  mset = enquire.get_mset(0, 10);
880  TEST_EQUAL(mset.size(), 2);
881  mset_expect_order(mset, 2, 4);
882  TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8 * log((6.0 - 2) / 2));
883  TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1 * log((6.0 - 2) / 2));
884 }
885 
887  public:
888  double factor;
889 
890  unsigned & zero_inits, & non_zero_inits;
891 
892  CheckInitWeight(unsigned &z, unsigned &n)
893  : factor(-1.0), zero_inits(z), non_zero_inits(n) { }
894 
895  void init(double factor_) {
896  factor = factor_;
897  if (factor == 0.0)
898  ++zero_inits;
899  else
900  ++non_zero_inits;
901  }
902 
903  Weight * clone() const {
904  return new CheckInitWeight(zero_inits, non_zero_inits);
905  }
906 
908  Xapian::termcount) const {
909  return 1.0;
910  }
911 
912  double get_maxpart() const { return 1.0; }
913 
915  return 1.0 / doclen;
916  }
917 
918  double get_maxextra() const { return 1.0; }
919 };
920 
922 DEFINE_TESTCASE(checkinitweight1, backend && !multi && !remote) {
923  Xapian::Database db = get_database("apitest_simpledata");
924  Xapian::Enquire enquire(db);
926  Xapian::Query("this"), Xapian::Query("paragraph"));
927  enquire.set_query(q);
928  unsigned zero_inits = 0, non_zero_inits = 0;
929  CheckInitWeight wt(zero_inits, non_zero_inits);
930  enquire.set_weighting_scheme(wt);
931  Xapian::MSet mset = enquire.get_mset(0, 3);
932  TEST_EQUAL(zero_inits, 1);
933  TEST_EQUAL(non_zero_inits, 2);
934 }
935 
// CheckStatsWeight: test-only weighting scheme which checks the statistics
// made available to a Weight object.  get_sumpart() compares the values from
// the Weight accessors against values read directly from the database, and
// accumulates the wdf and wdf squared of each document it is called for into
// counters owned by the test.
//
// NOTE(review): several listing lines are absent from this extract: the class
// header (936), the declarations of the members db, sum, sum_squares,
// len_upper, len_lower and wdf_upper (940, 950-951, 953-955), the first line
// of each constructor (957, 983), and the first lines of the get_sumpart()
// and get_sumextra() signatures (1008, 1087).
937  public:
// Factor passed to the most recent init() call (-1.0 until init() is called).
938  double factor;
939 
941 
// The term being checked (for OP_WILDCARD tests this is the prefix passed to
// allterms_begin()).
942  string term1;
943 
944  // When testing OP_SYNONYM, term2 is also set.
945  // When testing OP_WILDCARD, term2 == "*".
946  // When testing a repeated term, term2 == "=" for the first occurrence and
947  // "_" for subsequent occurrences.
948  mutable string term2;
949 
952 
956 
// Main constructor: stores the database, the term(s) and references to the
// caller's accumulators, then requests every statistic checked below.
// len_lower starts at the largest termcount value and len_upper/wdf_upper at
// 0; get_maxpart() replaces them with the real bounds.
958  const string & term1_,
959  const string & term2_,
960  Xapian::termcount & sum_,
961  Xapian::termcount & sum_squares_)
962  : factor(-1.0), db(db_), term1(term1_), term2(term2_),
963  sum(sum_), sum_squares(sum_squares_),
964  len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
965  {
966  need_stat(COLLECTION_SIZE);
967  need_stat(RSET_SIZE);
968  need_stat(AVERAGE_LENGTH);
969  need_stat(TERMFREQ);
970  need_stat(RELTERMFREQ);
971  need_stat(QUERY_LENGTH);
972  need_stat(WQF);
973  need_stat(WDF);
974  need_stat(DOC_LENGTH);
975  need_stat(DOC_LENGTH_MIN);
976  need_stat(DOC_LENGTH_MAX);
977  need_stat(WDF_MAX);
978  need_stat(COLLECTION_FREQ);
979  need_stat(UNIQUE_TERMS);
980  need_stat(TOTAL_LENGTH);
981  }
982 
// Single-term convenience constructor: delegates with an empty term2.
984  const string & term_,
985  Xapian::termcount & sum_,
986  Xapian::termcount & sum_squares_)
987  : CheckStatsWeight(db_, term_, string(), sum_, sum_squares_) { }
988 
989  void init(double factor_) {
990  factor = factor_;
991  }
992 
// Note that this const method modifies *this (term2 is mutable): the clone is
// created with the current term2, and then "=" is replaced by "_" so later
// clones are marked as subsequent occurrences.
993  Weight * clone() const {
994  auto res = new CheckStatsWeight(db, term1, term2, sum, sum_squares);
995  if (term2 == "=") {
996  // The object passed to Enquire::set_weighting_scheme() is cloned
997  // right away, and then cloned again for each term, and then
998  // potentially once more for the term-independent weight
999  // contribution. In the repeated case, we want to handle the first
1000  // actual term specially, so we arrange for that to have "=" for
1001  // term2, and subsequent clones to have "_", so that we accumulate
1002  // sum and sum_squares on the first occurrence only.
1003  term2 = "_";
1004  }
1005  return res;
1006  }
1007 
// get_sumpart(): checks the statistics for one matching document, adds wdf
// and wdf squared to the accumulators (unless term2 == "_"), and returns 1.0.
1009  Xapian::termcount uniqueterms) const {
// Collection-wide statistics must agree with the database.
1010  Xapian::doccount num_docs = db.get_doccount();
1011  TEST_EQUAL(get_collection_size(), num_docs);
1012  TEST_EQUAL(get_rset_size(), 0);
1013  TEST_EQUAL(get_average_length(), db.get_avlength());
1014  Xapian::totallength totlen = get_total_length();
1015  TEST_EQUAL(totlen, db.get_total_length());
// Average length times document count, rounded to nearest, should give back
// the total length.
1016  double total_term_occurences = get_average_length() * num_docs;
1017  TEST_EQUAL(Xapian::totallength(total_term_occurences + 0.5), totlen);
// Single term, or repeated term: frequencies must match those of term1.
1018  if (term2.empty() || term2 == "=" || term2 == "_") {
1019  TEST_EQUAL(get_termfreq(), db.get_termfreq(term1));
1020  TEST_EQUAL(get_collection_freq(), db.get_collection_freq(term1));
1021  if (term2.empty()) {
1022  TEST_EQUAL(get_query_length(), 1);
1023  } else {
1024  TEST_EQUAL(get_query_length(), 2);
1025  }
1026  } else {
// Wildcard or synonym: compute the maximum and the sum of the frequencies of
// the terms involved, which bound the values checked below.
1027  Xapian::doccount tfmax = 0, tfsum = 0;
1028  Xapian::termcount cfmax = 0, cfsum = 0;
1029  if (term2 == "*") {
1030  // OP_WILDCARD case.
1031  for (auto&& t = db.allterms_begin(term1);
1032  t != db.allterms_end(term1); ++t) {
1033  Xapian::doccount tf = t.get_termfreq();
1034  tout << "->" << *t << " " << tf << '\n';
1035  tfsum += tf;
1036  tfmax = max(tfmax, tf);
// NOTE(review): listing line 1037, which declares cf, is absent from this
// extract.
1038  cfsum += cf;
1039  cfmax = max(cfmax, cf);
1040  }
1041  TEST_EQUAL(get_query_length(), 1);
1042  } else {
1043  // OP_SYNONYM case.
1044  Xapian::doccount tf1 = db.get_termfreq(term1);
1045  Xapian::doccount tf2 = db.get_termfreq(term2);
1046  tfsum = tf1 + tf2;
1047  tfmax = max(tf1, tf2);
1048  Xapian::termcount cf1 = db.get_collection_freq(term1);
1049  Xapian::termcount cf2 = db.get_collection_freq(term2);
1050  cfsum = cf1 + cf2;
1051  cfmax = max(cf1, cf2);
1052  TEST_EQUAL(get_query_length(), 2);
1053  }
1054  // Synonym occurs at least as many times as any term.
1055  TEST_REL(get_termfreq(), >=, tfmax);
1056  TEST_REL(get_collection_freq(), >=, cfmax);
1057  // Synonym can't occur more times than the terms do.
1058  TEST_REL(get_termfreq(), <=, tfsum);
1059  TEST_REL(get_collection_freq(), <=, cfsum);
1060  // Synonym can't occur more times than there are documents/terms.
1061  TEST_REL(get_termfreq(), <=, num_docs);
1062  TEST_REL(get_collection_freq(), <=, totlen);
1063  }
1064  TEST_EQUAL(get_reltermfreq(), 0);
1065  TEST_EQUAL(get_wqf(), 1);
// Per-document values must lie within the bounds captured by get_maxpart().
1066  TEST_REL(doclen,>=,len_lower);
1067  TEST_REL(doclen,<=,len_upper);
1068  TEST_REL(uniqueterms,>=,1);
1069  TEST_REL(uniqueterms,<=,doclen);
1070  TEST_REL(wdf,<=,wdf_upper);
// Skip accumulation for the second and later occurrences of a repeated term.
1071  if (term2 != "_") {
1072  sum += wdf;
1073  sum_squares += wdf * wdf;
1074  }
1075  return 1.0;
1076  }
1077 
// Returns 1.0.  On the first call (detected by len_upper == 0) this also
// captures the document length and wdf bounds used by get_sumpart().
// NOTE(review): this assigns members from a const method, so they are
// presumably declared mutable - their declarations are not in this extract.
1078  double get_maxpart() const {
1079  if (len_upper == 0) {
1080  len_lower = get_doclength_lower_bound();
1081  len_upper = get_doclength_upper_bound();
1082  wdf_upper = get_wdf_upper_bound();
1083  }
1084  return 1.0;
1085  }
1086 
// get_sumextra(): returns the reciprocal of the document length.
1088  return 1.0 / doclen;
1089  }
1090 
// Upper bound reported for get_sumextra().
1091  double get_maxextra() const { return 1.0; }
1092 };
1093 
// For every term in the database: run a single-term query using
// CheckStatsWeight (which checks the statistics as each document is
// weighted), then compare the wdf sum and wdf-squared sum it accumulated
// against the same sums computed by walking the term's posting list.
// NOTE(review): listing lines 1098 and 1116, which declare the iterators a
// and i, are absent from this extract.
1095 DEFINE_TESTCASE(checkstatsweight1, backend && !remote) {
1096  Xapian::Database db = get_database("apitest_simpledata");
1097  Xapian::Enquire enquire(db);
1099  for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1100  const string & term = *a;
1101  enquire.set_query(Xapian::Query(term));
// Accumulators which the weight object (and its clones) add to by reference.
1102  Xapian::termcount sum = 0;
1103  Xapian::termcount sum_squares = 0;
1104  CheckStatsWeight wt(db, term, sum, sum_squares);
1105  enquire.set_weighting_scheme(wt);
// Ask for up to get_doccount() results so that every match is retrieved.
1106  Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1107 
1108  // The document order in the multi-db case isn't the same as the
1109  // postlist order on the combined DB, so it's hard to compare the
1110  // wdf for each document in the Weight objects, but we can sum
1111  // the wdfs and the squares of the wdfs which provides a decent
1112  // check that we're not getting the wrong wdf values (it ensures
1113  // they have the right mean and standard deviation).
1114  Xapian::termcount expected_sum = 0;
1115  Xapian::termcount expected_sum_squares = 0;
1117  for (i = db.postlist_begin(term); i != db.postlist_end(term); ++i) {
1118  Xapian::termcount wdf = i.get_wdf();
1119  expected_sum += wdf;
1120  expected_sum_squares += wdf * wdf;
1121  }
1122  TEST_EQUAL(sum, expected_sum);
1123  TEST_EQUAL(sum_squares, expected_sum_squares);
1124  }
1125 }
1126 
1128 // Regression test for bugs fixed in 1.4.1.
// Walks the database's terms in consecutive pairs (term1, term2), runs a
// query combining the two terms using CheckStatsWeight, and compares the wdf
// sums it accumulated against sums computed by merging the two posting lists
// by document id.
// NOTE(review): listing lines 1132 (declaration of a), 1137 (start of the
// declaration of q, including the operator - the comment near the end says
// OP_SYNONYM) and 1155-1156 (declarations of the posting iterators i and j,
// presumably for term1 and term2) are absent from this extract.
1129 DEFINE_TESTCASE(checkstatsweight2, backend && !remote) {
1130  Xapian::Database db = get_database("apitest_simpledata");
1131  Xapian::Enquire enquire(db);
1133  for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1134  const string & term1 = *a;
// Advance to the second term of the pair; stop if there is no second term.
1135  if (++a == db.allterms_end()) break;
1136  const string & term2 = *a;
1138  Xapian::Query(term1), Xapian::Query(term2));
1139  tout << q.get_description() << '\n';
1140  enquire.set_query(q);
1141  Xapian::termcount sum = 0;
1142  Xapian::termcount sum_squares = 0;
1143  CheckStatsWeight wt(db, term1, term2, sum, sum_squares);
1144  enquire.set_weighting_scheme(wt);
1145  Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1146 
1147  // The document order in the multi-db case isn't the same as the
1148  // postlist order on the combined DB, so it's hard to compare the
1149  // wdf for each document in the Weight objects, but we can sum
1150  // the wdfs and the squares of the wdfs which provides a decent
1151  // check that we're not getting the wrong wdf values (it ensures
1152  // they have the right mean and standard deviation).
1153  Xapian::termcount expected_sum = 0;
1154  Xapian::termcount expected_sum_squares = 0;
// Merge state: did1/did2 hold the current document id of each posting list.
// Within the loop, 0 marks a list whose entry has just been consumed (so it
// must be advanced), and Xapian::docid(-1) marks a list which is exhausted.
1157  Xapian::docid did1 = *i, did2 = *j;
1158  while (true) {
1159  // To calculate expected_sum_squares correctly we need to square
1160  // the sum per document.
1161  Xapian::termcount wdf;
// Take the entry with the smaller document id, or both if they are equal.
1162  if (did1 == did2) {
1163  wdf = i.get_wdf() + j.get_wdf();
1164  did1 = did2 = 0;
1165  } else if (did1 < did2) {
1166  wdf = i.get_wdf();
1167  did1 = 0;
1168  } else {
1169  wdf = j.get_wdf();
1170  did2 = 0;
1171  }
1172  expected_sum += wdf;
1173  expected_sum_squares += wdf * wdf;
1174 
// Advance whichever list(s) were consumed; finish once both are exhausted.
1175  if (did1 == 0) {
1176  if (++i != db.postlist_end(term1)) {
1177  did1 = *i;
1178  } else {
1179  if (did2 == Xapian::docid(-1)) break;
1180  did1 = Xapian::docid(-1);
1181  }
1182  }
1183  if (did2 == 0) {
1184  if (++j != db.postlist_end(term2)) {
1185  did2 = *j;
1186  } else {
1187  if (did1 == Xapian::docid(-1)) break;
1188  did2 = Xapian::docid(-1);
1189  }
1190  }
1191  }
1192  // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1193  // the individual terms.
1194  TEST_EQUAL(sum, expected_sum);
// Only a lower bound is checked for the sum of squares.
1195  TEST_REL(sum_squares, >=, expected_sum_squares);
1196  }
1197 }
1198 
1200 // Regression test for bug fixed in 1.4.1.
1201 // Don't run with multi-database, as the termfreq checks don't work
1202 // there - FIXME: Investigate this - it smells like a bug.
// For each prefix in testcases: run query q using CheckStatsWeight in its
// wildcard mode (term2 == "*"), and compare the wdf sums it accumulated
// against sums computed from the posting lists of all terms with that prefix.
// NOTE(review): listing line 1221, which declares q (presumably an
// OP_WILDCARD query for the pattern), is absent from this extract.
1203 DEFINE_TESTCASE(checkstatsweight3, backend && !remote && !multi) {
// Heap comparator for posting iterators, comparing current document ids.
// NOTE(review): with a "less than" comparator the standard heap functions put
// the LARGEST element on top, so the loop below pops the list with the
// largest current docid first.  Postings for the same document are then only
// merged when they happen to be popped consecutively, which can only make
// expected_sum_squares smaller (expected_sum is unaffected).  Confirm whether
// a min-heap was intended.
1204  struct PlCmp {
1205  bool operator()(const Xapian::PostingIterator& a,
1206  const Xapian::PostingIterator& b) {
1207  return *a < *b;
1208  }
1209  };
1210 
1211  Xapian::Database db = get_database("apitest_simpledata");
1212  Xapian::Enquire enquire(db);
1214  static const char * const testcases[] = {
1215  "a", // a* matches all documents, but no term matches all.
1216  "pa", // Expands to only "paragraph", matching 5.
1217  "zulu", // No matches.
1218  "th", // Term "this" matches all documents.
1219  };
1220  for (auto pattern : testcases) {
1222  tout << q.get_description() << '\n';
1223  enquire.set_query(q);
1224  Xapian::termcount sum = 0;
1225  Xapian::termcount sum_squares = 0;
1226  CheckStatsWeight wt(db, pattern, "*", sum, sum_squares);
1227  enquire.set_weighting_scheme(wt);
1228  Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1229 
1230  // The document order in the multi-db case isn't the same as the
1231  // postlist order on the combined DB, so it's hard to compare the
1232  // wdf for each document in the Weight objects, but we can sum
1233  // the wdfs and the squares of the wdfs which provides a decent
1234  // check that we're not getting the wrong wdf values (it ensures
1235  // they have the right mean and standard deviation).
1236  Xapian::termcount expected_sum = 0;
1237  Xapian::termcount expected_sum_squares = 0;
// Collect one posting iterator per term starting with the prefix.
1238  vector<Xapian::PostingIterator> postlists;
1239  for (auto&& t = db.allterms_begin(pattern);
1240  t != db.allterms_end(pattern); ++t) {
1241  postlists.emplace_back(db.postlist_begin(*t));
1242  }
1243  make_heap(postlists.begin(), postlists.end(), PlCmp());
// did/wdf hold the document currently being accumulated; the initial did of 0
// with wdf of 0 adds nothing when the first real document replaces it.
1244  Xapian::docid did = 0;
1245  Xapian::termcount wdf = 0;
1246  while (!postlists.empty()) {
1247  pop_heap(postlists.begin(), postlists.end(), PlCmp());
1248  Xapian::docid did_new = *postlists.back();
1249  Xapian::termcount wdf_new = postlists.back().get_wdf();
// Advance the popped list; drop it if exhausted, otherwise return it to the
// heap.
1250  if (++(postlists.back()) == Xapian::PostingIterator()) {
1251  postlists.pop_back();
1252  } else {
1253  push_heap(postlists.begin(), postlists.end(), PlCmp());
1254  }
// On a change of document id, flush the total for the previous document.
1255  if (did_new != did) {
1256  expected_sum += wdf;
1257  expected_sum_squares += wdf * wdf;
1258  wdf = 0;
1259  did = did_new;
1260  }
1261  wdf += wdf_new;
1262  }
// Flush the total for the final document.
1263  expected_sum += wdf;
1264  expected_sum_squares += wdf * wdf;
1265  // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1266  // the individual terms.
1267  TEST_EQUAL(sum, expected_sum);
1268  TEST_REL(sum_squares, >=, expected_sum_squares);
1269  }
1270 }
1271 
1273 // Regression test for bug fixed in 1.4.6. Doesn't work with
1274 // multi as the weight object is cloned more times.
// For every term in the database: query for the same term twice (combined
// with operator|) using CheckStatsWeight in its repeated-term mode
// (term2 == "="), so that only the first occurrence accumulates.  The sums
// must then equal those computed from a single pass over the term's posting
// list.
// NOTE(review): listing lines 1278 and 1297, which declare the iterators a
// and i, are absent from this extract.
1275 DEFINE_TESTCASE(checkstatsweight4, backend && !remote && !multi) {
1276  Xapian::Database db = get_database("apitest_simpledata");
1277  Xapian::Enquire enquire(db);
1279  for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1280  const string & term = *a;
// The two subqueries differ only in their third constructor argument.
1281  enquire.set_query(Xapian::Query(term, 1, 1) |
1282  Xapian::Query(term, 1, 2));
1283  Xapian::termcount sum = 0;
1284  Xapian::termcount sum_squares = 0;
1285  CheckStatsWeight wt(db, term, "=", sum, sum_squares);
1286  enquire.set_weighting_scheme(wt);
1287  Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1288 
1289  // The document order in the multi-db case isn't the same as the
1290  // postlist order on the combined DB, so it's hard to compare the
1291  // wdf for each document in the Weight objects, but we can sum
1292  // the wdfs and the squares of the wdfs which provides a decent
1293  // check that we're not getting the wrong wdf values (it ensures
1294  // they have the right mean and standard deviation).
1295  Xapian::termcount expected_sum = 0;
1296  Xapian::termcount expected_sum_squares = 0;
1298  for (i = db.postlist_begin(term); i != db.postlist_end(term); ++i) {
1299  Xapian::termcount wdf = i.get_wdf();
1300  expected_sum += wdf;
1301  expected_sum_squares += wdf * wdf;
1302  }
1303  TEST_EQUAL(sum, expected_sum);
1304  TEST_EQUAL(sum_squares, expected_sum_squares);
1305  }
1306 }
1307 
1308 // Two stage should perform same as Jelinek mercer if smoothing parameter for mercer is kept 1 in both.
// Runs the query "paragraph" through two Enquire objects, and checks that the
// first returns 5 matches and that the second-ranked match has the same
// weight in both result sets.
// NOTE(review): listing lines 1318-1319 are absent from this extract;
// presumably they set the weighting scheme on enquire1 and enquire2.
1309 DEFINE_TESTCASE(unigramlmweight4, backend) {
1310  Xapian::Database db = get_database("apitest_simpledata");
1311  Xapian::Enquire enquire1(db);
1312  Xapian::Enquire enquire2(db);
1313  enquire1.set_query(Xapian::Query("paragraph"));
1314  Xapian::MSet mset1;
1315  enquire2.set_query(Xapian::Query("paragraph"));
1316  Xapian::MSet mset2;
1317  // 5 documents available with term paragraph so mset size should be 5
1320  mset1 = enquire1.get_mset(0, 10);
1321  mset2 = enquire2.get_mset(0, 10);
1322 
1323  TEST_EQUAL(mset1.size(), 5);
// Only the entry at index 1 is compared.
1324  TEST_EQUAL_DOUBLE(mset1[1].get_weight(), mset2[1].get_weight());
1325 }
1326 
1327 /* Test for checking if we don't use smoothing all
1328  * of them should give same result i.e wdf_double/len_double */
// Runs the query "paragraph" through four Enquire objects, and checks that
// each returns 5 matches and that the weights at each rank agree across all
// four result sets.
// NOTE(review): listing lines 1344-1347 are absent from this extract;
// presumably they set a weighting scheme on each Enquire object.
1329 DEFINE_TESTCASE(unigramlmweight5, backend) {
1330  Xapian::Database db = get_database("apitest_simpledata");
1331  Xapian::Enquire enquire1(db);
1332  Xapian::Enquire enquire2(db);
1333  Xapian::Enquire enquire3(db);
1334  Xapian::Enquire enquire4(db);
1335  enquire1.set_query(Xapian::Query("paragraph"));
1336  Xapian::MSet mset1;
1337  enquire2.set_query(Xapian::Query("paragraph"));
1338  Xapian::MSet mset2;
1339  enquire3.set_query(Xapian::Query("paragraph"));
1340  Xapian::MSet mset3;
1341  enquire4.set_query(Xapian::Query("paragraph"));
1342  Xapian::MSet mset4;
1343  // 5 documents available with term paragraph so mset size should be 5
1348 
1349  mset1 = enquire1.get_mset(0, 10);
1350  mset2 = enquire2.get_mset(0, 10);
1351  mset3 = enquire3.get_mset(0, 10);
1352  mset4 = enquire4.get_mset(0, 10);
1353 
1354  TEST_EQUAL(mset1.size(), 5);
1355  TEST_EQUAL(mset2.size(), 5);
1356  TEST_EQUAL(mset3.size(), 5);
1357  TEST_EQUAL(mset4.size(), 5);
// Compare every pair of the four result sets at each rank.
1358  for (Xapian::doccount i = 0; i < 5; ++i) {
1359  TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset4[i].get_weight());
1360  TEST_EQUAL_DOUBLE(mset2[i].get_weight(), mset4[i].get_weight());
1361  TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset2[i].get_weight());
1362  TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset2[i].get_weight());
1363  TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset4[i].get_weight());
1364  TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight());
1365  }
1366 }
1367 
1368 // Test Exception for junk after serialised weight (with Dir+ enabled).
// Appends "X" to the serialised form of wt and checks that unserialising it
// throws Xapian::SerialisationError with a message containing "LM".
// NOTE(review): listing line 1370, which declares wt, is absent from this
// extract.
1369 DEFINE_TESTCASE(unigramlmweight6, !backend) {
1371  try {
1372  Xapian::LMWeight d;
1373  Xapian::LMWeight * d2 = d.unserialise(wt.serialise() + "X");
1374  // Make sure we actually use the weight.
1375  bool empty = d2->name().empty();
1376  delete d2;
1377  if (empty)
1378  FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
// Reaching this point means unserialise() accepted the junk, which is a
// failure.
1379  FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
1380  } catch (const Xapian::SerialisationError &e) {
1381  TEST(e.get_msg().find("LM") != string::npos);
1382  }
1383 }
1384 
1385 // Feature test for Dir+ function.
// Runs the query "paragraph" through two Enquire objects, and checks that
// each returns 5 matches and that at every rank the weight from enquire2 is
// strictly greater than the weight from enquire1.
// NOTE(review): listing lines 1395-1396 are absent from this extract;
// presumably they set the weighting schemes (Dir on enquire1 and Dir+ on
// enquire2, going by the comment before the final checks).
1386 DEFINE_TESTCASE(unigramlmweight7, backend) {
1387  Xapian::Database db = get_database("apitest_simpledata");
1388  Xapian::Enquire enquire1(db);
1389  Xapian::Enquire enquire2(db);
1390  enquire1.set_query(Xapian::Query("paragraph"));
1391  enquire2.set_query(Xapian::Query("paragraph"));
1392  Xapian::MSet mset1;
1393  Xapian::MSet mset2;
1394 
1397 
1398  mset1 = enquire1.get_mset(0, 10);
1399  mset2 = enquire2.get_mset(0, 10);
1400 
1401  // mset size should be 5
1402  TEST_EQUAL(mset1.size(), 5);
1403  TEST_EQUAL(mset2.size(), 5);
1404 
1405  // Expect mset weights associated with Dir+ more than mset weights by Dir
1406  // because of the presence of extra weight component in Dir+ function.
1407  TEST_REL(mset2[0].get_weight(),>,mset1[0].get_weight());
1408  TEST_REL(mset2[1].get_weight(),>,mset1[1].get_weight());
1409  TEST_REL(mset2[2].get_weight(),>,mset1[2].get_weight());
1410  TEST_REL(mset2[3].get_weight(),>,mset1[3].get_weight());
1411  TEST_REL(mset2[4].get_weight(),>,mset1[4].get_weight());
1412 }
1413 
1414 // Regression test that OP_SCALE_WEIGHT works with LMWeight (fixed in 1.4.1).
// Runs the query "paragraph" to get mset1, then runs a second search to get
// mset2, and checks that both have the same size and that each weight in
// mset2 is 15.0 times the weight at the same rank in mset1.
// NOTE(review): listing lines 1421 and 1427-1428 are absent from this
// extract; presumably they set the weighting scheme and then set the scaled
// query used for the second search.
1415 DEFINE_TESTCASE(unigramlmweight8, backend) {
1416  Xapian::Database db = get_database("apitest_simpledata");
1417  Xapian::Enquire enquire(db);
1418  Xapian::Query query("paragraph");
1419 
1420  enquire.set_query(query);
1422 
1423  Xapian::MSet mset1;
1424  mset1 = enquire.get_mset(0, 10);
1425  TEST_EQUAL(mset1.size(), 5);
1426 
1429 
1430  Xapian::MSet mset2;
1431  mset2 = enquire.get_mset(0, 10);
1432  TEST_EQUAL(mset2.size(), mset1.size());
// Guard against the scaling check passing trivially with all-zero weights.
1433  TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
1434  for (Xapian::doccount i = 0; i < mset1.size(); ++i) {
1435  TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
1436  }
1437 }
1438 
1439 // Feature test for BoolWeight.
1440 // Test exception for junk after serialised weight.
// Appends "X" to the serialised form of a BoolWeight and checks that
// unserialising it throws Xapian::SerialisationError with a message
// containing "Bool".
// NOTE(review): listing line 1444, which declares t, is absent from this
// extract.
1441 DEFINE_TESTCASE(boolweight1, !backend) {
1442  Xapian::BoolWeight wt;
1443  try {
1445  Xapian::BoolWeight * t2 = t.unserialise(wt.serialise() + "X");
1446  // Make sure we actually use the weight.
1447  bool empty = t2->name().empty();
1448  delete t2;
1449  if (empty)
1450  FAIL_TEST("Serialised BoolWeight with junk appended unserialised to empty name!");
// Reaching this point means unserialise() accepted the junk, which is a
// failure.
1451  FAIL_TEST("Serialised BoolWeight with junk appended unserialised OK");
1452  } catch (const Xapian::SerialisationError &e) {
1453  TEST(e.get_msg().find("Bool") != string::npos);
1454  }
1455 }
1456 
1457 // Feature test for CoordWeight.
// Part 1: runs a query built from four terms and checks that each match's
// weight equals the number of query terms which match that document.
// Part 2: runs a second search and checks that it returns the same number of
// matches, each with exactly 15.0 times the weight from part 1.
// NOTE(review): listing lines 1460, 1464, 1472 and 1481 are absent from this
// extract; presumably they set the weighting scheme, start the declaration of
// query, declare the term iterator t, and set the scaled query.
1458 DEFINE_TESTCASE(coordweight1, backend) {
1459  Xapian::Enquire enquire(get_database("apitest_simpledata"));
1461  static const char * const terms[] = {
1462  "this", "line", "paragraph", "rubbish"
1463  };
1465  terms, terms + sizeof(terms) / sizeof(terms[0]));
1466  enquire.set_query(query);
1467  Xapian::MSet mymset1 = enquire.get_mset(0, 100);
1468  // CoordWeight scores 1 for each matching term, so the weight should equal
1469  // the number of matching terms.
1470  for (Xapian::MSetIterator i = mymset1.begin(); i != mymset1.end(); ++i) {
// Count the matching terms for this document by walking the iterator range.
1471  Xapian::termcount matching_terms = 0;
1473  while (t != enquire.get_matching_terms_end(i)) {
1474  ++matching_terms;
1475  ++t;
1476  }
1477  TEST_EQUAL(i.get_weight(), matching_terms);
1478  }
1479 
1480  // Test with OP_SCALE_WEIGHT.
1482  Xapian::MSet mymset2 = enquire.get_mset(0, 100);
1483  TEST_EQUAL(mymset1.size(), mymset2.size());
// This comparison is exact (TEST_EQUAL, not TEST_EQUAL_DOUBLE).
1484  for (Xapian::doccount i = 0; i != mymset1.size(); ++i) {
1485  TEST_EQUAL(15.0 * mymset1[i].get_weight(), mymset2[i].get_weight());
1486  }
1487 }
1488 
1489 // Test exception for junk after serialised weight.
// Appends "X" to the serialised form of wt and checks that unserialising it
// throws Xapian::SerialisationError with a message containing "Coord".
// NOTE(review): listing lines 1491 and 1493, which declare wt and t, are
// absent from this extract.
1490 DEFINE_TESTCASE(coordweight2, !backend) {
1492  try {
1494  Xapian::CoordWeight * t2 = t.unserialise(wt.serialise() + "X");
1495  // Make sure we actually use the weight.
1496  bool empty = t2->name().empty();
1497  delete t2;
1498  if (empty)
1499  FAIL_TEST("Serialised CoordWeight with junk appended unserialised to empty name!");
// Reaching this point means unserialise() accepted the junk, which is a
// failure.
1500  FAIL_TEST("Serialised CoordWeight with junk appended unserialised OK");
1501  } catch (const Xapian::SerialisationError &e) {
1502  TEST(e.get_msg().find("Coord") != string::npos);
1503  }
1504 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
Wildcard expansion.
Definition: query.h:255
void init(double factor_)
Allow the subclass to perform any initialisation it needs to.
Definition: api_weight.cc:989
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator get_matching_terms_end(Xapian::docid) const
End iterator corresponding to get_matching_terms_begin()
Definition: enquire.h:715
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: tradweight.cc:133
InL2Weight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: inl2weight.cc:103
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: dlhweight.cc:166
PL2PlusWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
std::string serialise() const
Return this object's parameters serialised as a single string.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
TermIterator allterms_end(const std::string &=std::string()) const
Corresponding end iterator to allterms_begin(prefix).
Definition: database.h:269
#define TEST_NOT_EQUAL_DOUBLE(a, b)
Test two doubles for non-near-equality.
Definition: testsuite.h:300
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
Definition: error.h:122
double get_sumpart(Xapian::termcount, Xapian::termcount, Xapian::termcount) const
Calculate the weight contribution for this object's term to a document.
Definition: api_weight.cc:907
Class representing a list of search results.
Definition: mset.h:44
This class implements the InL2 weighting scheme.
Definition: weight.h:833
STL namespace.
std::string serialise() const
Return this object's parameters serialised as a single string.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:932
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: inl2weight.cc:97
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: bb2weight.cc:126
CheckInitWeight(unsigned &z, unsigned &n)
Definition: api_weight.cc:892
BM25PlusWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
DEFINE_TESTCASE(tradweight3, !backend)
Definition: api_weight.cc:36
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
Definition: weight.h:1252
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
double get_maxextra() const
Return an upper bound on what get_sumextra() can return for any document.
Definition: api_weight.cc:918
Xapian::totallength get_total_length() const
Get the total length of all the documents in the database.
Definition: omdatabase.cc:312
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: ineb2weight.cc:99
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: tfidfweight.cc:86
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:956
Weight * clone() const
Clone this object.
Definition: api_weight.cc:993
test functionality of the Xapian API
std::string name() const
Return the name of this weighting scheme.
Definition: boolweight.cc:44
std::string name() const
Return the name of this weighting scheme.
Definition: bm25weight.cc:132
std::string name() const
Return the name of this weighting scheme.
double get_maxpart() const
Return an upper bound on what get_sumpart() can return for any document.
Definition: api_weight.cc:1078
Xapian::doclength get_avlength() const
Get the average length of the documents in the database.
Definition: omdatabase.cc:293
This class implements the BB2 weighting scheme.
Definition: weight.h:1049
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b);
Definition: testmacros.h:32
Class for iterating over a list of terms.
IfB2Weight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: ifb2weight.cc:106
Xapian::Weight subclass implementing Coordinate Matching.
Definition: weight.h:1504
BM25Weight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: bm25weight.cc:149
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
std::string name() const
Return the name of this weighting scheme.
Xapian::termcount & sum_squares
Definition: api_weight.cc:951
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const
Calculate the term-independent weight component for a document.
Definition: api_weight.cc:914
Xapian::termcount wdf_upper
Definition: api_weight.cc:955
TfIdfWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: tfidfweight.cc:92
Class implementing a "boolean" weighting scheme.
Definition: weight.h:422
This class provides read/write access to a database.
Definition: database.h:789
Indicates an error in the std::string serialisation of an object.
Definition: error.h:929
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Iterator over a Xapian::MSet.
Definition: mset.h:368
Scale the weight contributed by a subquery.
Definition: query.h:166
Public interfaces for the Xapian library.
CheckStatsWeight(const Xapian::Database &db_, const string &term1_, const string &term2_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Definition: api_weight.cc:957
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: lmweight.cc:100
double get_maxpart() const
Return an upper bound on what get_sumpart() can return for any document.
Definition: api_weight.cc:912
Weight * clone() const
Clone this object.
Definition: api_weight.cc:903
DPHWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: dphweight.cc:129
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
std::string name() const
Return the name of this weighting scheme.
Definition: lmweight.cc:94
IneB2Weight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: ineb2weight.cc:105
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629
std::string name() const
Return the name of this weighting scheme.
Definition: ifb2weight.cc:94
Xapian::termcount & sum
Definition: api_weight.cc:950
double get_maxextra() const
Return an upper bound on what get_sumextra() can return for any document.
Definition: api_weight.cc:1091
std::string name() const
Return the name of this weighting scheme.
Definition: inl2weight.cc:91
Xapian::Weight subclass implementing the traditional probabilistic formula.
Definition: weight.h:763
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: pl2weight.cc:138
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence fr...
Definition: weight.h:1125
std::string name() const
Return the name of this weighting scheme.
Definition: pl2weight.cc:132
This class implements the PL2 weighting scheme.
Definition: weight.h:1185
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: dphweight.cc:123
This class implements the IneB2 weighting scheme.
Definition: weight.h:977
BoolWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: boolweight.cc:57
std::string name() const
Return the name of this weighting scheme.
Definition: tradweight.cc:127
TermIterator allterms_begin(const std::string &prefix=std::string()) const
An iterator which runs across all terms with a given prefix.
Definition: omdatabase.cc:223
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
Definition: testsuite.h:295
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: ifb2weight.cc:100
double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const
Calculate the term-independent weight component for a document.
Definition: api_weight.cc:1087
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: coordweight.cc:50
Match like OP_OR but weighting as if a single term.
Definition: query.h:239
This class implements the IfB2 weighting scheme.
Definition: weight.h:904
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
Match only documents which all subqueries match.
Definition: query.h:84
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
CheckStatsWeight(const Xapian::Database &db_, const string &term_, Xapian::termcount &sum_, Xapian::termcount &sum_squares_)
Definition: api_weight.cc:983
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
std::string name() const
Return the name of this weighting scheme.
Definition: bb2weight.cc:120
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
CoordWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: coordweight.cc:57
std::string name() const
Return the name of this weighting scheme.
Definition: dlhweight.cc:160
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: bm25weight.cc:138
Xapian::termcount len_upper
Definition: api_weight.cc:953
PL2Weight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: pl2weight.cc:144
void init(double factor_)
Allow the subclass to perform any initialisation it needs to.
Definition: api_weight.cc:895
This class implements the DPH weighting scheme.
Definition: weight.h:1348
double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen, Xapian::termcount uniqueterms) const
Calculate the weight contribution for this object's term to a document.
Definition: api_weight.cc:1008
Match documents which at least one subquery matches.
Definition: query.h:92
Xapian-specific test helper functions and macros.
std::string name() const
Return the name of this weighting scheme.
Definition: ineb2weight.cc:93
LMWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: lmweight.cc:110
void mset_expect_order(const Xapian::MSet &A, Xapian::docid d1, Xapian::docid d2, Xapian::docid d3, Xapian::docid d4, Xapian::docid d5, Xapian::docid d6, Xapian::docid d7, Xapian::docid d8, Xapian::docid d9, Xapian::docid d10, Xapian::docid d11, Xapian::docid d12)
Definition: testutils.cc:225
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
std::string name() const
Return the name of this weighting scheme.
Definition: coordweight.cc:44
Class representing a query.
Definition: query.h:46
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
static void gen_wdf_eq_doclen_db(Xapian::WritableDatabase &db, const string &)
Definition: api_weight.cc:513
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
Xapian::Weight subclass implementing the Language Model formula.
Definition: weight.h:1401
std::string name() const
Return the name of this weighting scheme.
Definition: dphweight.cc:117
BB2Weight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: bb2weight.cc:132
std::string serialise() const
Return this object's parameters serialised as a single string.
Definition: boolweight.cc:50
Xapian::Database db
Definition: api_weight.cc:940
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323
A handle representing a document in a Xapian database.
Definition: document.h:61
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Definition: weight.h:639
std::string name() const
Return the name of this weighting scheme.
Definition: tfidfweight.cc:80
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:535
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
Xapian::Weight subclass implementing the tf-idf weighting scheme.
Definition: weight.h:447
Xapian::termcount len_lower
Definition: api_weight.cc:954
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140
Abstract base class for weighting schemes.
Definition: weight.h:35
TradWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: tradweight.cc:139
Xapian::termcount get_collection_freq(const std::string &tname) const
Return the total number of occurrences of the given term.
Definition: omdatabase.cc:339
DLHWeight * unserialise(const std::string &serialised) const
Unserialise parameters.
Definition: dlhweight.cc:172
unsigned & zero_inits
Definition: api_weight.cc:890
static const testcase testcases[]
Definition: api_unicode.cc:39