xapian-core  2.0.0
api_collapse.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "api_collapse.h"
24 
25 #include <xapian.h>
26 
27 #include "apitest.h"
28 #include "testutils.h"
29 
30 using namespace std;
31 
33 DEFINE_TESTCASE(collapsekey5, backend) {
34  Xapian::Database db(get_database("apitest_simpledata"));
35  Xapian::Enquire enquire(db);
36  // "this" matches all documents.
37  enquire.set_query(Xapian::Query("this"));
38 
39  Xapian::MSet full_mset = enquire.get_mset(0, db.get_doccount());
40 
41  for (Xapian::valueno slot = 0; slot < 10; ++slot) {
42  map<string, Xapian::doccount> tally;
43  for (Xapian::docid did = 1; did <= db.get_doccount(); ++did) {
44  ++tally[db.get_document(did).get_value(slot)];
45  }
46 
47  for (Xapian::doccount cmax = db.get_doccount() + 1; cmax > 0; --cmax) {
48  tout << "Collapsing on slot " << slot << " max " << cmax << '\n';
49  enquire.set_collapse_key(slot, cmax);
50  Xapian::MSet mset = enquire.get_mset(0, full_mset.size());
51 
52  // Check the collapse MSet size is as expected.
53  Xapian::doccount expect_size = 0;
54  map<string, Xapian::doccount>::const_iterator i;
55  for (i = tally.begin(); i != tally.end(); ++i) {
56  if (i->first.empty() || i->second <= cmax) {
57  expect_size += i->second;
58  } else {
59  expect_size += cmax;
60  }
61  }
62  TEST_EQUAL(mset.size(), expect_size);
63 
64  // Check that the right number of documents with each collapse key
65  // value are left after collapsing.
66  map<string, Xapian::doccount> seen;
67  for (Xapian::MSetIterator j = mset.begin(); j != mset.end(); ++j) {
68  const string & key = j.get_collapse_key();
69  TEST(tally.find(key) != tally.end());
70  ++seen[key];
71  }
72  for (i = tally.begin(); i != tally.end(); ++i) {
73  if (i->first.empty() || i->second <= cmax) {
74  TEST_EQUAL(seen[i->first], i->second);
75  } else {
76  TEST_EQUAL(seen[i->first], cmax);
77  }
78  }
79  }
80  }
81 }
82 
84 DEFINE_TESTCASE(collapsekey6, backend) {
85  Xapian::Database db(get_database("apitest_simpledata"));
86  Xapian::Enquire enquire(db);
87  // "this" matches all documents.
88  enquire.set_query(Xapian::Query("this"));
89 
90  Xapian::MSet full_mset = enquire.get_mset(0, db.get_doccount());
91 
92  for (Xapian::valueno slot = 0; slot < 10; ++slot) {
93  for (Xapian::doccount cmax = db.get_doccount() + 1; cmax > 0; --cmax) {
94  for (int percent = 65; percent != 100; ++percent) {
95  tout << "Collapsing on slot " << slot << " max " << cmax
96  << " cutoff " << percent << '\n';
97  enquire.set_collapse_key(slot, cmax);
98  enquire.set_cutoff(percent);
99  Xapian::MSet mset = enquire.get_mset(0, full_mset.size());
100  for (Xapian::MSetIterator j = mset.begin(); j != mset.end(); ++j) {
101  TEST_REL(j.get_percent(), >=, percent);
102  }
103  }
104  }
105  }
106 }
DEFINE_TESTCASE(collapsekey5, backend)
Simple test of collapsing with collapse_max > 1.
Definition: api_collapse.cc:33
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:47
test functionality of the Xapian API
An indexed database of documents.
Definition: database.h:75
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: database.cc:233
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
Definition: database.cc:368
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
Definition: document.cc:185
Querying session.
Definition: enquire.h:57
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
Definition: enquire.cc:200
void set_cutoff(int percent_threshold, double weight_threshold=0)
Set lower bounds on percentage and/or weight.
Definition: enquire.cc:172
void set_query(const Query &query, termcount query_length=0)
Set the query.
Definition: enquire.cc:72
void set_collapse_key(valueno collapse_key, doccount collapse_max=1)
Control collapsing of results.
Definition: enquire.cc:165
Iterator over a Xapian::MSet.
Definition: mset.h:535
Class representing a list of search results.
Definition: mset.h:46
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:786
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:791
Class representing a query.
Definition: query.h:45
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a, >, b);
Definition: testmacros.h:35
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:104
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:276
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:273
Xapian-specific test helper functions and macros.
Public interfaces for the Xapian library.