37 bool need_comma =
false;
57 bool need_comma =
false;
67 if (!posrepr.empty()) {
68 posrepr =
", pos=[" + posrepr +
"]";
71 result +=
"(" +
str(*
p) +
72 ", doclen=" +
str(
p.get_doclength()) +
73 ", wdf=" +
str(
p.get_wdf()) +
84 bool need_comma =
false;
91 if (!posrepr.empty()) {
92 posrepr =
", pos=[" + posrepr +
"]";
96 result +=
"Term(" + *t +
", wdf=" +
str(t.get_wdf()) + posrepr;
134 unsigned long totlen = 0;
139 map<string, string> posting_reprs;
140 map<Xapian::valueno, string> value_reprs;
153 if (doclen < doclen_lower_bound)
154 doclen_lower_bound = doclen;
155 if (doclen > doclen_upper_bound)
156 doclen_upper_bound = doclen;
170 if (wdf) ++found_unique_terms;
189 if (!posrepr.empty()) {
190 posrepr =
",[" + posrepr +
"]";
192 string posting_repr =
"(" +
str(did) +
"," +
197 map<string, string>::iterator i = posting_reprs.find(*t);
198 if (i == posting_reprs.end()) {
199 posting_reprs[*t] = posting_repr;
201 i->second +=
"," + posting_repr;
209 TEST((*v).size() != 0);
210 string value_repr =
"(" +
str(did) +
"," + *v +
")";
213 map<Xapian::valueno, string>::iterator i;
214 i = value_reprs.find(v.get_valueno());
215 if (i == value_reprs.end()) {
216 value_reprs[v.get_valueno()] = value_repr;
218 i->second +=
"," + value_repr;
224 TEST_EQUAL(expected_termcount, found_termcount);
229 TEST_REL(unique_terms, >=, found_unique_terms);
230 TEST_REL(unique_terms, <=, found_termcount);
239 map<string, string>::const_iterator i;
244 TEST(i != posting_reprs.end());
251 bool need_comma =
false;
260 cf_count +=
p.get_wdf();
264 if (!posrepr.empty()) {
265 posrepr =
",[" + posrepr +
"]";
267 posting_repr +=
"(" +
str(*
p) +
"," +
268 str(
p.get_wdf()) +
"/" +
269 str(
p.get_doclength()) + posrepr +
")";
270 if (wdf_upper_bound <
p.get_wdf())
271 wdf_upper_bound =
p.get_wdf();
281 TEST(i == posting_reprs.end());
283 map<Xapian::valueno, string>::const_iterator j;
284 for (j = value_reprs.begin(); j != value_reprs.end(); ++j) {
286 string value_lower_bound;
287 string value_upper_bound;
292 value_lower_bound = *v;
293 value_upper_bound = *v;
297 if (*v > value_upper_bound) {
298 value_upper_bound = *v;
300 if (*v < value_lower_bound) {
301 value_lower_bound = *v;
304 value_repr +=
"(" +
str(v.get_docid()) +
"," + *v +
")";
316 if (expected_doccount == 0) {
An indexed database of documents.
ValueIterator valuestream_begin(Xapian::valueno slot) const
Return an iterator over the value in slot slot for each document.
Xapian::doccount get_termfreq(std::string_view term) const
Get the number of documents indexed by a specified term.
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
PostingIterator postlist_begin(std::string_view term) const
Start iterating the postings of a term.
TermIterator termlist_begin(Xapian::docid did) const
Start iterating the terms in a document.
double get_avlength() const
Old name for get_average_length() for backward compatibility.
Xapian::termcount get_wdf_upper_bound(std::string_view term) const
Get an upper bound on the wdf of term term.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
Xapian::termcount get_doclength(Xapian::docid did) const
Get the length of a specified document.
bool term_exists(std::string_view term) const
Test if a particular term is present in any document.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
TermIterator allterms_end(std::string_view={}) const noexcept
End iterator corresponding to allterms_begin(prefix).
Xapian::termcount get_collection_freq(std::string_view term) const
Get the total number of occurrences of a specified term.
Xapian::doccount get_doccount() const
Get the number of documents in the database.
PostingIterator postlist_end(std::string_view) const noexcept
End iterator corresponding to postlist_begin().
TermIterator termlist_end(Xapian::docid) const noexcept
End iterator corresponding to termlist_begin().
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
TermIterator allterms_begin(std::string_view prefix={}) const
Start iterating all terms in the database with a given prefix.
ValueIterator valuestream_end(Xapian::valueno) const noexcept
Return end iterator corresponding to valuestream_begin().
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
Xapian::Document get_document(Xapian::docid did, unsigned flags=0) const
Get a document from the database.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in a specified document.
Class representing a document.
Xapian::valueno values_count() const
Count the value slots used in this document.
ValueIterator values_begin() const
Start iterating the values in this document.
TermIterator termlist_end() const noexcept
End iterator corresponding to termlist_begin().
Xapian::termcount termlist_count() const
Return the number of distinct terms in this document.
TermIterator termlist_begin() const
Start iterating the terms in this document.
ValueIterator values_end() const noexcept
End iterator corresponding to values_begin().
Class for iterating over term positions.
Class for iterating over a list of terms.
Class for iterating over a list of terms.
PositionIterator positionlist_end() const noexcept
Return an end PositionIterator for the current term.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.
UnimplementedError indicates an attempt to use an unimplemented feature.
Class for iterating over document values.
test database contents and consistency.
string str(int value)
Convert int to std::string.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Convert types to std::string.
#define TEST_REL(A, REL, B)
Test that a relation holds, e.g. TEST_REL(a,>,b).
string docterms_to_string(const Xapian::Database &db, Xapian::docid did)
Convert the list of terms in a document to a string.
void dbcheck(const Xapian::Database &db, Xapian::doccount expected_doccount, Xapian::docid expected_lastdocid)
Check consistency of database and statistics.
string postlist_to_string(const Xapian::Database &db, const string &tname)
Convert the list of postings in a postlist to a string.
string termstats_to_string(const Xapian::Database &db, const string &term)
Convert statistics about a term to a string.
string positions_to_string(Xapian::PositionIterator &it, const Xapian::PositionIterator &end, Xapian::termcount *count)
Convert the list of positions in a positionlist to a string.
string docstats_to_string(const Xapian::Database &db, Xapian::docid did)
Convert statistics about a document to a string.
a generic test suite engine
#define TEST_EQUAL(a, b)
Test for equality of two things.
#define TEST_EQUAL_DOUBLE(a, b)
Test two doubles for near equality.
#define TEST(a)
Test a condition, without an additional explanation for failure.