34 #include <string_view>
// Out-of-line definition of the MatchSpy destructor; its body is empty.
49 MatchSpy::~MatchSpy() {}
52 MatchSpy::clone()
const {
53 throw UnimplementedError(
"MatchSpy not suitable for use with remote searches - clone() method unimplemented");
58 throw UnimplementedError(
"MatchSpy not suitable for use with remote searches - name() method unimplemented");
62 MatchSpy::serialise()
const {
63 throw UnimplementedError(
"MatchSpy not suitable for use with remote searches - serialise() method unimplemented");
67 MatchSpy::unserialise(
const string &,
const Registry &)
const {
68 throw UnimplementedError(
"MatchSpy not suitable for use with remote searches - unserialise() method unimplemented");
72 MatchSpy::serialise_results()
const {
73 throw UnimplementedError(
"MatchSpy not suitable for use with remote searches - serialise_results() method unimplemented");
77 MatchSpy::merge_results(
const string &) {
78 throw UnimplementedError(
"MatchSpy not suitable for use with remote searches - merge_results() method unimplemented");
82 MatchSpy::get_description()
const {
83 return "Xapian::MatchSpy()";
94 map<string, Xapian::doccount>::const_iterator
it;
102 it = spy->values.begin();
108 Assert(it != spy->values.end());
116 Assert(it != spy->values.end());
119 if (it == spy->values.end()) {
122 current_term = it->first;
127 while (it != spy->values.end() && it->first <
term) {
131 if (it == spy->values.end()) {
134 current_term = it->first;
152 :
str(str_), frequency(frequency_) {}
184 vector<StringAndFrequency>::const_iterator
it;
199 Assert(it != values.end());
200 return it->get_frequency();
207 Assert(it != values.end());
210 if (it == values.end()) {
213 current_term = it->get_string();
218 while (it != values.end() && it->get_string() <
term) {
222 if (it != values.end()) {
223 current_term = it->get_string();
251 const map<string, doccount> & items,
256 result.reserve(maxitems);
258 bool is_heap =
false;
260 for (map<string, doccount>::const_iterator i = items.begin();
261 i != items.end(); ++i) {
262 if (result.size() < maxitems) {
263 result.emplace_back(i->first, i->second);
269 Assert(result.size() == maxitems);
271 Heap::make(result.begin(), result.end(), cmpfn);
276 if (!cmpfn(new_item, result[0])) {
281 result[0] = std::move(new_item);
286 Heap::sort(result.begin(), result.end(), cmpfn);
288 sort(result.begin(), result.end(), cmpfn);
293 ValueCountMatchSpy::operator()(
const Document &doc,
double) {
296 string val(doc.
get_value(internal->slot));
297 if (!val.empty()) ++(
internal->values[val]);
301 ValueCountMatchSpy::values_begin()
const
308 ValueCountMatchSpy::top_values_begin(
size_t maxvalues)
const
311 unique_ptr<StringAndFreqTermList> termlist;
312 if (
usual(maxvalues > 0)) {
321 ValueCountMatchSpy::clone()
const {
328 return "Xapian::ValueCountMatchSpy";
332 ValueCountMatchSpy::serialise()
const {
340 ValueCountMatchSpy::unserialise(
const string & s,
const Registry &)
const
342 const char *
p = s.data();
343 const char * end =
p + s.size();
354 ValueCountMatchSpy::serialise_results()
const {
355 LOGCALL(REMOTE,
string,
"ValueCountMatchSpy::serialise_results", NO_ARGS);
359 for (
auto&& item : internal->values) {
367 ValueCountMatchSpy::merge_results(
const string & s) {
368 LOGCALL_VOID(REMOTE,
"ValueCountMatchSpy::merge_results", s);
370 const char *
p = s.data();
371 const char * end =
p + s.size();
377 internal->total += n;
386 internal->values[val] += freq;
391 ValueCountMatchSpy::get_description()
const {
392 string d =
"ValueCountMatchSpy(";
394 d +=
str(internal->total);
395 d +=
" docs seen, looking in ";
396 d +=
str(internal->values.size());
Compare two StringAndFrequency objects.
StringAndFreqCmpByFreq()
Default constructor.
bool operator()(const StringAndFrequency &a, const StringAndFrequency &b) const
Return true if a has a higher frequency than b.
A termlist iterator over a vector of StringAndFrequency objects.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
void init()
init() should be called after the values have been set, but before iteration begins.
PositionList * positionlist_begin() const
Return PositionList for the current position.
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
vector< StringAndFrequency >::const_iterator it
vector< StringAndFrequency > values
TermList * next()
Advance the current position to the next term in the termlist.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
TermList * skip_to(string_view term)
Skip forward to the specified term.
A string with a corresponding frequency.
std::string get_string() const
Return the string.
StringAndFrequency(const std::string &str_, Xapian::doccount frequency_)
Construct a StringAndFrequency object.
Xapian::doccount frequency
Xapian::doccount get_frequency() const
Return the frequency.
A termlist iterator over the contents of a ValueCountMatchSpy.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
ValueCountTermList(ValueCountMatchSpy::Internal *spy_)
TermList * next()
Advance the current position to the next term in the termlist.
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
TermList * skip_to(string_view term)
Skip forward to the specified term.
PositionList * positionlist_begin() const
Return PositionList for the current position.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
intrusive_ptr< Xapian::ValueCountMatchSpy::Internal > spy
map< string, Xapian::doccount >::const_iterator it
Class representing a document.
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
A smart pointer that uses intrusive reference counting.
InvalidOperationError indicates the API was used in an invalid way.
Abstract base class for match spies.
Abstract base class for iterating term positions in a document.
Registry for user subclasses.
Abstract base class for termlists.
Class for iterating over a list of terms.
UnimplementedError indicates an attempt to use an unimplemented feature.
Class for counting the frequencies of values in the matching documents.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Class representing a document.
Hierarchy of classes which Xapian can throw as exceptions.
C++ STL heap implementation with extensions.
static void get_most_frequent_items(vector< StringAndFrequency > &result, const map< string, doccount > &items, size_t maxitems)
Get the most frequent items from a map from string to frequency.
static void unsupported_method()
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
string str(int value)
Convert int to std::string.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Various assertion macros.
void unpack_throw_serialisation_error(const char *p)
Throw appropriate SerialisationError.
Pack types into strings and unpack them again.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
void pack_uint_last(std::string &s, U value)
Append an encoded unsigned integer to a string as the last item.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
void pack_string(std::string &s, std::string_view value)
Append an encoded std::string to a string.
Parsing a user query string to build a Xapian::Query object.
Class for looking up user subclasses during unserialisation.
Convert types to std::string.
Various handy string-related helpers.
Abstract base class for termlists.