64 MatchDecider::~MatchDecider() { }
94 return internal->items.size();
100 return internal->items.empty();
107 internal->items.insert(did);
113 internal->items.erase(did);
119 return internal->items.find(did) !=
internal->items.end();
125 return "RSet(" +
internal->get_description() +
")";
131 string description(
"RSet::Internal(");
133 set<Xapian::docid>::const_iterator i;
134 for (i =
items.begin(); i !=
items.end(); ++i) {
135 if (i !=
items.begin()) description +=
", ";
136 description +=
str(*i);
149 MSetItem::get_description()
const
153 description =
str(did) +
", " +
str(wt) +
", " +
156 description =
"Xapian::MSetItem(" + description +
")";
192 LOGCALL_VOID(API,
"Xapian::MSet::fetch_", first | last);
193 Assert(
internal.get() != 0);
194 internal->fetch_items(first, last);
200 LOGCALL(API,
int,
"Xapian::MSet::convert_to_percent", wt);
201 Assert(
internal.get() != 0);
209 Assert(
internal.get() != 0);
212 if (
internal->stats->get_stats(tname, termfreq))
224 LOGCALL(API,
double,
"Xapian::MSet::get_termweight", tname);
225 Assert(
internal.get() != 0);
230 if (!
internal->stats->get_termweight(tname, termweight)) {
232 msg +=
": termweight not available";
241 Assert(
internal.get() != 0);
242 return internal->firstitem;
248 Assert(
internal.get() != 0);
249 return internal->matches_lower_bound;
255 Assert(
internal.get() != 0);
264 Assert(
internal.get() != 0);
265 return internal->matches_upper_bound;
271 Assert(
internal.get() != 0);
272 return internal->uncollapsed_lower_bound;
278 Assert(
internal.get() != 0);
279 return internal->uncollapsed_estimated;
285 Assert(
internal.get() != 0);
286 return internal->uncollapsed_upper_bound;
292 Assert(
internal.get() != 0);
293 return internal->max_possible;
299 Assert(
internal.get() != 0);
300 return internal->max_attained;
308 const string & hi_start,
309 const string & hi_end,
310 const string & omit)
const
312 Assert(
internal.get() != 0);
313 return internal->snippet(text, length,
stemmer, flags,
314 hi_start, hi_end, omit);
320 Assert(
internal.get() != 0);
321 return internal->items.size();
327 Assert(
internal.get() != 0);
328 return "Xapian::MSet(" +
internal->get_description() +
")";
334 LOGCALL(MATCH,
int,
"Xapian::MSet::Internal::convert_to_percent_internal", wt);
339 int pcent =
static_cast<int>(v);
341 " => pcent = " << pcent);
342 if (pcent > 100) pcent = 100;
343 if (pcent < 0) pcent = 0;
344 if (pcent == 0 && wt > 0) pcent = 1;
352 LOGCALL(MATCH,
Document,
"Xapian::MSet::Internal::get_doc_by_index", index);
354 map<Xapian::doccount, Document>::const_iterator doc;
355 doc = indexeddocs.find(index);
356 if (doc != indexeddocs.end()) {
359 if (index < firstitem || index >= firstitem + items.size()) {
360 throw RangeError(
"The mset returned from the match does not contain the document at index " +
str(index));
363 if (!requested_docs.empty()) {
367 doc = indexeddocs.find(index);
368 if (doc != indexeddocs.end()) {
373 RETURN(enquire->get_document(items[index - firstitem]));
379 LOGCALL_VOID(MATCH,
"Xapian::MSet::Internal::fetch_items", first | last);
380 if (enquire.get() == 0) {
381 throw InvalidOperationError(
"Can't fetch documents from an MSet which is not derived from a query.");
383 if (items.empty())
return;
384 if (last > items.size() - 1)
385 last = items.size() - 1;
387 map<Xapian::doccount, Document>::const_iterator doc;
388 doc = indexeddocs.find(i);
389 if (doc == indexeddocs.end()) {
391 set<Xapian::doccount>::const_iterator s;
392 s = requested_docs.find(i);
393 if (s == requested_docs.end()) {
395 enquire->request_doc(items[i - firstitem]);
396 requested_docs.insert(i);
405 string description =
"Xapian::MSet::Internal(";
407 description +=
"firstitem=" +
str(firstitem) +
", " +
408 "matches_lower_bound=" +
str(matches_lower_bound) +
", " +
409 "matches_estimated=" +
str(matches_estimated) +
", " +
410 "matches_upper_bound=" +
str(matches_upper_bound) +
", " +
411 "max_possible=" +
str(max_possible) +
", " +
412 "max_attained=" +
str(max_attained);
414 for (vector<Xapian::Internal::MSetItem>::const_iterator i = items.begin();
415 i != items.end(); ++i) {
416 if (!description.empty()) description +=
", ";
417 description += i->get_description();
428 set<Xapian::doccount>::const_iterator i;
429 for (i = requested_docs.begin(); i != requested_docs.end(); ++i) {
430 indexeddocs[*i] = enquire->read_doc(items[*i - firstitem]);
431 LOGLINE(MATCH,
"stored doc at index " << *i <<
" is " << indexeddocs[*i]);
434 requested_docs.clear();
442 Assert(mset.internal.get());
446 return mset.internal->items[index].did;
452 Assert(mset.internal.get());
456 return mset.internal->get_doc_by_index(index);
462 Assert(mset.internal.get());
466 return mset.internal->items[index].wt;
472 Assert(mset.internal.get());
476 return mset.internal->items[index].collapse_key;
482 Assert(mset.internal.get());
486 return mset.internal->items[index].collapse_count;
492 Assert(mset.internal.get());
496 return mset.internal->items[index].sort_key;
502 return "Xapian::MSetIterator(" +
str(mset.size() - off_from_end) +
")";
509 order(
Enquire::ASCENDING), percent_cutoff(0), weight_cutoff(0),
511 sorter(), time_limit(0.0),
weight(0),
512 eweightname(
"trad"), expand_k(1.0)
515 throw InvalidArgumentError(
"Can't make an Enquire object from an uninitialised Database object.");
543 LOGCALL(MATCH,
MSet,
"Enquire::Internal::get_mset", first | maxitems | check_at_least | rset | mdecider);
545 if (percent_cutoff && (sort_by ==
VAL || sort_by == VAL_REL)) {
556 first = min(first, docs);
557 maxitems = min(maxitems, docs - first);
558 check_at_least = min(check_at_least, docs);
559 check_at_least = max(check_at_least, first + maxitems);
564 collapse_max, collapse_key,
565 percent_cutoff, weight_cutoff,
566 order, sort_key, sort_by, sort_value_forward,
567 time_limit, *(stats.get()),
weight, spies,
568 (sorter.get() != NULL),
572 match.
get_mset(first, maxitems, check_at_least, retval,
573 *(stats.get()), mdecider, sorter.get());
574 if (first_orig != first && retval.
internal.get()) {
575 retval.
internal->firstitem = first_orig;
587 retval.
internal->stats = stats.release();
595 const RSet & rset,
int flags,
599 LOGCALL(MATCH,
ESet,
"Enquire::Internal::get_eset", maxitems | rset | flags | edecider_ | min_wt);
602 opt_intrusive_ptr<const ExpandDecider> edecider(edecider_);
603 if (maxitems == 0 || rset.
empty()) {
612 opt_intrusive_ptr<const ExpandDecider> decider_noquery(
615 if (edecider.get()) {
617 edecider.get()))->release();
619 edecider = decider_noquery;
627 if (eweightname ==
"bo1") {
629 eset.
internal->expand(maxitems, db, rset, edecider.get(), bo1eweight,
632 TradEWeight tradeweight(db, rset.
size(), use_exact_termfreq, expand_k);
633 eset.
internal->expand(maxitems, db, rset, edecider.get(), tradeweight,
642 typedef map<string, unsigned int>
tmap_t;
648 const string &right)
const {
649 tmap_t::const_iterator l, r;
651 r = tmap.find(right);
652 Assert((l != tmap.end()) && (r != tmap.end()));
654 return l->second < r->second;
670 map<string, unsigned int> tmap;
671 unsigned int index = 1;
673 if (tmap.find(*qt) == tmap.end())
677 vector<string> matching_terms;
681 while (docterms != docterms_end) {
682 string term = *docterms;
683 map<string, unsigned int>::iterator t = tmap.find(term);
684 if (t != tmap.end()) matching_terms.push_back(term);
689 sort(matching_terms.begin(), matching_terms.end(),
ByQueryIndexCmp(tmap));
692 matching_terms.end()));
700 return get_matching_terms(*it);
706 return db.get_termfreq(tname);
712 string description = db.get_description();
723 unsigned int multiplier = db.internal.size();
728 db.internal[dbnumber]->request_document(realdid);
734 unsigned int multiplier = db.
internal.size();
740 doc = db.internal[dbnumber]->collect_document(realdid);
747 unsigned int multiplier = db.
internal.size();
753 return Document(db.internal[dbnumber]->open_document(realdid,
true));
796 internal->set_query(
query, len);
808 LOGCALL_VOID(API,
"Xapian::Enquire::add_matchspy", spy);
809 internal->spies.push_back(spy);
814 LOGCALL_VOID(API,
"Xapian::Enquire::clear_matchspies", NO_ARGS);
815 internal->spies.clear();
821 LOGCALL_VOID(API,
"Xapian::Enquire::set_weighting_scheme", weight_);
831 LOGCALL_VOID(API,
"Xapian::Enquire::set_expansion_scheme", eweightname_ | expand_k_);
833 if (eweightname_ ==
"prob") {
834 internal->eweightname =
"trad";
835 internal->expand_k = expand_k_;
839 if (eweightname_ !=
"bo1" && eweightname_ !=
"trad") {
843 internal->eweightname = eweightname_;
844 internal->expand_k = expand_k_;
851 internal->collapse_key = collapse_key;
852 internal->collapse_max = collapse_max;
858 internal->order = order;
864 internal->percent_cutoff = percent_cutoff;
865 internal->weight_cutoff = weight_cutoff;
877 internal->sorter = NULL;
878 internal->sort_key = sort_key;
880 internal->sort_value_forward = ascending;
886 internal->sorter = NULL;
887 internal->sort_key = sort_key;
889 internal->sort_value_forward = ascending;
895 internal->sorter = NULL;
896 internal->sort_key = sort_key;
898 internal->sort_value_forward = ascending;
906 internal->sorter = sorter;
908 internal->sort_value_forward = ascending;
916 internal->sorter = sorter;
918 internal->sort_value_forward = ascending;
926 internal->sorter = sorter;
928 internal->sort_value_forward = ascending;
934 internal->time_limit = time_limit;
942 LOGCALL(API,
Xapian::MSet,
"Xapian::Enquire::get_mset", first | maxitems | check_at_least | rset | mdecider);
943 RETURN(
internal->get_mset(first, maxitems, check_at_least, rset, mdecider));
950 LOGCALL(API,
Xapian::ESet,
"Xapian::Enquire::get_eset", maxitems | rset | flags | edecider | min_wt);
951 RETURN(
internal->get_eset(maxitems, rset, flags, edecider, min_wt));
971 return "Xapian::Enquire(" +
internal->get_description() +
")";
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Wrapper around standard unique_ptr template.
database class declarations
void get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, Xapian::MSet &mset, Xapian::Weight::Internal &stats, const Xapian::MatchDecider *mdecider, const Xapian::KeyMaker *sorter)
Run the match and generate an MSet object.
This class stores a list of terms.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
ByQueryIndexCmp(const tmap_t &tmap_)
map< string, unsigned int > tmap_t
bool operator()(const string &left, const string &right) const
This class is used to access a database, or a group of databases.
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
A document in the database, possibly plus modifications.
A handle representing a document in a Xapian database.
Xapian::Internal::intrusive_ptr< Internal > internal
Class which actually implements Xapian::ESet.
Class representing a list of search results.
Xapian::Internal::intrusive_ptr< Internal > internal
Internals of enquire system.
void set_query(const Query &query_, termcount qlen_)
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, const RSet *omrset, const MatchDecider *mdecider) const
const Xapian::Database db
The database which this enquire object uses.
Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const
Read a previously requested document from the database.
const Query & get_query() const
TermIterator get_matching_terms(Xapian::docid did) const
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags, const ExpandDecider *edecider, double min_wt) const
string get_description() const
Internal(const Internal &)
Copy not allowed.
void request_doc(const Xapian::Internal::MSetItem &item) const
Request a document from the database.
Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const
Xapian::doccount get_termfreq(const string &tname) const
This class provides an interface to the information retrieval system for the purpose of searching.
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
void set_sort_by_key(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by key generated from values only.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
void clear_matchspies()
Remove all the matchspies.
void set_time_limit(double time_limit)
Set a time limit for the match.
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
void add_matchspy(MatchSpy *spy)
Add a matchspy.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
void set_sort_by_key_then_relevance(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by keys generated from values, then by relevance for documents with identical keys.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
~Enquire()
Close the Xapian::Enquire object.
void set_sort_by_relevance()
Set the sorting to be by relevance only.
void operator=(const Enquire &other)
Assignment is allowed (and is cheap).
Xapian::Internal::intrusive_ptr< Internal > internal
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
std::string get_description() const
Return a string describing this object.
const Xapian::Query & get_query() const
Get the current query.
void set_docid_order(docid_order order)
Set sort order for document IDs.
Enquire(const Enquire &other)
Copying is allowed (and is cheap).
void set_sort_by_relevance_then_key(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by relevance, then by keys generated from values.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
docid_order
Ordering of docids.
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
Decide if a Xapian::Error exception should be ignored.
ExpandDecider subclass which rejects terms using two ExpandDeciders.
ExpandDecider subclass which rejects terms in a specified list.
Virtual base class for expand decider functor.
This class implements the Bo1 scheme for query expansion.
Class for calculating ESet term weights.
An item resulting from a query.
Xapian::docid did
Document id.
This class implements the TradWeight scheme for query expansion.
A smart pointer that optionally uses intrusive reference counting.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
InvalidOperationError indicates the API was used in an invalid way.
Virtual base class for key making functors.
Iterator over a Xapian::MSet.
std::string get_sort_key() const
Return the sort key for the current position.
Xapian::Document get_document() const
Get the Document object for the current position.
std::string get_collapse_key() const
Return the collapse key for the current position.
double get_weight() const
Get the weight for the current position.
Xapian::doccount get_collapse_count() const
Return a count of the number of collapses done onto the current key.
Xapian::docid operator*() const
Get the numeric document id for the current position.
std::string get_description() const
Return a string describing this object.
int convert_to_percent_internal(double wt) const
Converts a weight to a percentage weight.
double percent_factor
Factor to multiply weights by to convert them to percentages.
string get_description() const
Return a string describing this object.
void read_docs() const
Read and cache the documents so far requested.
Xapian::Document get_doc_by_index(Xapian::doccount index) const
Get a document by index in the MSet, via the cache.
void fetch_items(Xapian::doccount first, Xapian::doccount last) const
Fetch items specified into the document cache.
Class representing a list of search results.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Xapian::doccount size() const
Return number of items in this MSet object.
MSet()
Default constructor.
double get_max_possible() const
The maximum possible weight any document could achieve.
void fetch_(Xapian::doccount first, Xapian::doccount last) const
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
std::string snippet(const std::string &text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, const std::string &hi_start="<b>", const std::string &hi_end="</b>", const std::string &omit="...") const
Generate a snippet.
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Xapian::Internal::intrusive_ptr< Internal > internal
MSet & operator=(const MSet &o)
Copying is allowed.
Xapian::doccount get_firstitem() const
Rank of first item in this MSet.
std::string get_description() const
Return a string describing this object.
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
double get_max_attained() const
The maximum weight attained by any document.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Base class for matcher decision functor.
Abstract base class for match spies.
Class representing a query.
bool empty() const
Check if this query is Xapian::Query::MatchNothing.
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
const TermIterator get_terms_end() const
End iterator for terms in the query object.
std::string get_description() const
Return a string describing this object.
Xapian::termcount get_length() const
Return the length of this query object.
string get_description() const
Return a string describing this object.
set< Xapian::docid > items
Items in the relevance set.
RSet()
Default constructor.
Xapian::Internal::intrusive_ptr< Internal > internal
void add_document(Xapian::docid did)
Add a document to the relevance set.
void remove_document(Xapian::docid did)
Remove a document from the relevance set.
Xapian::doccount size() const
The number of documents in this R-Set.
bool contains(Xapian::docid did) const
Test if a given document is in the relevance set.
bool empty() const
Test if this R-Set is empty.
std::string get_description() const
Return a string describing this object.
void operator=(const RSet &rset)
Assignment operator.
RangeError indicates an attempt to access outside the bounds of a container.
Class representing a stemming algorithm.
Class for iterating over a list of terms.
UnimplementedError indicates an attempt to use an unimplemented feature.
Class to hold statistics for a given collection.
Abstract base class for weighting schemes.
virtual Weight * clone() const =0
Clone this object.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
#define LOGCALL_DTOR(CATEGORY, CLASS)
Hierarchy of classes which Xapian can throw as exceptions.
Xapian::ESet::Internal class.
Define exp10() if not provided by <cmath>
Allow rejection of terms during ESet generation.
Collate statistics and calculate the term weights for the ESet.
API for working with documents.
MSetItem comparison functions.
const Xapian::Enquire::Internal::sort_setting VAL
const Xapian::Enquire::Internal::sort_setting REL
class for performing a match
string str(int value)
Convert int to std::string.
The Xapian namespace contains public interfaces for the Xapian library.
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
double weight
The weight of a document or term.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Various assertion macros.
#define AssertRel(A, REL, B)
Round a bounded estimate to an appropriate number of significant figures.
Xapian::doccount round_estimate(T lb, T ub, T est)
Round a bounded estimate to an appropriate number of significant figures.
static Xapian::Stem stemmer
Convert types to std::string.
Class for iterating over a list of terms.
A vector-like container of terms which can be iterated.
Xapian::Weight::Internal class, holding database and term statistics.