64 MatchDecider::~MatchDecider() { }  // Out-of-line MatchDecider destructor; the body is empty.
94 return internal->items.size();
100 return internal->items.empty();
107 internal->items.insert(did);
113 internal->items.erase(did);
119 return internal->items.find(did) !=
internal->items.end();
125 return "RSet(" +
internal->get_description() +
")";
131 string description(
"RSet::Internal(");
133 set<Xapian::docid>::const_iterator i;
134 for (i = items.begin(); i != items.end(); ++i) {
135 if (i != items.begin()) description +=
", ";
136 description +=
str(*i);
149 MSetItem::get_description()
const 153 description =
str(did) +
", " +
str(wt) +
", " +
156 description =
"Xapian::MSetItem(" + description +
")";
192 LOGCALL_VOID(API,
"Xapian::MSet::fetch_", first | last);
193 Assert(
internal.
get() != 0);
194 internal->fetch_items(first, last);
200 LOGCALL(API,
int,
"Xapian::MSet::convert_to_percent", wt);
201 Assert(
internal.
get() != 0);
209 Assert(
internal.
get() != 0);
212 if (
internal->stats->get_stats(tname, termfreq))
224 LOGCALL(API,
double,
"Xapian::MSet::get_termweight", tname);
225 Assert(
internal.
get() != 0);
230 if (!
internal->stats->get_termweight(tname, termweight)) {
232 msg +=
": termweight not available";
241 Assert(
internal.
get() != 0);
242 return internal->firstitem;
248 Assert(
internal.
get() != 0);
249 return internal->matches_lower_bound;
255 Assert(
internal.
get() != 0);
264 Assert(
internal.
get() != 0);
265 return internal->matches_upper_bound;
271 Assert(
internal.
get() != 0);
272 return internal->uncollapsed_lower_bound;
278 Assert(
internal.
get() != 0);
279 return internal->uncollapsed_estimated;
285 Assert(
internal.
get() != 0);
286 return internal->uncollapsed_upper_bound;
292 Assert(
internal.
get() != 0);
293 return internal->max_possible;
299 Assert(
internal.
get() != 0);
300 return internal->max_attained;
308 const string & hi_start,
309 const string & hi_end,
310 const string & omit)
const 312 Assert(
internal.
get() != 0);
313 return internal->snippet(text, length, stemmer, flags,
314 hi_start, hi_end, omit);
320 Assert(
internal.
get() != 0);
321 return internal->items.size();
327 Assert(
internal.
get() != 0);
328 return "Xapian::MSet(" +
internal->get_description() +
")";
334 LOGCALL(MATCH,
int,
"Xapian::MSet::Internal::convert_to_percent_internal", wt);
335 if (percent_factor == 0)
RETURN(100);
338 double v = wt * percent_factor + 100.0 * DBL_EPSILON;
339 int pcent =
static_cast<int>(v);
340 LOGLINE(MATCH,
"wt = " << wt <<
", max_possible = " << max_possible <<
341 " => pcent = " << pcent);
342 if (pcent > 100) pcent = 100;
343 if (pcent < 0) pcent = 0;
344 if (pcent == 0 && wt > 0) pcent = 1;
352 LOGCALL(MATCH,
Document,
"Xapian::MSet::Internal::get_doc_by_index", index);
354 map<Xapian::doccount, Document>::const_iterator doc;
355 doc = indexeddocs.find(index);
356 if (doc != indexeddocs.end()) {
359 if (index < firstitem || index >= firstitem + items.size()) {
360 throw RangeError(
"The mset returned from the match does not contain the document at index " +
str(index));
363 if (!requested_docs.empty()) {
367 doc = indexeddocs.find(index);
368 if (doc != indexeddocs.end()) {
373 RETURN(enquire->get_document(items[index - firstitem]));
379 LOGCALL_VOID(MATCH,
"Xapian::MSet::Internal::fetch_items", first | last);
380 if (enquire.get() == 0) {
381 throw InvalidOperationError(
"Can't fetch documents from an MSet which is not derived from a query.");
383 if (items.empty())
return;
384 if (last > items.size() - 1)
385 last = items.size() - 1;
387 map<Xapian::doccount, Document>::const_iterator doc;
388 doc = indexeddocs.find(i);
389 if (doc == indexeddocs.end()) {
391 set<Xapian::doccount>::const_iterator s;
392 s = requested_docs.find(i);
393 if (s == requested_docs.end()) {
395 enquire->request_doc(items[i - firstitem]);
396 requested_docs.insert(i);
405 string description =
"Xapian::MSet::Internal(";
407 description +=
"firstitem=" +
str(firstitem) +
", " +
408 "matches_lower_bound=" +
str(matches_lower_bound) +
", " +
409 "matches_estimated=" +
str(matches_estimated) +
", " +
410 "matches_upper_bound=" +
str(matches_upper_bound) +
", " +
411 "max_possible=" +
str(max_possible) +
", " +
412 "max_attained=" +
str(max_attained);
414 for (vector<Xapian::Internal::MSetItem>::const_iterator i = items.begin();
415 i != items.end(); ++i) {
416 if (!description.empty()) description +=
", ";
417 description += i->get_description();
428 set<Xapian::doccount>::const_iterator i;
429 for (i = requested_docs.begin(); i != requested_docs.end(); ++i) {
430 indexeddocs[*i] = enquire->read_doc(items[*i - firstitem]);
431 LOGLINE(MATCH,
"stored doc at index " << *i <<
" is " << indexeddocs[*i]);
434 requested_docs.clear();
442 Assert(mset.internal.get());
446 return mset.internal->items[index].did;
452 Assert(mset.internal.get());
456 return mset.internal->get_doc_by_index(index);
462 Assert(mset.internal.get());
466 return mset.internal->items[index].wt;
472 Assert(mset.internal.get());
476 return mset.internal->items[index].collapse_key;
482 Assert(mset.internal.get());
486 return mset.internal->items[index].collapse_count;
492 Assert(mset.internal.get());
496 return mset.internal->items[index].sort_key;
502 return "Xapian::MSetIterator(" +
str(mset.size() - off_from_end) +
")";
509 order(
Enquire::ASCENDING), percent_cutoff(0), weight_cutoff(0),
511 sorter(), time_limit(0.0),
weight(0),
512 eweightname(
"trad"), expand_k(1.0)
515 throw InvalidArgumentError(
"Can't make an Enquire object from an uninitialised Database object.");
543 LOGCALL(MATCH,
MSet,
"Enquire::Internal::get_mset", first | maxitems | check_at_least | rset | mdecider);
556 first = min(first, docs);
557 maxitems = min(maxitems, docs - first);
558 check_at_least = min(check_at_least, docs);
559 check_at_least = max(check_at_least, first + maxitems);
572 match.get_mset(first, maxitems, check_at_least, retval,
573 *(stats.get()), mdecider,
sorter.get());
574 if (first_orig != first && retval.internal.get()) {
575 retval.
internal->firstitem = first_orig;
578 Assert(
weight->name() !=
"bool" || retval.get_max_possible() == 0);
584 retval.internal->enquire =
this;
586 if (!retval.internal->stats) {
587 retval.internal->stats = stats.release();
595 const RSet & rset,
int flags,
599 LOGCALL(MATCH,
ESet,
"Enquire::Internal::get_eset", maxitems | rset | flags | edecider_ | min_wt);
602 opt_intrusive_ptr<const ExpandDecider> edecider(edecider_);
603 if (maxitems == 0 || rset.
empty()) {
612 opt_intrusive_ptr<const ExpandDecider> decider_noquery(
615 if (edecider.get()) {
617 edecider.get()))->release();
619 edecider = decider_noquery;
629 eset.
internal->expand(maxitems,
db, rset, edecider.get(), bo1eweight,
633 eset.
internal->expand(maxitems,
db, rset, edecider.get(), tradeweight,
642 typedef map<string, unsigned int>
tmap_t;
648 const string &right)
const {
649 tmap_t::const_iterator l, r;
651 r = tmap.find(right);
652 Assert((l != tmap.end()) && (r != tmap.end()));
654 return l->second < r->second;
670 map<string, unsigned int> tmap;
671 unsigned int index = 1;
673 if (tmap.find(*qt) == tmap.end())
677 vector<string> matching_terms;
681 while (docterms != docterms_end) {
682 string term = *docterms;
683 map<string, unsigned int>::iterator t = tmap.find(term);
684 if (t != tmap.end()) matching_terms.push_back(term);
689 sort(matching_terms.begin(), matching_terms.end(),
ByQueryIndexCmp(tmap));
692 matching_terms.end()));
728 db.
internal[dbnumber]->request_document(realdid);
740 doc =
db.
internal[dbnumber]->collect_document(realdid);
795 LOGCALL_VOID(API,
"Xapian::Enquire::set_query", query | len);
796 internal->set_query(query, len);
808 LOGCALL_VOID(API,
"Xapian::Enquire::add_matchspy", spy);
809 internal->spies.push_back(spy);
814 LOGCALL_VOID(API,
"Xapian::Enquire::clear_matchspies", NO_ARGS);
815 internal->spies.clear();
821 LOGCALL_VOID(API,
"Xapian::Enquire::set_weighting_scheme", weight_);
831 LOGCALL_VOID(API,
"Xapian::Enquire::set_expansion_scheme", eweightname_ | expand_k_);
833 if (eweightname_ ==
"prob") {
834 internal->eweightname =
"trad";
835 internal->expand_k = expand_k_;
839 if (eweightname_ !=
"bo1" && eweightname_ !=
"trad") {
843 internal->eweightname = eweightname_;
844 internal->expand_k = expand_k_;
851 internal->collapse_key = collapse_key;
852 internal->collapse_max = collapse_max;
858 internal->order = order;
864 internal->percent_cutoff = percent_cutoff;
865 internal->weight_cutoff = weight_cutoff;
877 internal->sorter = NULL;
878 internal->sort_key = sort_key;
880 internal->sort_value_forward = ascending;
886 internal->sorter = NULL;
887 internal->sort_key = sort_key;
889 internal->sort_value_forward = ascending;
895 internal->sorter = NULL;
896 internal->sort_key = sort_key;
898 internal->sort_value_forward = ascending;
906 internal->sorter = sorter;
908 internal->sort_value_forward = ascending;
916 internal->sorter = sorter;
918 internal->sort_value_forward = ascending;
926 internal->sorter = sorter;
928 internal->sort_value_forward = ascending;
934 internal->time_limit = time_limit;
942 LOGCALL(API,
Xapian::MSet,
"Xapian::Enquire::get_mset", first | maxitems | check_at_least | rset | mdecider);
943 RETURN(
internal->get_mset(first, maxitems, check_at_least, rset, mdecider));
950 LOGCALL(API,
Xapian::ESet,
"Xapian::Enquire::get_eset", maxitems | rset | flags | edecider | min_wt);
951 RETURN(
internal->get_eset(maxitems, rset, flags, edecider, min_wt));
971 return "Xapian::Enquire(" +
internal->get_description() +
")";
Xapian::termcount get_length() const
Return the length of this query object.
The Xapian namespace contains public interfaces for the Xapian library.
Xapian::doccount size() const
Return number of items in this MSet object.
ExpandDecider subclass which rejects terms in a specified list.
void operator=(const Enquire &other)
Assignment is allowed (and is cheap).
Xapian::doccount size() const
The number of documents in this R-Set.
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
std::string eweightname
The weighting scheme to use for query expansion.
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
MSet & operator=(const MSet &o)
Copying is allowed.
const Xapian::Database db
The database which this enquire object uses.
void read_docs() const
Read and cache the documents so far requested.
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
double get_max_possible() const
The maximum possible weight any document could achieve.
void set_sort_by_relevance()
Set the sorting to be by relevance only.
void set_docid_order(docid_order order)
Set sort order for document IDs.
std::string get_description() const
Return a string describing this object.
int convert_to_percent(double weight) const
Convert a weight to a percentage.
This class is used to access a database, or a group of databases.
Xapian::Document get_doc_by_index(Xapian::doccount index) const
Get a document by index in the MSet, via the cache.
Class for performing a match.
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
std::string get_description() const
Return a string describing this object.
void fetch_(Xapian::doccount first, Xapian::doccount last) const
string get_description() const
const Query & get_query() const
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
#define AssertRel(A, REL, B)
InvalidOperationError indicates the API was used in an invalid way.
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
Class representing a stemming algorithm.
Abstract base class for match spies.
double weight
The weight of a document or term.
virtual std::string get_description() const
Return a string describing this object.
Xapian::doccount round_estimate(T lb, T ub, T est)
Round a bounded estimate to an appropriate number of significant figures.
Class which actually implements Xapian::ESet.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Xapian::docid did
Document id.
Xapian::Internal::intrusive_ptr< Internal > internal
double get_max_attained() const
The maximum weight attained by any document.
std::string snippet(const std::string &text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, const std::string &hi_start="<b>", const std::string &hi_end="</b>", const std::string &omit="...") const
Generate a snippet.
Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const
Read a previously requested document from the database.
void operator=(const RSet &rset)
Assignment operator.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Xapian::doccount get_termfreq(const string &tname) const
bool operator()(const string &left, const string &right) const
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
A document in the database, possibly plus modifications.
Class representing a list of search results.
bool contains(Xapian::docid did) const
Test if a given document is in the relevance set.
ExpandDecider subclass which rejects terms using two ExpandDeciders.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
std::string get_sort_key() const
Return the sort key for the current position.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Convert types to std::string.
Virtual base class for expand decider functor.
ByQueryIndexCmp(const tmap_t &tmap_)
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, const RSet *omrset, const MatchDecider *mdecider) const
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
Xapian::doccount get_doccount() const
Get the number of documents in the database.
static Xapian::Stem stemmer
virtual Weight * clone() const =0
Clone this object.
A vector-like container of terms which can be iterated.
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
TermIterator get_matching_terms(Xapian::docid did) const
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Enquire(const Enquire &other)
Copying is allowed (and is cheap).
Xapian::Internal::intrusive_ptr< Internal > internal
Xapian::Enquire::docid_order order
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Class for calculating ESet term weights.
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Decide if a Xapian::Error exception should be ignored.
int convert_to_percent_internal(double wt) const
Converts a weight to a percentage weight.
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
RangeError indicates an attempt to access outside the bounds of a container.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Xapian::doccount get_collapse_count() const
Return a count of the number of collapses done onto the current key.
Xapian::doccount get_firstitem() const
Rank of first item in this MSet.
string get_description() const
Return a string describing this object.
Collate statistics and calculate the term weights for the ESet.
Xapian::doccount collapse_max
Iterator over a Xapian::MSet.
Xapian::docid operator*() const
Get the numeric document id for the current position.
Weight * weight
The weight to use for this query.
Xapian::Weight::Internal class, holding database and term statistics.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Query query
The user's query.
Class to hold statistics for a given collection.
void clear_matchspies()
Remove all the matchspies.
void set_time_limit(double time_limit)
Set a time limit for the match.
An item resulting from a query.
Xapian::valueno collapse_key
string str(int value)
Convert int to std::string.
MSet()
Default constructor.
termcount qlen
The query length.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Xapian::Internal::opt_intrusive_ptr< KeyMaker > sorter
Define exp10() if not provided by <cmath>
Allow rejection of terms during ESet generation.
std::string get_description() const
Return a string describing this object.
RSet()
Default constructor.
void set_sort_by_relevance_then_key(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by relevance, then by keys generated from values.
Xapian::Internal::intrusive_ptr< Internal > internal
std::string get_description() const
Return a string describing this object.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
map< string, unsigned int > tmap_t
Xapian::Internal::intrusive_ptr< Internal > internal
Base class for matcher decision functor.
This class stores a list of terms.
void set_sort_by_key_then_relevance(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by keys generated from values, then by relevance for documents with identical keys.
double get_weight() const
Get the weight for the current position.
void remove_document(Xapian::docid did)
Remove a document from the relevance set.
void add_document(Xapian::docid did)
Add a document to the relevance set.
Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
void request_doc(const Xapian::Internal::MSetItem &item) const
Request a document from the database.
Xapian::ESet::Internal class.
~Enquire()
Close the Xapian::Enquire object.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Round a bounded estimate to an appropriate number of significant figures.
const Xapian::Query & get_query() const
Get the current query.
void add_matchspy(MatchSpy *spy)
Add a matchspy.
std::string get_description() const
Return a string describing this object.
This class provides an interface to the information retrieval system for the purpose of searching.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
std::string get_collapse_key() const
Return the collapse key for the current position.
MSetItem comparison functions.
string get_description() const
Return a string describing this object.
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
bool empty() const
Check if this query is Xapian::Query::MatchNothing.
unsigned valueno
The number for a value slot in a document.
void fetch_items(Xapian::doccount first, Xapian::doccount last) const
Fetch items specified into the document cache.
const Xapian::Enquire::Internal::sort_setting REL
This class implements the TradWeight scheme for query expansion.
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Class representing a list of search results.
void set_query(const Query &query_, termcount qlen_)
Various assertion macros.
double expand_k
The parameter required for TradWeight query expansion.
Xapian::Document get_document() const
Get the Document object for the current position.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Class representing a query.
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
vector< Xapian::Internal::opt_intrusive_ptr< MatchSpy > > spies
API for working with documents.
A smart pointer that optionally uses intrusive reference counting.
bool empty() const
Test if this R-Set is empty.
const TermIterator get_terms_end() const
End iterator for terms in the query object.
Class for iterating over a list of terms.
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
Internals of enquire system.
docid_order
Ordering of docids.
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
A handle representing a document in a Xapian database.
Wrapper around standard unique_ptr template.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags, const ExpandDecider *edecider, double min_wt) const
Xapian::Weight subclass implementing the BM25 probabilistic formula.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
This class implements the Bo1 scheme for query expansion.
void set_sort_by_key(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by key generated from values only.
Virtual base class for key making functors.
UnimplementedError indicates an attempt to use an unimplemented feature.
Internal(const Internal &)
Copy not allowed.
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Abstract base class for weighting schemes.