60 #include <unordered_set>
81 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
82 defined(__mc68000__) || defined(__mc68010__) || \
83 defined(__mc68020__) || defined(__mc68030__)
106 return a_max_wt > b_max_wt;
144 void shrink(
size_t new_size);
150 pls.reserve(reserve);
157 if (new_size >=
pls.size())
160 for (
auto&& i =
pls.begin() + new_size; i !=
pls.end(); ++i) {
163 pls.resize(new_size);
191 auto begin =
pls.begin() +
pls.size() - out_of;
192 for (
auto i = begin; i !=
pls.end(); ++i) {
193 (*i)->recalc_maxweight();
202 vector<PostList*>::iterator begin =
pls.begin();
203 nth_element(begin, begin + set_size - 1,
pls.end(),
213 if (
pls.size() == 1) {
243 if (
pls.size() == 1) {
259 if (
pls.size() == 1) {
347 vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
348 vector<PostList *>::const_iterator terms_end = pls.begin() + end;
351 pl =
new NearPostList(pl, window, terms_begin, terms_end);
352 }
else if (window == end - begin) {
371 size_t end =
pls.size();
372 size_t begin = end - n_subqs;
394 pl.reset(
new AndNotPostList(pl.release(), rhs, matcher, db_size));
404 pl.reset(filter.postlist(pl.release(),
pls));
452 unsigned char ch = *(*p)++;
454 case 4:
case 5:
case 6:
case 7: {
460 size_t n_subqs = ch & 0x07;
465 unsigned char code = (ch >> 3) & 0x0f;
527 size_t len = ch & 0x0f;
532 if (
size_t(end - *p) < len)
534 string term(*p, len);
537 int code = ((ch >> 4) & 0x03);
564 string begin(*p, len);
573 string end_(*p, len);
594 int max_type =
static_cast<unsigned char>(*(*p)++);
595 op combiner =
static_cast<op>(*(*p)++);
598 string pattern(*p, len);
608 string name(*p, len);
613 string m =
"PostingSource ";
615 m +=
" not registered";
629 return new QueryScaleWeight(scale_factor,
647 string msg =
"Unknown Query serialisation: ";
685 QueryTerm::get_description()
const
689 desc =
"<alldocuments>";
727 string desc =
"PostingSource(";
728 desc +=
source->get_description();
734 : scale_factor(factor), subquery(subquery_)
806 vector<pair<Xapian::termpos, string>> &terms =
807 *
static_cast<vector<pair<Xapian::termpos, string>
>*>(void_terms);
808 terms.push_back(make_pair(
pos,
term));
858 result +=
static_cast<char>(0x20 |
slot);
860 result +=
static_cast<char>(0x20 | 15);
878 string desc =
"VALUE_RANGE ";
925 result +=
static_cast<char>(0x20 |
slot);
927 result +=
static_cast<char>(0x20 | 15);
944 string desc =
"VALUE_LE ";
987 result +=
static_cast<char>(0x20 | 0x10 |
slot);
989 result +=
static_cast<char>(0x20 | 0x10 | 15);
1005 string desc =
"VALUE_GE ";
1017 double or_factor = 0.0;
1018 if (factor == 0.0) {
1027 if (!old_in_synonym) {
1036 if (expansions_left == 0)
1043 if (expansions_left-- == 0) {
1046 string msg(
"Wildcard ");
1048 msg +=
"* expands to more than ";
1054 const string & term = t->get_termname();
1068 if (factor != 0.0) {
1110 result +=
static_cast<char>(0x0b);
1112 result +=
static_cast<unsigned char>(
max_type);
1113 result +=
static_cast<unsigned char>(
combiner);
1127 string desc =
"WILDCARD ";
1151 QueryVector::const_iterator i;
1156 result += (*i).internal->get_length();
1161 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1162 #define MISC(X) static_cast<unsigned char>(X)
1166 static const unsigned char first_byte[] = {
1184 AssertRel(
size_t(op_),<,
sizeof(first_byte));
1185 unsigned char ch = first_byte[op_];
1199 QueryVector::const_iterator i;
1202 Assert((*i).internal.get());
1203 (*i).internal->serialise(result);
1241 QueryVector::const_iterator i;
1244 Assert((*i).internal.get());
1245 (*i).internal->gather_terms(void_terms);
1253 LOGCALL_VOID(MATCH,
"QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1262 size_t size_before = ctx.
size();
1263 QueryVector::const_iterator q;
1266 Assert((*q).internal.get());
1267 (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1270 size_t out_of = ctx.
size() - size_before;
1271 if (elite_set_size && elite_set_size < out_of) {
1290 if (factor == 0.0) {
1303 bool wdf_disjoint =
false;
1309 wdf_disjoint =
true;
1310 vector<string> prefixes;
1313 wdf_disjoint =
false;
1316 auto qw =
static_cast<const QueryWildcard*
>(q.internal.get());
1317 prefixes.push_back(qw->get_pattern());
1321 sort(prefixes.begin(), prefixes.end());
1322 const string* prev =
nullptr;
1323 for (
const auto& i : prefixes) {
1326 wdf_disjoint =
false;
1336 wdf_disjoint =
true;
1337 unordered_set<string> terms;
1340 wdf_disjoint =
false;
1343 auto qt =
static_cast<const QueryTerm*
>(q.internal.get());
1344 if (!terms.insert(qt->get_term()).second) {
1345 wdf_disjoint =
false;
1367 if (factor == 0.0) {
1403 QueryVector::const_iterator i;
1405 if (desc.size() > 1) {
1408 desc +=
str(parameter);
1412 Assert((*i).internal.get());
1416 desc += (*i).internal->get_description();
1439 size_t len =
term.size();
1441 if (
wqf == 1 &&
pos == 0) {
1450 }
else if (
wqf == 1) {
1454 result +=
static_cast<char>(0x40 | 0x10);
1457 result +=
static_cast<char>(0x40 | 0x10 | len);
1463 result +=
static_cast<char>(0x40 | 0x20);
1466 result +=
static_cast<char>(0x40 | 0x20 | len);
1471 }
else if (
wqf > 1 ||
pos > 0) {
1474 result +=
static_cast<char>(0x40 | 0x30);
1477 result +=
static_cast<char>(0x40 | 0x30 | len);
1487 result +=
static_cast<char>(0x40);
1490 result +=
static_cast<char>(0x40 | len);
1498 result +=
static_cast<char>(0x0c);
1500 const string & n =
source->name();
1504 const string & s =
source->serialise();
1524 if (subquery.
internal.get() == NULL)
1555 QueryVector::const_iterator i;
1558 Assert((*i).internal.get());
1559 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1567 if (subquery.
internal.get() != NULL)
1595 if (subquery.
internal.get() == NULL) {
1670 AutoPtr<PostList> l(
subqueries[0].internal->postlist(qopt, factor));
1673 AutoPtr<PostList> r(ctx.
postlist());
1681 double factor)
const
1683 subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor);
1699 QueryVector::const_iterator i;
1702 Assert((*i).internal.get());
1703 (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1712 AutoPtr<PostList> l(
subqueries[0].internal->postlist(qopt, factor));
1713 if (factor == 0.0) {
1719 AutoPtr<PostList> r(ctx.
postlist());
1727 double factor)
const
1729 subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor);
1745 QueryVector::const_iterator i;
1748 Assert((*i).internal.get());
1749 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1779 QueryVector::const_iterator i;
1782 Assert((*i).internal.get());
1784 PostList* pl = (*i).internal->postlist(qopt, factor);
2006 string d =
"(SYNONYM ";
2007 d +=
subqueries[0].internal->get_description();
2035 result +=
static_cast<char>(0x00);
Merged postlist: items from one list, weights from both.
Return items which are in A, unless they're in B.
Wrapper around standard unique_ptr template.
A postlist with weights modified by another postlist.
A postlist generated by taking one postlist (the left-hand postlist), and removing any documents whic...
A PostList which contains no entries.
Postlist which matches an exact phrase using positional information.
N-way OR postlist with wt=max(wt_i).
Postlist which matches terms occurring within a specified window.
Wrapper postlist providing positions for an OR.
A postlist comprising two postlists ORed together.
Postlist which matches a phrase using positional information.
Xapian::termcount get_total_subqs() const
LeafPostList * open_lazy_post_list(const std::string &term, Xapian::termcount wqf, double factor)
PostList * make_synonym_postlist(PostList *pl, double factor, bool wdf_disjoint)
LeafPostList * open_post_list(const std::string &term, Xapian::termcount wqf, double factor)
void destroy_postlist(PostList *pl)
bool full_db_has_positions() const
Xapian::doccount shard_index
void set_total_subqs(Xapian::termcount n)
const Xapian::Database::Internal & db
Base class for databases.
virtual LeafPostList * open_post_list(const string &tname) const =0
Open a posting list.
virtual Xapian::doccount get_value_freq(Xapian::valueno slot) const =0
Return the frequency of a given value slot.
virtual bool has_positions() const =0
Check whether this database contains any positional information.
virtual std::string get_value_upper_bound(Xapian::valueno slot) const =0
Get an upper bound on the values stored in the given value slot.
virtual std::string get_value_lower_bound(Xapian::valueno slot) const =0
Get a lower bound on the values stored in the given value slot.
virtual TermList * open_allterms(const string &prefix) const =0
Open an allterms list.
This class is used to access a database, or a group of databases.
size_t begin
Start and end indices for the PostLists this positional filter uses.
PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_, Xapian::termcount window_)
PostList * postlist(PostList *pl, const vector< PostList * > &pls) const
OrContext & get_maybe_ctx(size_t reserve)
AndContext(QueryOptimiser *qopt_, size_t reserve)
void add_pos_filter(Query::op op_, size_t n_subqs, Xapian::termcount window)
OrContext & get_not_ctx(size_t reserve)
AutoPtr< OrContext > maybe_ctx
AutoPtr< OrContext > not_ctx
list< PosFilter > pos_filters
void add_postlist(PostList *pl)
void shrink(size_t new_size)
void select_most_frequent(size_t set_size)
Select the set_size postlists with the highest term frequency.
void select_elite_set(size_t set_size, size_t out_of)
Select the best set_size postlists from the last out_of added.
PostList * postlist_max()
OrContext(QueryOptimiser *qopt_, size_t reserve)
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
void add_subquery(const Xapian::Query &subquery)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
void add_subquery(const Xapian::Query &subquery)
Xapian::Query::op get_op() const
std::string get_description() const
Xapian::Query::op get_op() const
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
Xapian::Query::op get_op() const
std::string get_description() const
virtual Query::Internal * done()=0
virtual Xapian::Query::op get_op() const =0
void serialise_(std::string &result, Xapian::termcount parameter=0) const
virtual void add_subquery(const Xapian::Query &subquery)=0
size_t get_num_subqueries() const
void do_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, Xapian::termcount elite_set_size=0, size_t first=0) const
const std::string get_description_helper(const char *op, Xapian::termcount window=0) const
PostList * do_synonym(QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_type() const
void gather_terms(void *void_terms) const
PostList * do_max(QueryOptimiser *qopt, double factor) const
termcount get_length() const
const Query get_subquery(size_t n) const
void serialise(std::string &result) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::termcount set_size
void serialise(std::string &result) const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor) const
std::string get_description() const
Xapian::Query::op get_op() const
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
std::string get_description() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_op() const
void serialise(std::string &result) const
Xapian::Query::op get_type() const
std::string get_description() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
std::string get_description() const
Xapian::Query::op get_op() const
void serialise(std::string &result) const
Xapian::Query::op get_op() const
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_op() const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor) const
std::string get_description() const
std::string get_description() const
Xapian::Query::op get_op() const
void postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor) const
void serialise(std::string &result) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Internal::opt_intrusive_ptr< PostingSource > source
Xapian::Query::op get_type() const
std::string get_description() const
void serialise(std::string &result) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
size_t get_num_subqueries() const
void serialise(std::string &result) const
Xapian::Query::op get_type() const
std::string get_description() const
const Query get_subquery(size_t n) const
QueryScaleWeight(double factor, const Query &subquery_)
void gather_terms(void *void_terms) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_op() const
std::string get_description() const
void serialise(std::string &result) const
void gather_terms(void *void_terms) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_type() const
std::string get_description() const
void serialise(std::string &result) const
Xapian::Query::op get_type() const
void serialise(std::string &result) const
std::string get_description() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
void serialise(std::string &result) const
Xapian::Query::op get_type() const
std::string get_description() const
Xapian::termcount max_expansion
Xapian::Query::op get_type() const
QueryWildcard * change_combiner(Xapian::Query::op new_op)
Change the combining operator.
void serialise(std::string &result) const
std::string get_description() const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
Xapian::Query::op get_op() const
termcount get_length() const
void postlist_windowed(Xapian::Query::op op, AndContext &ctx, QueryOptimiser *qopt, double factor) const
void postlist_sub_xor(XorContext &ctx, QueryOptimiser *qopt, double factor) const
PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const
std::string get_description() const
Xapian::Query::op get_op() const
XorContext(QueryOptimiser *qopt_, size_t reserve)
InvalidArgumentError indicates an invalid parameter value was passed to the API.
InvalidOperationError indicates the API was used in an invalid way.
Abstract base class for postlists.
virtual Xapian::doccount get_termfreq_est() const =0
Get an estimate of the number of documents indexed by this term.
virtual double get_maxweight() const =0
Return an upper bound on what get_weight() can return.
Base class which provides an "external" source of postings.
virtual PostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
PostingSource * release()
Start reference counting this object.
virtual termcount get_length() const
virtual void postlist_sub_xor(Xapian::Internal::XorContext &ctx, QueryOptimiser *qopt, double factor) const
static Query::Internal * unserialise(const char **p, const char *end, const Registry &reg)
virtual void postlist_sub_and_like(Xapian::Internal::AndContext &ctx, QueryOptimiser *qopt, double factor) const
virtual void postlist_sub_or_like(Xapian::Internal::OrContext &ctx, QueryOptimiser *qopt, double factor) const
virtual const Query get_subquery(size_t n) const
virtual void gather_terms(void *void_terms) const
virtual size_t get_num_subqueries() const
Class representing a query.
@ WILDCARD_LIMIT_FIRST
Stop expanding when OP_WILDCARD reaches its expansion limit.
@ WILDCARD_LIMIT_MOST_FREQUENT
Limit OP_WILDCARD expansion to the most frequent terms.
const Query get_subquery(size_t n) const
Read a top level subquery.
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
@ LEAF_POSTING_SOURCE
Value returned by get_type() for a PostingSource.
@ OP_MAX
Pick the maximum weight of any subquery.
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
@ OP_WILDCARD
Wildcard expansion.
@ OP_XOR
Match documents which an odd number of subqueries match.
@ OP_AND_MAYBE
Match the first subquery taking extra weight from other subqueries.
@ LEAF_MATCH_ALL
Value returned by get_type() for MatchAll or equivalent.
@ OP_NEAR
Match only documents where all subqueries match near each other.
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
@ LEAF_TERM
Value returned by get_type() for a term.
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
@ OP_INVALID
Construct an invalid query.
Query()
Construct a query matching no documents.
static const Query unserialise(const std::string &serialised, const Registry &reg=Registry())
Unserialise a string and return a Query object.
Xapian::Internal::intrusive_ptr< Internal > internal
op get_type() const
Get the type of the top level of the query.
Registry for user subclasses.
const Xapian::PostingSource * get_posting_source(const std::string &name) const
Get a posting source given a name.
Indicates an error in the std::string serialisation of an object.
const_iterator begin() const
const_iterator end() const
void push_back(const T &elt)
WildcardError indicates an error expanding a wildcarded query.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Append a string to an object description, escaping invalid UTF-8.
A PostList which contains no entries.
Hierarchy of classes which Xapian can throw as exceptions.
Return docs containing terms forming a particular exact phrase.
Return document ids from an external source.
Abstract base class for leaf postlists.
void decode_length_and_check(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
void decode_length(const char **p, const char *end, unsigned &out)
Decode a length encoded by encode_length.
length encoded as a string
std::string encode_length(T len)
Encode a length as a variable-length string.
N-way OR postlist with wt=max(wt_i)
string str(int value)
Convert int to std::string.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Return docs containing terms within a specified window.
Various assertion macros.
#define AssertRel(A, REL, B)
Wrapper postlist providing positions for an OR.
Return docs containing terms forming a particular phrase.
External sources of posting information.
Details passed around while building PostList tree from Query tree.
std::string serialise_double(double v)
Serialise a double to a string.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
functions to serialise and unserialise a double
Convert types to std::string.
Various handy helpers which std::string really should provide.
bool startswith(const std::string &s, char pfx)
Class providing an operator which sorts postlists to select max or terms.
bool operator()(const PostList *a, const PostList *b)
Return true if and only if a has a strictly greater termweight than b.
Comparison functor which orders PostList* by descending get_termfreq_est().
bool operator()(const PostList *a, const PostList *b) const
Order by descending get_termfreq_est().
Abstract base class for termlists.
void description_append(std::string &desc, const std::string &s)
Return document ids matching a >= test on a specified doc value.
Return document ids matching a range test on a specified doc value.