65 #include <string_view>
66 #include <unordered_set>
92 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
93 defined(__mc68000__) || defined(__mc68010__) || \
94 defined(__mc68020__) || defined(__mc68030__)
117 return a_max_wt > b_max_wt;
151 : qopt(qopt_), estimates(reserve) {
164 if (termfreqs) termfreqs_list.emplace_back(*termfreqs);
169 add_termfreqs(termfreqs);
171 pls.emplace_back(pl);
173 if (estimate) estimates.
push_back(estimate);
180 first = std::min(first, f);
181 last = std::max(last, l);
188 add_postlist(
p.pl,
p.est.release(), termfreqs);
201 if (new_size >= pls.size())
204 for (
auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
207 pls.resize(new_size);
208 estimates.
erase(estimates.
begin() + new_size, estimates.
end());
227 unique_ptr<TermList> t(qopt->db.open_allterms(
query->get_fixed_prefix()));
228 bool skip_ucase =
query->get_fixed_prefix().
empty();
229 auto max_type =
query->get_max_type();
233 if (expansions_left == 0)
234 expansions_left = numeric_limits<decltype(expansions_left)>::max();
246 const string &
term = t->get_termname();
247 if (skip_ucase &&
term[0] >=
'A') {
256 if (
term[0] <=
'Z') {
257 static_assert(
'Z' + 1 ==
'[',
"'Z' + 1 == '['");
263 if (!
query->test_prefix_known(
term))
continue;
266 if (expansions_left == 0) {
269 string msg(
"Wildcard ");
270 msg +=
query->get_pattern();
271 if (
query->get_just_flags() == 0)
273 msg +=
" expands to more than ";
274 msg +=
str(
query->get_max_expansion());
281 add_postlist(qopt->open_lazy_post_list(
term, 1, factor), NULL);
287 auto set_size =
query->get_max_expansion();
288 if (size() > set_size) {
289 auto begin = pls.begin();
290 nth_element(begin, begin + set_size - 1, pls.end(),
297 for (
auto pl : pls) {
302 qopt->register_lazy_postlist_for_stats(
static_cast<LeafPostList*
>(pl),
304 add_termfreqs(termfreqs);
313 string pfx(
query->get_pattern(), 0,
query->get_fixed_prefix_len());
314 unique_ptr<TermList> t(qopt->db.open_allterms(pfx));
315 bool skip_ucase = pfx.empty();
316 auto max_type =
query->get_max_type();
320 if (expansions_left == 0)
321 expansions_left = numeric_limits<decltype(expansions_left)>::max();
333 const string&
term = t->get_termname();
336 if (skip_ucase &&
term[0] >=
'A') {
344 if (
term[0] <=
'Z') {
345 static_assert(
'Z' + 1 ==
'[',
"'Z' + 1 == '['");
354 if (expansions_left == 0) {
357 string msg(
"Edit distance ");
358 msg +=
query->get_pattern();
361 msg +=
" expands to more than ";
362 msg +=
str(
query->get_max_expansion());
369 add_postlist(qopt->open_lazy_post_list(
term, 1, factor), NULL);
373 auto set_size =
query->get_max_expansion();
374 if (size() > set_size) {
375 auto begin = pls.begin();
376 nth_element(begin, begin + set_size - 1, pls.end(),
383 for (
auto pl : pls) {
385 qopt->register_lazy_postlist_for_stats(
static_cast<LeafPostList*
>(pl),
387 add_termfreqs(termfreqs);
396 void estimate_termfreqs(
TermFreqs* termfreqs);
399 void select_elite_set(
size_t set_size,
size_t out_of);
411 Assert(!termfreqs_list.empty());
416 const TermFreqs& freqs = termfreqs_list[0];
417 auto& stats = *qopt->get_stats();
420 Assert(stats.collection_size);
421 double scale = 1.0 / stats.collection_size;
422 double P_est = freqs.
termfreq * scale;
423 double rtf_scale = 0.0;
424 if (stats.rset_size != 0) {
425 rtf_scale = 1.0 / stats.rset_size;
429 double cf_scale = 0.0;
430 if (
usual(stats.total_length != 0)) {
431 cf_scale = 1.0 / stats.total_length;
433 double Pc_est = freqs.
collfreq * cf_scale;
435 for (
size_t i = 1; i < termfreqs_list.size(); ++i) {
438 P_est += P_i - P_est * P_i;
439 double Pc_i = f.
collfreq * cf_scale;
440 Pc_est += Pc_i - Pc_est * Pc_i;
443 if (stats.rset_size != 0) {
445 Pr_est += Pr_i - Pr_est * Pr_i;
456 OrContext::select_elite_set(
size_t set_size,
size_t out_of)
458 auto begin = pls.begin() + pls.size() - out_of;
459 nth_element(begin, begin + set_size - 1, pls.end(),
CmpMaxOrTerms());
460 shrink(pls.size() - out_of + set_size);
466 if (!termfreqs_list.empty()) estimate_termfreqs(termfreqs);
468 switch (pls.size()) {
475 return {pl, estimates.empty() ? nullptr : estimates.release_at(0)};
479 unique_ptr<EstimateOp> est;
480 if (!qopt->get_no_estimates()) {
482 std::move(estimates)));
486 auto pl =
new BoolOrPostList(pls.begin(), pls.end(), qopt->db_size);
489 return {pl, std::move(est)};
513 auto pl =
new OrPostList(pls.front(), r, qopt->matcher);
515 if (pls.size() == 1) {
517 return {pl, std::move(est)};
527 OrContext::postlist_max()
529 switch (pls.size()) {
535 return {pl, estimates.release_at(0)};
544 qopt->matcher, qopt->db_size);
545 unique_ptr<EstimateOp> est;
546 if (!qopt->get_no_estimates()) {
549 std::move(estimates)));
552 return {pl, std::move(est)};
570 Assert(!termfreqs_list.empty());
575 auto& stats = *qopt->get_stats();
576 const TermFreqs& freqs = termfreqs_list[0];
579 Assert(stats.collection_size);
580 double scale = 1.0 / stats.collection_size;
581 double P_est = freqs.
termfreq * scale;
582 double rtf_scale = 0.0;
583 if (stats.rset_size != 0) {
584 rtf_scale = 1.0 / stats.rset_size;
588 double cf_scale = 0.0;
589 if (
usual(stats.total_length != 0)) {
590 cf_scale = 1.0 / stats.total_length;
592 double Pc_est = freqs.
collfreq * cf_scale;
594 for (
size_t i = 1; i < termfreqs_list.size(); ++i) {
597 P_est += P_i - 2.0 * P_est * P_i;
598 double Pc_i = f.
collfreq * cf_scale;
599 Pc_est += Pc_i - 2.0 * Pc_est * Pc_i;
602 if (stats.rset_size != 0) {
604 Pr_est += Pr_i - 2.0 * Pr_est * Pr_i;
614 unique_ptr<EstimateOp> est;
615 if (!qopt->get_no_estimates()) {
617 std::move(estimates)));
619 auto pl =
new XorPostList(pls.begin(), pls.end(), qopt->matcher,
623 return {pl, std::move(est)};
637 : op_(op__), begin(begin_), end(end_), window(window_) { }
641 const vector<PostList*>& pls,
645 auto terms_begin = pls.begin() + begin;
646 auto terms_end = pls.begin() + end;
649 if (termfreqs) *termfreqs /= 2;
654 window, terms_begin, terms_end, pltree);
655 }
else if (window != end - begin) {
657 if (termfreqs) *termfreqs /= 3;
662 window, terms_begin, terms_end, pltree);
665 if (termfreqs) *termfreqs /= 4;
670 terms_begin, terms_end, pltree);
692 bool match_all =
false;
703 add_termfreqs(termfreqs);
705 if (pls.empty() && termfreqs_list.size() > 1) {
706 qopt->destroy_postlist(pl);
709 pls.emplace_back(pl);
710 estimates.push_back(estimate.release());
713 first = std::max(first, pl_first);
714 last = std::min(last, pl_last);
721 return termfreqs != NULL;
725 return add_postlist(
p.pl, std::move(
p.est), termfreqs);
736 not_ctx.reset(
new OrContext(qopt, reserve));
743 maybe_ctx.reset(
new OrContext(qopt, reserve));
757 size_t end = pls.size();
758 size_t begin = end - n_subqs;
759 pos_filters.push_back(
PosFilter(op_, begin, end, window));
762 template<
typename T,
typename U>
770 return static_cast<T
>((l * double(n - r)) / n + 0.5);
776 auto matcher = qopt->matcher;
777 auto db_size = qopt->db_size;
780 Assert(!termfreqs_list.empty());
785 const TermFreqs& freqs = termfreqs_list[0];
787 double freqest = double(freqs.
termfreq);
789 double collfreqest = double(freqs.
collfreq);
791 auto& stats = *qopt->get_stats();
794 Assert(stats.collection_size);
796 for (
size_t i = 1; i < termfreqs_list.size(); ++i) {
801 freqest *= f.
termfreq / stats.collection_size;
802 if (
usual(stats.total_length != 0)) {
803 collfreqest *= f.
collfreq / stats.total_length;
808 if (stats.rset_size != 0)
818 unique_ptr<PostList> pl;
819 unique_ptr<EstimateOp> est;
820 switch (pls.size()) {
827 auto [new_pl, new_est] = qopt->open_post_list({}, 0, 0.0,
nullptr);
829 est = std::move(new_est);
834 est.reset(estimates.release_at(0));
837 pl.reset(
new AndPostList(pls.begin(), pls.end(), matcher));
838 if (!qopt->get_no_estimates()) {
840 std::move(estimates)));
845 if (not_ctx && !not_ctx->empty()) {
846 if (not_ctx->get_last() < first || not_ctx->get_first() > last) {
852 auto [rhs, rhs_est] = not_ctx->postlist(termfreqs ? &r_freqs : NULL,
856 auto& stats = *qopt->get_stats();
859 Assert(stats.collection_size);
862 stats.collection_size);
867 if (stats.total_length != 0) {
875 if (stats.rset_size != 0) {
883 if (!qopt->get_no_estimates()) {
886 std::move(est), std::move(rhs_est)));
897 for (
const PosFilter& filter : pos_filters) {
898 auto [new_pl, new_est] = filter.postlist(pl.release(), est.release(),
899 pls, matcher, termfreqs);
901 est = std::move(new_est);
907 if (maybe_ctx && !maybe_ctx->empty()) {
908 if (maybe_ctx->get_last() < first || maybe_ctx->get_first() > last) {
919 bool save_no_estimates = qopt->get_no_estimates();
920 qopt->set_no_estimates(
true);
921 auto [rhs, rhs_est] = maybe_ctx->postlist(termfreqs);
922 qopt->set_no_estimates(save_no_estimates);
938 return {pl.release(), est.release()};
943 Query::Internal::~Internal() { }
946 Query::Internal::get_num_subqueries() const noexcept
952 Query::Internal::get_subquery(
size_t)
const
958 Query::Internal::get_wqf()
const
964 Query::Internal::get_pos()
const
970 Query::Internal::gather_terms(
void *)
const
975 Query::Internal::get_length() const noexcept
981 Query::Internal::unserialise(
const char **
p,
const char * end,
986 unsigned char ch = *(*p)++;
988 case 4:
case 5:
case 6:
case 7: {
994 size_t n_subqs = ch & 0x07;
1001 unsigned char code = (ch >> 3) & 0x0f;
1052 }
while (--n_subqs);
1066 size_t len = ch & 0x0f;
1073 if (
size_t(end - *
p) < len)
1075 string term(*
p, len);
1078 int code = ((ch >> 4) & 0x03);
1134 switch (ch & 0x1f) {
1142 int flags =
static_cast<unsigned char>(*(*p)++);
1143 op combiner =
static_cast<op>(*(*p)++);
1144 unsigned edit_distance;
1145 size_t fixed_prefix_len;
1153 return new QueryEditDistance(pattern,
1165 int flags =
static_cast<unsigned char>(*(*p)++);
1166 op combiner =
static_cast<op>(*(*p)++);
1184 string m =
"PostingSource ";
1186 m +=
" not registered";
1190 string serialised_source;
1202 return new QueryScaleWeight(scale_factor,
1203 Query(unserialise(
p, end, reg)));
1222 string msg =
"Unknown Query serialisation: ";
1233 return ctx.
add_postlist(postlist(qopt, factor, termfreqs), termfreqs);
1241 bool keep_zero_weight)
const
1244 auto [pl_, est] = postlist(qopt, factor, termfreqs);
1245 unique_ptr<PostList> pl{pl_};
1246 if (!keep_zero_weight && pl && pl->recalc_maxweight() == 0.0) {
1256 ctx.
add_postlist(pl.release(), est.release(), termfreqs);
1264 ctx.
add_postlist(postlist(qopt, 0.0, termfreqs), termfreqs);
1273 ctx.
add_postlist(postlist(qopt, factor, termfreqs), termfreqs);
1279 QueryTerm::get_type() const noexcept
1281 return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
1285 QueryTerm::get_description()
const
1289 desc =
"<alldocuments>";
1309 if (
source->_refs == 0) {
1327 string desc =
"PostingSource(";
1328 desc +=
source->get_description();
1334 : scale_factor(factor), subquery(subquery_)
1400 unique_ptr<EstimateOp> est;
1418 double termfreq = pl->get_termfreq();
1419 auto tf = termfreq * stats.collection_size / db_size;
1420 auto rtf = termfreq * stats.rset_size / db_size;
1421 auto cf = termfreq * stats.total_length / db_size;
1426 RETURN({pl, std::move(est)});
1452 if (!
term.empty()) {
1453 vector<pair<Xapian::termpos, string>> &terms =
1454 *
static_cast<vector<pair<Xapian::termpos, string>
>*>(void_terms);
1455 terms.push_back(make_pair(
pos,
term));
1464 for (
size_t i = prefix; i != s.size(); ++i) {
1466 r +=
static_cast<unsigned char>(s[i]) * f;
1474 const string& begin,
const string* end,
1481 size_t common_prefix_len = size_t(-1);
1486 if (common_prefix_len == lo.size()) {
1487 if (common_prefix_len != hi.size())
1493 Assert(begin <= lo && (!end || hi <= *end));
1496 AssertRel(common_prefix_len, !=, hi.size());
1497 }
while (lo[common_prefix_len] == hi[common_prefix_len]);
1501 double denom = h - l;
1502 if (
rare(denom == 0.0)) {
1510 Assert(!(begin <= lo && (!end || hi <= *end)));
1515 return value_freq / 2;
1523 if (end && *end < hi) {
1528 double est = (e - b) / denom * value_freq;
1540 const auto db_size = qopt->
db_size;
1548 if (termfreqs) *termfreqs =
TermFreqs();
1552 if (termfreqs) *termfreqs =
TermFreqs();
1557 if (termfreqs) *termfreqs =
TermFreqs();
1564 *termfreqs =
TermFreqs(stats.collection_size,
1566 stats.total_length);
1575 unique_ptr<EstimateOp> est;
1578 if (value_freq == db_size) {
1588 if (termfreqs) *termfreqs *= double(value_freq) / db_size;
1590 value_freq,
slot,
string());
1591 RETURN({pl, std::move(est)});
1594 unique_ptr<EstimateOp> est;
1597 if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1599 RETURN({pl, std::move(est)});
1602 unique_ptr<EstimateOp> est;
1605 if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1607 RETURN({pl, std::move(est)});
1614 result +=
static_cast<char>(0x20 |
slot);
1616 result +=
static_cast<char>(0x20 | 15);
1632 string desc =
"VALUE_RANGE ";
1649 const auto db_size = qopt->
db_size;
1657 if (termfreqs) *termfreqs =
TermFreqs();
1661 if (termfreqs) *termfreqs =
TermFreqs();
1668 *termfreqs =
TermFreqs(stats.collection_size,
1670 stats.total_length);
1679 unique_ptr<EstimateOp> est;
1682 if (value_freq == db_size) {
1692 if (termfreqs) *termfreqs *= double(value_freq) / db_size;
1694 value_freq,
slot,
string());
1695 RETURN({pl, std::move(est)});
1698 unique_ptr<EstimateOp> est;
1701 if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1704 RETURN({pl, std::move(est)});
1713 result +=
static_cast<char>(0x20 |
slot);
1715 result +=
static_cast<char>(0x20 | 15);
1731 string desc =
"VALUE_LE ";
1746 const auto db_size = qopt->
db_size;
1754 if (termfreqs) *termfreqs =
TermFreqs();
1759 if (termfreqs) *termfreqs =
TermFreqs();
1766 *termfreqs =
TermFreqs(stats.collection_size,
1768 stats.total_length);
1776 unique_ptr<EstimateOp> est;
1779 if (value_freq == db_size) {
1790 value_freq,
slot,
string());
1791 RETURN({pl, std::move(est)});
1794 unique_ptr<EstimateOp> est;
1797 if (termfreqs) *termfreqs *= double(tf_est) / db_size;
1799 RETURN({pl, std::move(est)});
1806 result +=
static_cast<char>(0x20 | 0x10 |
slot);
1808 result +=
static_cast<char>(0x20 | 0x10 | 15);
1823 string desc =
"VALUE_GE ";
1834 : pattern(pattern_),
1835 max_expansion(max_expansion_),
1868 size_t qm_count = 0;
1869 bool had_star =
false;
1911 }
else if (qm_count > 1) {
1916 }
else if (qm_count == 1) {
1933 for ( ; i !=
tail; ++i) {
1939 for (
size_t test_o = o; test_o <=
p; ++test_o) {
1945 if (o ==
p)
return false;
1947 unsigned char b = candidate[o];
1955 }
else if (b < 0xf0) {
1960 if (
rare(
p - o < seqlen))
return false;
1965 if (
pattern[i] != candidate[o])
return false;
1974 if (candidate.size() <
min_len)
return false;
1975 if (candidate.size() >
max_len)
return false;
1981 candidate.size() -
suffix.size(),
1993 if (factor == 0.0) {
2000 if (!old_compound_weight) {
2023 factor, synonym_freqs));
2052 result +=
static_cast<char>(0x0b);
2054 result +=
static_cast<unsigned char>(
flags);
2055 result +=
static_cast<unsigned char>(
combiner);
2068 string desc =
"WILDCARD ";
2091 int edist =
edcalc(candidate, threshold);
2092 return edist <= threshold ? edist + 1 : 0;
2103 if (factor == 0.0) {
2110 if (!old_compound_weight) {
2133 factor, synonym_freqs));
2162 result +=
static_cast<char>(0x0a);
2164 result +=
static_cast<unsigned char>(
flags);
2165 result +=
static_cast<unsigned char>(
combiner);
2180 string desc =
"EDIT_DISTANCE ";
2199 desc +=
" fixed_prefix_len=";
2215 result += (*i).internal->get_length();
2220 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
2221 #define MISC(X) static_cast<unsigned char>(X)
2225 static const unsigned char first_byte[] = {
2243 AssertRel(
size_t(op_),<,
sizeof(first_byte));
2244 unsigned char ch = first_byte[op_];
2262 (*i).internal->serialise(result);
2304 (*i).internal->gather_terms(void_terms);
2314 LOGCALL_VOID(MATCH,
"QueryBranch::do_bool_or_like", ctx | qopt | termfreqs | first);
2327 (*q).internal->postlist_sub_bool_or_like(ctx, qopt, termfreqs);
2335 bool keep_zero_weight)
const
2337 LOGCALL_VOID(MATCH,
"QueryBranch::do_or_like", ctx | qopt | factor | termfreqs | elite_set_size | first | keep_zero_weight);
2346 size_t size_before = ctx.
size();
2351 (*q).internal->postlist_sub_or_like(ctx, qopt, factor,
2356 size_t out_of = ctx.
size() - size_before;
2357 if (elite_set_size && elite_set_size < out_of) {
2378 if (factor == 0.0) {
2384 return ctx.
postlist(termfreqs,
true);
2388 Assert(!old_compound_weight);
2394 if (!plest.
pl)
return {};
2414 if (factor == 0.0) {
2457 if (desc.size() > 1) {
2460 desc +=
str(parameter);
2468 desc += (*i).internal->get_description();
2491 size_t len =
term.size();
2493 if (
wqf == 1 &&
pos == 0) {
2502 }
else if (
wqf == 1) {
2506 result +=
static_cast<char>(0x40 | 0x10);
2509 result +=
static_cast<char>(0x40 | 0x10 | len);
2515 result +=
static_cast<char>(0x40 | 0x20);
2518 result +=
static_cast<char>(0x40 | 0x20 | len);
2523 }
else if (
wqf > 1 ||
pos > 0) {
2526 result +=
static_cast<char>(0x40 | 0x30);
2529 result +=
static_cast<char>(0x40 | 0x30 | len);
2539 result +=
static_cast<char>(0x40);
2542 result +=
static_cast<char>(0x40 | len);
2550 result +=
static_cast<char>(0x0c);
2611 if (!(*i).internal->postlist_sub_and_like(ctx, qopt, factor, termfreqs))
2711 if (factor == 0.0) {
2723 bool keep_zero_weight)
const
2725 do_or_like(ctx, qopt, factor, termfreqs, 0, 0, keep_zero_weight);
2757 if (!
subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor,
2785 (*i).internal->postlist_sub_xor(ctx, qopt, factor, termfreqs);
2810 if (!
subqueries[0].internal->postlist_sub_and_like(ctx, qopt, factor,
2816 if (factor != 0.0) {
2823 do_or_like(maybe_ctx, qopt, factor, termfreqs, 0, 1, need_wdf);
2851 if (!(*i).internal->postlist_sub_and_like(ctx, qopt, factor, termfreqs))
2890 result = ctx.
add_postlist(std::move(plest), termfreqs);
2892 if (factor == 0.0)
break;
2951 bool keep_zero_weight)
const
3143 string d =
"(SYNONYM ";
3144 d +=
subqueries[0].internal->get_description();
3172 result +=
static_cast<char>(0x00);
PostList class implementing Query::OP_AND_MAYBE.
PostList class implementing Query::OP_AND_NOT.
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
PostList class implementing unweighted Query::OP_OR.
PostList class implementing Query::OP_AND_MAYBE.
PostList class implementing Query::OP_AND_NOT.
PostList class implementing unweighted Query::OP_OR.
Class for estimating the total number of matching documents.
Postlist which matches an exact phrase using positional information.
Abstract base class for leaf postlists.
N-way OR postlist with wt=max(wt_i).
Postlist which matches terms occurring within a specified window.
Wrapper postlist providing positions for an OR.
PostList class implementing Query::OP_OR.
Postlist which matches a phrase using positional information.
bool * get_max_weight_cached_flag_ptr()
Return pointer to flag to set to false to invalidate cached max weight.
Virtual base class for Database internals.
virtual std::string get_value_upper_bound(valueno slot) const =0
Get an upper bound on the values stored in the given value slot.
virtual std::string get_value_lower_bound(valueno slot) const =0
Get a lower bound on the values stored in the given value slot.
virtual PostList * open_post_list(std::string_view term) const =0
Return a PostList suitable for use in a PostingIterator.
virtual bool has_positions() const =0
Check whether this database contains any positional information.
virtual doccount get_value_freq(valueno slot) const =0
Return the frequency of a given value slot.
An indexed database of documents.
PostListAndEstimate postlist(TermFreqs *termfreqs)
bool add_postlist(PostList *pl, unique_ptr< EstimateOp > &&estimate, TermFreqs *termfreqs)
OrContext & get_maybe_ctx(size_t reserve)
AndContext(QueryOptimiser *qopt_, size_t reserve)
unique_ptr< OrContext > maybe_ctx
void add_pos_filter(Query::op op_, size_t n_subqs, Xapian::termcount window)
OrContext & get_not_ctx(size_t reserve)
bool add_postlist(PostListAndEstimate p, TermFreqs *termfreqs)
list< PosFilter > pos_filters
unique_ptr< OrContext > not_ctx
VecUniquePtr< EstimateOp > estimates
void add_postlist(PostList *pl, EstimateOp *estimate, TermFreqs *termfreqs)
vector< TermFreqs > termfreqs_list
Xapian::docid get_first() const
void expand_edit_distance(const QueryEditDistance *query, double factor, TermFreqs *termfreqs)
Expand an edit distance query.
Context(QueryOptimiser *qopt_, size_t reserve)
void add_termfreqs(TermFreqs *termfreqs)
void expand_wildcard(const QueryWildcard *query, double factor, TermFreqs *termfreqs)
Expand a wildcard query.
void add_postlist(PostListAndEstimate p, TermFreqs *termfreqs)
Xapian::docid get_last() const
Xapian::termcount size() const
void shrink(size_t new_size)
PostListAndEstimate postlist_max()
void select_elite_set(size_t set_size, size_t out_of)
Select the best set_size postlists from the last out_of added.
PostListAndEstimate postlist(TermFreqs *termfreqs, bool bool_or=false)
OrContext(QueryOptimiser *qopt_, size_t reserve)
size_t begin
Start and end indices for the PostLists this positional filter uses.
PostListAndEstimate postlist(PostList *pl, EstimateOp *est, const vector< PostList * > &pls, PostListTree *pltree, TermFreqs *termfreqs) const
PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_, Xapian::termcount window_)
Abstract base class for postlists.
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
virtual double recalc_maxweight()=0
Recalculate the upper bound on what get_weight() can return.
virtual void get_docid_range(docid &first, docid &last) const
Get the bounds on the range of docids this PostList can return.
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void add_subquery(const Xapian::Query &subquery)
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void add_subquery(const Xapian::Query &subquery)
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
void add_subquery(const Xapian::Query &subquery)
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
std::string get_description() const
virtual Query::Internal * done()=0
void do_bool_or_like(OrContext &ctx, QueryOptimiser *qopt, TermFreqs *termfreqs, size_t first=0) const
void do_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs, Xapian::termcount elite_set_size=0, size_t first=0, bool keep_zero_weight=true) const
Process OR-like subqueries.
virtual Xapian::Query::op get_op() const =0
void serialise_(std::string &result, Xapian::termcount parameter=0) const
virtual void add_subquery(const Xapian::Query &subquery)=0
Xapian::Query::op get_type() const noexcept
const std::string get_description_helper(const char *op, Xapian::termcount window=0) const
PostListAndEstimate do_max(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void gather_terms(void *void_terms) const
termcount get_length() const noexcept
PostListAndEstimate do_synonym(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
const Query get_subquery(size_t n) const
size_t get_num_subqueries() const noexcept
void serialise(std::string &result) const
Xapian::termcount max_expansion
void serialise(std::string &result) const
int test(const std::string &candidate) const
Perform edit distance test.
unsigned get_threshold() const
QueryEditDistance * change_combiner(Xapian::Query::op new_op)
Change the combining operator.
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
termcount get_length() const noexcept
Xapian::Query::op get_type() const noexcept
std::string get_description() const
EditDistanceCalculator edcalc
Xapian::termcount set_size
void serialise(std::string &result) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs, bool keep_zero_weight) const
std::string get_description() const
Xapian::Query::op get_op() const
std::string get_description() const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void serialise(std::string &result) const
Xapian::Query::op get_type() const noexcept
std::string get_description() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
void serialise(std::string &result) const
Xapian::Query::op get_op() const
std::string get_description() const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::doccount shard_index
Xapian::termcount get_total_subqs() const
void set_no_estimates(bool f)
bool need_wdf_for_compound_weight() const
void destroy_postlist(PostList *pl)
PostListAndEstimate open_post_list(const std::string &term, Xapian::termcount wqf, double factor, TermFreqs *termfreqs)
Create a PostList object for term.
PostListAndEstimate make_synonym_postlist(PostListAndEstimate or_pl, double factor, const TermFreqs &termfreqs)
Create a SynonymPostList object.
const Xapian::Weight::Internal * get_stats() const
bool get_no_estimates() const
const Xapian::Database::Internal & db
void set_total_subqs(Xapian::termcount n)
void add_subquery(const Xapian::Query &subquery)
Xapian::Query::op get_op() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void postlist_sub_bool_or_like(OrContext &ctx, QueryOptimiser *qopt, TermFreqs *termfreqs) const
void postlist_sub_or_like(OrContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs, bool keep_zero_weight) const
std::string get_description() const
std::string get_description() const
Xapian::Query::op get_op() const
void serialise(std::string &result) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Internal::opt_intrusive_ptr< PostingSource > source
std::string get_description() const
Xapian::Query::op get_type() const noexcept
void serialise(std::string &result) const
void serialise(std::string &result) const
std::string get_description() const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
const Query get_subquery(size_t n) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
QueryScaleWeight(double factor, const Query &subquery_)
Xapian::Query::op get_type() const noexcept
void gather_terms(void *void_terms) const
size_t get_num_subqueries() const noexcept
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_op() const
std::string get_description() const
void serialise(std::string &result) const
void gather_terms(void *void_terms) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
bool postlist_sub_and_like(AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
Xapian::Query::op get_type() const noexcept
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
void serialise(std::string &result) const
void serialise(std::string &result) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
Xapian::Query::op get_type() const noexcept
Xapian::Query::op get_type() const noexcept
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void serialise(std::string &result) const
std::string get_description() const
bool test_wildcard_(const std::string &candidate, size_t o, size_t p, size_t i) const
size_t head
Fixed head and tail lengths, and min/max length term that can match.
Xapian::termcount max_expansion
Xapian::Query::op get_type() const noexcept
QueryWildcard * change_combiner(Xapian::Query::op new_op)
Change the combining operator.
bool test_prefix_known(const std::string &candidate) const
Perform wildcard test on candidate known to match prefix.
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void serialise(std::string &result) const
QueryWildcard(std::string_view pattern_, Xapian::termcount max_expansion_, int flags_, Query::op combiner_)
std::string get_description() const
termcount get_length() const noexcept
bool postlist_windowed(Xapian::Query::op op, AndContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
PostListAndEstimate postlist(QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
void postlist_sub_xor(XorContext &ctx, QueryOptimiser *qopt, double factor, TermFreqs *termfreqs) const
std::string get_description() const
Xapian::Query::op get_op() const
PostListAndEstimate postlist(TermFreqs *termfreqs)
XorContext(QueryOptimiser *qopt_, size_t reserve)
InvalidArgumentError indicates an invalid parameter value was passed to the API.
InvalidOperationError indicates the API was used in an invalid way.
Base class which provides an "external" source of postings.
virtual PostingSource * unserialise_with_registry(const std::string &serialised, const Registry &registry) const
Create object given string serialisation returned by serialise().
PostingSource * release()
Start reference counting this object.
Class representing a query.
const Query get_subquery(size_t n) const
Read a top level subquery.
op get_type() const noexcept
Get the type of the top level of the query.
@ OP_SCALE_WEIGHT
Scale the weight contributed by a subquery.
@ LEAF_POSTING_SOURCE
Value returned by get_type() for a PostingSource.
@ OP_MAX
Pick the maximum weight of any subquery.
@ OP_VALUE_RANGE
Match only documents where a value slot is within a given range.
@ OP_WILDCARD
Wildcard expansion.
@ OP_XOR
Match documents which an odd number of subqueries match.
@ OP_AND_MAYBE
Match the first subquery taking extra weight from other subqueries.
@ LEAF_MATCH_ALL
Value returned by get_type() for MatchAll or equivalent.
@ OP_NEAR
Match only documents where all subqueries match near each other.
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_FILTER
Match like OP_AND but only taking weight from the first subquery.
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
@ OP_VALUE_LE
Match only documents where a value slot is <= a given value.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
@ OP_AND_NOT
Match documents which the first subquery matches but no others do.
@ OP_EDIT_DISTANCE
Edit distance expansion.
@ LEAF_TERM
Value returned by get_type() for a term.
@ OP_VALUE_GE
Match only documents where a value slot is >= a given value.
@ OP_INVALID
Construct an invalid query.
bool empty() const noexcept
Check if this query is Xapian::Query::MatchNothing.
@ WILDCARD_PATTERN_MULTI
Support * which matches 0 or more characters.
@ WILDCARD_LIMIT_FIRST
Stop expanding when OP_WILDCARD reaches its expansion limit.
@ WILDCARD_LIMIT_MOST_FREQUENT
Limit OP_WILDCARD expansion to the most frequent terms.
@ WILDCARD_PATTERN_SINGLE
Support ? which matches a single character.
Xapian::Internal::intrusive_ptr< Internal > internal
Registry for user subclasses.
const Xapian::PostingSource * get_posting_source(std::string_view name) const
Get a posting source given a name.
Indicates an error in the std::string serialisation of an object.
T::Internal *const * const_iterator
const_iterator begin() const
const_iterator end() const
void push_back(const T &elt)
Abstract base class for termlists.
virtual Internal * skip_to(std::string_view term)=0
Skip forward to the specified term.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
Suitable for "simple" type T.
const_iterator end() const
void reserve(size_type n)
void erase(const_iterator it)
const_iterator begin() const
WildcardError indicates an error expanding a wildcarded query.
#define UNSIGNED_OVERFLOW_OK(X)
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Append a string to an object description, escaping invalid UTF-8.
Edit distance calculation algorithm.
Hierarchy of classes which Xapian can throw as exceptions.
Return docs containing terms forming a particular exact phrase.
Return document ids from an external source.
C++ STL heap implementation with extensions.
N-way OR postlist with wt=max(wt_i)
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
string str(int value)
Convert int to std::string.
static double string_frac(const string &s, size_t prefix)
static T estimate_and_not(T l, T r, U n)
static Xapian::doccount estimate_range_freq(const string &lo, const string &hi, const string &begin, const string *end, Xapian::doccount value_freq)
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Return docs containing terms within a specified window.
Various assertion macros.
#define AssertRel(A, REL, B)
Wrapper postlist providing positions for an OR.
PostList class implementing Query::OP_OR.
void unpack_throw_serialisation_error(const char *p)
Throw appropriate SerialisationError.
Pack types into strings and unpack them again.
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
void pack_string(std::string &s, std::string_view value)
Append an encoded std::string to a string.
void pack_string_empty(std::string &s)
Append an empty encoded std::string to a string.
Return docs containing terms forming a particular phrase.
External sources of posting information.
Abstract base class for postlists.
static constexpr unsigned MAX_UTF_8_CHARACTER_LENGTH
Details passed around while building PostList tree from Query tree.
string serialise_double(double v)
Serialise a double to a string.
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Functions to serialise and unserialise a double.
Convert types to std::string.
Various handy string-related helpers.
bool endswith(std::string_view s, char sfx)
bool startswith(std::string_view s, char pfx)
Class providing an operator which sorts postlists to select max or terms.
bool operator()(PostList *a, PostList *b)
Return true if and only if a has a strictly greater termweight than b.
Comparison functor which orders by descending termfreq.
bool operator()(const PostList *a, const PostList *b) const
Order PostList* by descending get_termfreq().
The frequencies for a term.
Xapian::doccount reltermfreq
Xapian::doccount termfreq
Xapian::termcount collfreq
Abstract base class for termlists.
Unicode and UTF-8 related classes and functions.
void description_append(std::string &desc, std::string_view s)
Return document ids matching a >= test on a specified doc value.
Return document ids matching a range test on a specified doc value.