41 #ifdef XAPIAN_HAS_REMOTE_BACKEND
71 #ifdef XAPIAN_HAS_REMOTE_BACKEND
78 template<
typename Action>
83 size_t n_remotes = remotes.size();
89 action(remotes[0].get());
94 unique_ptr<struct pollfd[]> fds(
new struct pollfd[n_remotes]);
95 for (
size_t i = 0; i != n_remotes; ++i) {
96 fds[i].fd = remotes[i]->get_read_fd();
97 fds[i].events = POLLIN;
101 int r = poll(fds.get(), n_remotes, -1);
104 if (r == 0 || errno == EINTR || errno == EAGAIN) {
111 while (i != n_remotes) {
112 if (fds[i].revents) {
113 action(remotes[i].get());
115 swap(remotes[i], remotes[--n_remotes]);
116 fds[i] = fds[n_remotes];
123 }
while (n_remotes > 1);
127 if (n_remotes == 1) {
128 action(remotes[0].get());
131 size_t n_remotes = first_nonselectable;
133 while (n_remotes > 1) {
136 for (
size_t i = 0; i != n_remotes; ++i) {
137 int fd = remotes[i]->get_read_fd();
139 if (fd >= nfds) nfds = fd + 1;
142 int r = select(nfds, &fds, NULL, NULL, NULL);
146 if (r == 0 || eno == EINTR || eno == EAGAIN) {
153 while (i != n_remotes) {
154 int fd = remotes[i]->get_read_fd();
155 if (FD_ISSET(fd, &fds)) {
156 action(remotes[i].get());
158 swap(remotes[i], remotes[--n_remotes]);
169 if (n_remotes == 1) {
170 action(remotes[0].get());
174 for (
size_t i = first_nonselectable; i != remotes.size(); ++i) {
175 action(remotes[i].get());
190 int percent_threshold,
191 double weight_threshold,
195 bool sort_val_reverse,
204 vector<Xapian::RSet> subrsets;
206 rset->
internal->shard(n_shards, subrsets);
208 subrsets.resize(n_shards);
215 subdb = multidb->shards[i];
218 #ifdef XAPIAN_HAS_REMOTE_BACKEND
225 as_rem->set_query(
query, query_length,
226 collapse_key, collapse_max,
227 order, sort_key, sort_by, sort_val_reverse,
229 n_shards == 1 ? percent_threshold : 0,
232 subrsets[i], matchspies);
241 (void)percent_threshold;
242 (void)weight_threshold;
246 (void)sort_val_reverse;
261 #ifdef XAPIAN_HAS_REMOTE_BACKEND
271 int fd =
remotes[i]->get_read_fd();
272 if (fd >= FD_SETSIZE) {
294 int fd =
remotes[i]->get_read_fd();
295 HANDLE handle = (HANDLE)_get_osfhandle(fd);
296 if (handle != INVALID_HANDLE_VALUE) {
304 if (i == FD_SETSIZE) {
325 auto submatch =
locals[i].get();
327 submatch->prepare_match(subrsets[i], stats);
332 #ifdef XAPIAN_HAS_REMOTE_BACKEND
349 int percent_threshold,
350 double percent_threshold_factor,
351 double weight_threshold,
355 bool sort_val_reverse,
357 const vector<opt_ptr_spy>& matchspies)
365 vector<PostList*> postlists;
366 postlists.reserve(
locals.size());
377 bool all_null =
true;
378 for (
size_t i = 0; i !=
locals.size(); ++i) {
380 postlists.push_back(
nullptr);
391 total_subqs = max(total_subqs, total_subqs_i);
392 if (plest.
pl !=
nullptr) {
396 plest.
est.release()));
397 if (check_at_least) {
407 postlists.push_back(plest.
pl);
410 Assert(!postlists.empty());
413 vector<Result>
dummy;
420 for (
auto pl : postlists)
delete pl;
430 if (max_possible == 0.0) {
432 if (sort_by ==
REL) {
441 percent_threshold = 0;
442 percent_threshold_factor = 0.0;
447 if (check_at_least == 0) {
454 for (
size_t i = 0; i != estimates.
size(); ++i) {
458 matches_lower_bound += e.
min;
459 matches_estimated += e.
est;
460 matches_upper_bound += e.
max;
465 matches_lower_bound = 0;
473 if (matches_lower_bound > collapse_max)
474 matches_lower_bound = collapse_max;
477 vector<Result>
dummy;
483 uncollapsed_lower_bound,
497 bool stop_once_full = (sort_forward &&
501 ProtoMSet proto_mset(first, maxitems, check_at_least,
502 mcmp, sort_by, total_subqs,
504 collapse_key, collapse_max,
505 percent_threshold, percent_threshold_factor,
513 if (!pltree.
next(min_weight)) {
521 bool calculated_weight = (sort_by ==
DOCID);
522 if (!calculated_weight) {
523 if (sort_by !=
VAL || min_weight > 0.0) {
525 if (weight < min_weight) {
528 calculated_weight =
true;
534 Result new_item(weight, did);
536 if (sort_by !=
DOCID && sort_by !=
REL) {
543 if (proto_mset.
early_reject(new_item, calculated_weight, spymaster,
550 if (!calculated_weight) {
553 calculated_weight =
true;
555 spymaster(doc, weight);
558 if (!calculated_weight) {
563 if (!proto_mset.
process(std::move(new_item), vsdoc))
571 return proto_mset.
finalise(mdecider,
586 int percent_threshold,
587 double weight_threshold,
591 bool sort_val_reverse,
595 AssertRel(check_at_least, >=, first + maxitems);
597 #ifdef XAPIAN_HAS_REMOTE_BACKEND
601 remotes[0]->start_match(first, maxitems, check_at_least, sorter,
603 return remotes[0]->get_mset(matchspies);
609 double percent_threshold_factor = percent_threshold / 100.0;
612 percent_threshold_factor -= DBL_EPSILON;
614 #ifdef XAPIAN_HAS_REMOTE_BACKEND
615 for (
auto&& submatch :
remotes) {
620 if (collapse_max != 0) {
624 AssertRel(check_at_least, >=, first + maxitems);
625 remote_maxitems = check_at_least;
627 submatch->
start_match(0, remote_maxitems, check_at_least, sorter,
634 for (
auto&& submatch :
locals) {
641 double local_percent_threshold_factor = percent_threshold_factor;
642 #ifdef XAPIAN_HAS_REMOTE_BACKEND
647 local_maxitems = first + maxitems;
648 if (collapse_max != 0) {
654 AssertRel(check_at_least, >=, first + maxitems);
655 local_maxitems = check_at_least;
657 local_percent_threshold_factor = 0.0;
660 local_mset =
get_local_mset(local_first, local_maxitems, check_at_least,
662 sorter, collapse_key, collapse_max,
664 local_percent_threshold_factor,
665 weight_threshold, order, sort_key, sort_by,
666 sort_val_reverse, time_limit, matchspies);
669 #ifdef XAPIAN_HAS_REMOTE_BACKEND
678 vector<pair<Xapian::MSet, Xapian::doccount>> msets;
685 auto& merged_stats = merged_mset.
internal->stats;
687 merged_stats = std::move(remote_mset.
internal->stats);
689 merged_stats->merge(*(remote_mset.
internal->stats));
691 if (remote_mset.
empty()) {
696 msets.push_back({remote_mset, 0});
700 if (!local_mset.
empty())
701 msets.push_back({local_mset, 0});
704 merged_mset.
internal->stats->merge(stats);
707 if (merged_mset.internal->max_possible == 0.0) {
709 if (sort_by ==
REL) {
718 percent_threshold = 0;
719 percent_threshold_factor = 0.0;
725 [&](
const pair<Xapian::MSet, Xapian::doccount>& a,
726 const pair<Xapian::MSet, Xapian::doccount>& b) {
727 return mcmp(b.first.internal->items[b.second],
728 a.first.internal->items[a.second]);
731 Heap::make(msets.begin(), msets.end(), heap_cmp);
733 double min_weight = 0.0;
734 if (percent_threshold) {
735 min_weight = percent_threshold_factor * 100.0 /
736 merged_mset.internal->percent_scale_factor;
740 merged_mset.internal->first = first;
741 while (!msets.empty() && merged_mset.size() != maxitems) {
742 auto& front = msets.front();
743 auto& result = front.first.internal->items[front.second];
744 if (percent_threshold) {
745 if (result.get_weight() < min_weight) {
751 if (!collapser || collapser.add(result.get_collapse_key())) {
759 merged_mset.internal->items.push_back(std::move(result));
762 auto n = front.second + 1;
763 if (n == front.first.size()) {
764 Heap::pop(msets.begin(), msets.end(), heap_cmp);
765 msets.resize(msets.size() - 1);
773 auto todo = check_at_least - maxitems;
774 if (merged_mset.size() != maxitems) {
777 for ( ; !msets.empty() && todo; --todo) {
778 auto& front = msets.front();
779 auto& result = front.first.internal->items[front.second];
780 if (percent_threshold) {
781 if (result.get_weight() < min_weight) {
787 (void)collapser.add(result.get_collapse_key());
788 auto n = front.second + 1;
789 if (n == front.first.size()) {
790 Heap::pop(msets.begin(), msets.end(), heap_cmp);
791 msets.resize(msets.size() - 1);
798 auto mseti = merged_mset.internal;
799 collapser.finalise(mseti->items, percent_threshold);
801 if (check_at_least > 0) {
808 auto collapser_lb = collapser.get_matches_lower_bound();
809 if (mseti->matches_upper_bound <= check_at_least) {
810 mseti->matches_lower_bound = collapser_lb;
811 mseti->matches_estimated = collapser_lb;
812 mseti->matches_upper_bound = collapser_lb;
816 mseti->matches_lower_bound = max(mseti->matches_lower_bound,
820 double unique_rate = 1.0;
824 if (docs_considered > 0) {
827 double unique = double(docs_considered - dups_ignored);
828 unique_rate = unique / double(docs_considered);
833 mseti->matches_upper_bound -= collapser.get_dups_ignored();
835 double estimate_scale = unique_rate;
837 if (estimate_scale != 1.0) {
838 auto l = mseti->matches_lower_bound;
839 auto u = mseti->matches_upper_bound;
841 mseti->matches_estimated = e;
845 AssertRel(mseti->matches_lower_bound, <=, mseti->matches_upper_bound);
846 mseti->matches_estimated = std::clamp(mseti->matches_estimated,
847 mseti->matches_lower_bound,
848 mseti->matches_upper_bound);
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
if(!(properties &BACKEND))
Simpler version of Collapser used when merging MSet objects.
PostList which applies a MatchDecider.
Class for estimating the total number of matching documents.
Xapian::MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, Xapian::Weight::Internal &stats, const Xapian::Weight &wtscheme, const Xapian::MatchDecider *mdecider, const Xapian::KeyMaker *sorter, Xapian::valueno collapse_key, Xapian::doccount collapse_max, int percent_threshold, double weight_threshold, Xapian::Enquire::docid_order order, Xapian::valueno sort_key, Xapian::Enquire::Internal::sort_setting sort_by, bool sort_val_reverse, double time_limit, const std::vector< opt_ptr_spy > &matchspies)
Run the match and produce an MSet object.
std::vector< std::unique_ptr< LocalSubMatch > > locals
LocalSubMatch objects for local databases.
std::size_t first_nonselectable
Partition point in remotes.
Xapian::MSet get_local_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, const Xapian::Weight &wtscheme, const Xapian::MatchDecider *mdecider, const Xapian::KeyMaker *sorter, Xapian::valueno collapse_key, Xapian::doccount collapse_max, int percent_threshold, double percent_threshold_factor, double weight_threshold, Xapian::Enquire::docid_order order, Xapian::valueno sort_key, Xapian::Enquire::Internal::sort_setting sort_by, bool sort_val_reverse, double time_limit, const std::vector< opt_ptr_spy > &matchspies)
std::vector< std::unique_ptr< RemoteSubMatch > > remotes
RemoteSubMatch objects for remote databases.
void for_all_remotes(Action action)
Perform action on remotes as they become ready using poll() or select().
Matcher(const Matcher &)=delete
Sharded database backend.
void set_postlists(PostList **pls, Xapian::doccount n_shards_)
Xapian::docid get_docid() const
double recalc_maxweight()
bool next(double w_min)
Return false if we're done.
double get_weight() const
void delete_postlists()
Delete all the PostList objects.
bool process(Result &&new_item, ValueStreamDocument &vsdoc)
Process new_item.
bool early_reject(Result &new_item, bool calculated_weight, SpyMaster &spymaster, const Xapian::Document &doc)
void set_new_min_weight(double min_wt)
double get_min_weight() const
Xapian::MSet finalise(const Xapian::MatchDecider *mdecider, const std::vector< std::unique_ptr< LocalSubMatch >> &locals, const Xapian::VecUniquePtr< EstimateOp > &estimates)
RemoteDatabase is the base class for remote database implementations.
Class for performing matching on a remote database.
void start_match(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, const Xapian::KeyMaker *sorter, const Xapian::Weight::Internal &total_stats)
Start the match.
Xapian::doccount get_shard() const
Return the index of the corresponding Database shard.
Xapian::MSet get_mset(const std::vector< opt_ptr_spy > &matchspies)
Get MSet.
void prepare_match(Xapian::Weight::Internal &total_stats)
Fetch and collate statistics.
void set_weight(double weight_)
void set_sort_key(const std::string &k)
A document which gets its values from a ValueStreamManager.
void set_document(Xapian::docid did_)
std::string get_value(Xapian::valueno slot) const
Virtual base class for Database internals.
virtual int get_backend_info(std::string *path) const =0
Get backend information about this database.
virtual void readahead_for_query(const Query &query) const
An indexed database of documents.
Xapian::Internal::intrusive_ptr_nonnull< Internal > internal
Class representing a document.
docid_order
Ordering of docids.
@ DESCENDING
docids sort in descending order.
unsigned _refs
Reference count.
A smart pointer that optionally uses intrusive reference counting.
Virtual base class for key making functors.
Class representing a list of search results.
Xapian::Internal::intrusive_ptr_nonnull< Internal > internal
bool empty() const
Return true if this MSet object is empty.
Abstract base class for match deciders.
Indicates a problem communicating with a remote database.
Class representing a query.
bool empty() const noexcept
Check if this query is Xapian::Query::MatchNothing.
Class representing a set of documents judged as relevant.
Xapian::Internal::intrusive_ptr< Internal > internal
UnimplementedError indicates an attempt to use an unimplemented feature.
Suitable for "simple" type T.
Class to hold statistics for a given collection.
void set_query(const Xapian::Query &query_)
Abstract base class for weighting schemes.
PostList which applies a MatchDecider.
Xapian::Enquire internals.
SubMatch class for a local database.
static void unimplemented(const char *msg)
static constexpr auto DOCID
static constexpr auto REL
static constexpr auto VAL_REL
static constexpr auto REL_VAL
static constexpr auto VAL
MSetCmp get_msetcmp_function(Xapian::Enquire::Internal::sort_setting sort_by, bool sort_forward, bool sort_val_reverse)
Select the appropriate msetcmp function.
Result comparison functions.
Work around MSVC's unhelpful non-standard invalid parameter handling.
Sharded database backend.
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Various assertion macros.
#define AssertRel(A, REL, B)
Class for managing a tree of PostList objects.
RemoteDatabase is the base class for remote database implementations.
SubMatch class for a remote database.
Set of documents judged as relevant.
include <sys/select.h> with portability workarounds.
Socket handling utilities.
Class for managing MatchSpy objects during the match.
std::unique_ptr< EstimateOp > est
A document which gets its values from a ValueStreamManager.
Define preprocessor symbols for the library version.
Xapian::Weight::Internal class, holding database and term statistics.