sourcedoc/html/mset_8cc_source.html

 /* Copyright (C) 2017,2024,2025 Olly Betts

  * Copyright (C) 2018 Uppinder Chugh

  *

  * This program is free software; you can redistribute it and/or modify

  * it under the terms of the GNU General Public License as published by

  * the Free Software Foundation; either version 2 of the License, or

  * (at your option) any later version.

  *

  * This program is distributed in the hope that it will be useful,

  * but WITHOUT ANY WARRANTY; without even the implied warranty of

  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

  * GNU General Public License for more details.

  *

  * You should have received a copy of the GNU General Public License

  * along with this program; if not, see

  * <https://www.gnu.org/licenses/>.

  */


 #include <config.h>


 #include "msetinternal.h"

 #include "xapian/mset.h"


 // FIXME: Clustering API needs work: #include "xapian/cluster.h"


 #include "backends/documentinternal.h"

 #include "net/serialise.h"

 #include "matcher/msetcmp.h"

 #include "omassert.h"

 #include "pack.h"

 #include "roundestimate.h"

 #include "serialise-double.h"

 #include "str.h"

 #include "unicode/description_append.h"


 #include <algorithm>

 #include <cfloat>

 #include <string>

 #include <string_view>

 #include <unordered_set>


 using namespace std;


 namespace Xapian {


 // Copy constructor: compiler-generated member-wise copy.
 MSet::MSet(const MSet&) = default;


 // Copy assignment: compiler-generated member-wise copy.
 MSet&

 MSet::operator=(const MSet&) = default;


 // Move constructor: compiler-generated member-wise move.
 MSet::MSet(MSet&&) = default;


 // Move assignment: compiler-generated member-wise move.
 MSet&

 MSet::operator=(MSet&&) = default;


 // Default constructor: creates a new default-constructed Internal to hold
 // the MSet's state.
 MSet::MSet() : internal(new MSet::Internal) {}


 // Construct an MSet wrapping the Internal object passed in.
 MSet::MSet(Internal* internal_) : internal(internal_) {}


 // Destructor: defined here (with an empty body) rather than in the header.
 MSet::~MSet() {}


 // Forward a fetch request for the range [first, last] to
 // MSet::Internal::fetch(), which does the actual work.
 void

 MSet::fetch_(Xapian::doccount first, Xapian::doccount last) const

 {

     internal->fetch(first, last);

 }


 // Update the weight of the item at position i to weight.
 //
 // Forwards to MSet::Internal::set_item_weight(), which also maintains
 // max_attained and max_possible.
 void

 MSet::set_item_weight(Xapian::doccount i, double weight)

 {

     internal->set_item_weight(i, weight);

 }


 #if 0 // FIXME: Diversification API needs work.

 static double

 evaluate_dmset(const vector<Xapian::docid>& dmset,

                const Xapian::ClusterSet& cset,

                double factor1,

                double factor2,

                const Xapian::MSet& mset,

                const vector<double>& dissimilarity)

 {

     double score_1 = 0, score_2 = 0;


     // FIXME: We could compute score_1 once then adjust for each candidate

     // change.

     // Seems hard to do similar for score_2 though.

     for (auto mset_index : dmset)

         score_1 += mset[mset_index].get_weight();


     auto cset_size = cset.size();

     for (Xapian::doccount c = 0; c < cset_size; ++c) {

         double min_dist = numeric_limits<double>::max();

         unsigned int pos = 1;

         for (auto mset_index : dmset) {

             // FIXME: Pre-compute 1.0 / log(2.0 + i) for i = [0, dmset.size()) ?

             double weight = dissimilarity[mset_index * cset_size + c];

             weight /= log(1.0 + pos);

             min_dist = min(min_dist, weight);

             ++pos;

         }

         score_2 += min_dist;

     }


     return factor2 * score_2 - factor1 * score_1;

 }


 /** Reorder the MSet so that a diverse set of k documents comes first.
  *
  *  Starts with the top-k documents as the selected set, clusters the MSet
  *  into k clusters, then repeatedly tries replacing each selected document
  *  with a candidate taken from each cluster, keeping a replacement whenever
  *  it lowers the evaluate_dmset() score.  Once no replacement helps, the
  *  MSet items are stably partitioned so the selected documents come first.
  *
  *  @param k        Number of documents to select (the caller ensures k >= 2).
  *  @param r        Maximum number of candidate documents taken per cluster.
  *  @param factor1  Multiplier for the relevance term of the score.
  *  @param factor2  Multiplier for the diversity term of the score.
  */
 void
 MSet::diversify_(Xapian::doccount k,
                  Xapian::doccount r,
                  double factor1,
                  double factor2)
 {
     // Ensured by inlined caller.
     AssertRel(k, >=, 2);

     auto mset_size = size();
     if (mset_size <= k) {
         // Picking k documents would pick the whole MSet so nothing to do.
         //
         // Since k >= 2, this means we don't try to diversify an MSet with
         // 2 documents (for which reordering can't usefully improve diversity
         // since the only possible change is to swap the order of the 2
         // documents).
         return;
     }

     std::vector<Xapian::doccount> main_dmset;
     main_dmset.reserve(k);

     Xapian::doccount count = 0;
     TermListGroup tlg(*this);
     std::vector<Xapian::Point> points;
     points.reserve(mset_size);
     for (MSetIterator it = begin(); it != end(); ++it) {
         Xapian::Document doc = it.get_document();
         // Record the MSet index on the document so it can be recovered from
         // the clustered documents later.
         doc.internal->set_index(count);
         points.push_back(Xapian::Point(tlg, doc));
         // Initial top-k diversified documents
         if (count < k) {
             // The initial diversified document set is the top-k documents from
             // the MSet.
             main_dmset.push_back(count);
         }
         ++count;
     }

     // Cluster the MSet into k clusters.
     Xapian::ClusterSet cset = Xapian::LCDClusterer(k).cluster(*this);

     // Pre-compute all the dissimilarity values, laid out so the entry for
     // MSet index m and cluster c is at index (m * cset_size + c).
     auto cset_size = cset.size();
     std::vector<double> dissimilarity;
     dissimilarity.reserve(cset_size * points.size());
     {
         Xapian::CosineDistance d;
         for (const auto& point : points) {
             for (unsigned int c = 0; c < cset_size; ++c) {
                 double dist = d.similarity(point, cset[c].get_centroid());
                 dissimilarity.push_back(1.0 - dist);
             }
         }
     }

     // Build topc, which contains the union of the top-r relevant documents of
     // each cluster.
     vector<Xapian::docid> topc;
     for (Xapian::doccount c = 0; c < cset_size; ++c) {
         // FIXME: This is supposed to pick the `r` most relevant documents, but
         // actually seems to pick those with the lowest docids.
         auto documents = cset[c].get_documents();
         auto limit = std::min(r, documents.size());
         for (Xapian::doccount d = 0; d < limit; ++d) {
             auto mset_index = documents[d].internal->get_index();
             topc.push_back(mset_index);
         }
     }

     vector<Xapian::doccount> curr_dmset = main_dmset;

     while (true) {
         bool found_better_dmset = false;
         for (unsigned int i = 0; i < main_dmset.size(); ++i) {
             auto curr_doc = main_dmset[i];
             double best_score = evaluate_dmset(curr_dmset, cset,
                                                factor1, factor2,
                                                *this, dissimilarity);
             bool found_better_doc = false;

             for (unsigned int j = 0; j < topc.size(); ++j) {
                 // Continue if candidate document from topc already
                 // exists in curr_dmset.  FIXME: Linear search!
                 auto candidate_doc = find(curr_dmset.begin(), curr_dmset.end(),
                                           topc[j]);
                 if (candidate_doc != curr_dmset.end()) {
                     continue;
                 }

                 // Temporarily substitute the candidate at position i and
                 // score the resulting set.
                 auto temp_doc = curr_dmset[i];
                 curr_dmset[i] = topc[j];
                 double score = evaluate_dmset(curr_dmset, cset,
                                               factor1, factor2,
                                               *this, dissimilarity);

                 if (score < best_score) {
                     curr_doc = curr_dmset[i];
                     best_score = score;
                     found_better_doc = true;
                 }

                 curr_dmset[i] = temp_doc;
             }
             if (found_better_doc) {
                 curr_dmset[i] = curr_doc;
                 found_better_dmset = true;
             }
         }

         // Terminate algorithm when there's no change in current
         // document matchset
         if (!found_better_dmset)
             break;

         main_dmset = curr_dmset;
     }

     // Reorder the results to reflect the diversification.  To do this we need
     // to partition the MSet so the promoted documents come first (in original
     // MSet order), followed by the non-promoted documents (also in original
     // MSet order).
     //
     // Note: the set must not be brace-initialised with {k}, since that picks
     // the initializer_list constructor and inserts k as if it were a promoted
     // docid rather than sizing the set for k entries.
     unordered_set<Xapian::docid> promoted;
     promoted.reserve(k);
     for (auto mset_index : main_dmset) {
         promoted.insert(internal->items[mset_index].get_docid());
     }

     stable_partition(internal->items.begin(), internal->items.end(),
                      [&](const Result& result) {
                          return promoted.count(result.get_docid()) != 0;
                      });
 }

 #endif


 /// Sort the items in this MSet according to their weights.
 void
 MSet::sort_by_relevance()
 {
     // Comparison function for relevance order (sort_forward = true,
     // sort_val_reverse = false).
     auto by_relevance =
         get_msetcmp_function(Enquire::Internal::REL, true, false);
     auto& results = internal->items;
     std::sort(results.begin(), results.end(), by_relevance);
 }


 /// Convert a weight to a percentage (the work is done by MSet::Internal).
 int
 MSet::convert_to_percent(double weight) const
 {
     const int percent = internal->convert_to_percent(weight);
     return percent;
 }


 /** Get the termfreq of a term.
  *
  *  The statistics cached for the query terms are consulted first; failing
  *  that the enquire object is asked, and if there's no enquire object the
  *  answer is 0.
  */
 Xapian::doccount
 MSet::get_termfreq(std::string_view term) const
 {
     // Query terms have their statistics cached, so try those first.
     Xapian::doccount cached_tf;
     if (usual(internal->stats && internal->stats->get_stats(term, cached_tf)))
         return cached_tf;

     // Without an enquire object there's no database to ask.  Returning 0 is
     // consistent with get_termfreq() on an empty database, which always
     // returns 0.
     if (rare(!internal->enquire))
         return 0;

     // Not cached, so ask the database via enquire.
     return internal->enquire->get_termfreq(term);
 }


 double

 MSet::get_termweight(std::string_view term) const

 {

     // A term not in the query has no termweight, so 0.0 makes sense as the

     // answer in such cases.

     double weight = 0.0;

     if (usual(internal->stats)) {

         (void)internal->stats->get_termweight(term, weight);

     }

     return weight;

 }


 /// Rank of first item in this MSet.
 Xapian::doccount
 MSet::get_firstitem() const
 {
     const Xapian::doccount first_rank = internal->first;
     return first_rank;
 }


 /// Lower bound on the total number of matching documents.
 Xapian::doccount
 MSet::get_matches_lower_bound() const
 {
     const Xapian::doccount lower_bound = internal->matches_lower_bound;
     return lower_bound;
 }


 /** Estimate of the total number of matching documents.
  *
  *  The stored estimate is raw; it gets rounded here on demand.  That avoids
  *  the calculation if the estimate is never looked at, at the cost of
  *  repeating it if this method is called more than once.
  */
 Xapian::doccount
 MSet::get_matches_estimated() const
 {
     const Xapian::doccount lower = internal->matches_lower_bound;
     const Xapian::doccount upper = internal->matches_upper_bound;
     const Xapian::doccount raw_estimate = internal->matches_estimated;
     return round_estimate(lower, upper, raw_estimate);
 }


 /// Upper bound on the total number of matching documents.
 Xapian::doccount
 MSet::get_matches_upper_bound() const
 {
     const Xapian::doccount upper_bound = internal->matches_upper_bound;
     return upper_bound;
 }


 /// Lower bound on the total number of matching documents before collapsing.
 Xapian::doccount
 MSet::get_uncollapsed_matches_lower_bound() const
 {
     const Xapian::doccount lower_bound = internal->uncollapsed_lower_bound;
     return lower_bound;
 }


 /** Estimate of the total number of matching documents before collapsing.
  *
  *  The stored estimate is raw; it gets rounded here on demand.  That avoids
  *  the calculation if the estimate is never looked at, at the cost of
  *  repeating it if this method is called more than once.
  */
 Xapian::doccount
 MSet::get_uncollapsed_matches_estimated() const
 {
     const Xapian::doccount lower = internal->uncollapsed_lower_bound;
     const Xapian::doccount upper = internal->uncollapsed_upper_bound;
     const Xapian::doccount raw_estimate = internal->uncollapsed_estimated;
     return round_estimate(lower, upper, raw_estimate);
 }


 /// Upper bound on the total number of matching documents before collapsing.
 Xapian::doccount
 MSet::get_uncollapsed_matches_upper_bound() const
 {
     const Xapian::doccount upper_bound = internal->uncollapsed_upper_bound;
     return upper_bound;
 }


 /// The maximum weight attained by any document.
 double
 MSet::get_max_attained() const
 {
     const double attained = internal->max_attained;
     return attained;
 }


 /// The maximum possible weight any document could achieve.
 double
 MSet::get_max_possible() const
 {
     const double possible = internal->max_possible;
     return possible;
 }


 /// Return number of items in this MSet object.
 Xapian::doccount
 MSet::size() const
 {
     const auto& results = internal->items;
     return results.size();
 }


 /** Generate a snippet.
  *
  *  This just passes every argument through to MSet::Internal::snippet();
  *  the actual implementation is in queryparser/termgenerator_internal.cc.
  */
 std::string
 MSet::snippet(std::string_view text,
               size_t length,
               const Xapian::Stem& stemmer,
               unsigned flags,
               std::string_view hi_start,
               std::string_view hi_end,
               std::string_view omit) const
 {
     std::string result = internal->snippet(text, length, stemmer, flags,
                                            hi_start, hi_end, omit);
     return result;
 }


 /// Return a string describing this object (built by MSet::Internal).
 std::string
 MSet::get_description() const
 {
     std::string desc = internal->get_description();
     return desc;
 }


 /** Get the document for the item at a given position in the MSet.
  *
  *  @param index  Position of the item in the MSet.
  *
  *  @exception Xapian::RangeError  index is not less than the MSet size.
  */
 Document
 MSet::Internal::get_document(Xapian::doccount index) const
 {
     const auto n_items = items.size();
     if (index < n_items) {
         // The document is fetched via the enquire object, so there needs to
         // be one.
         Assert(enquire);
         const auto did = items[index].get_docid();
         return enquire->get_document(did);
     }

     string msg("Requested index ");
     msg += str(index);
     msg += " in MSet of size ";
     msg += str(n_items);
     throw Xapian::RangeError(msg);
 }


 /** Ask the enquire object to request the documents for a range of items.
  *
  *  @param first_  Index of the first item to request.
  *  @param last    Index of the last item to request (inclusive).  Values
  *                 beyond the end of the MSet are clamped to the last item.
  *
  *  Does nothing if the MSet is empty, if there's no enquire object, or if
  *  first_ is greater than the (clamped) last.
  */
 void
 MSet::Internal::fetch(Xapian::doccount first_, Xapian::doccount last) const
 {
     if (items.empty() || !enquire) {
         return;
     }
     // items isn't empty here, so items.size() - 1 can't wrap around.
     if (last > items.size() - 1) {
         last = items.size() - 1;
     }
     if (first_ <= last) {
         // Count the items to request rather than comparing an index against
         // last, and offset each access by first_ so that we request
         // items[first_] to items[last] rather than always starting from
         // items[0].
         Xapian::doccount n = last - first_;
         for (Xapian::doccount i = 0; i <= n; ++i) {
             enquire->request_document(items[first_ + i].get_docid());
         }
     }
 }


 /** Set the weight of the item at position i, updating the weight bounds.
  *
  *  @param i       Position of the item in the MSet.
  *  @param weight  The new weight for that item.
  */
 void
 MSet::Internal::set_item_weight(Xapian::doccount i, double weight)
 {
     // This assumes set_item_weight is called on every MSet item from 0 up:
     // item 0 restarts max_attained, and each later item can only raise it,
     // so max_attained is the maximum of the new weights set so far.
     if (i == 0 || max_attained < weight) {
         max_attained = weight;
     }

     // Ideally max_possible would be the maximum weight the reranking
     // algorithm could possibly assign, but that can't always be calculated,
     // so instead we just make sure it's at least max_attained.
     if (max_possible < max_attained) {
         max_possible = max_attained;
     }

     items[i].set_weight(weight);
 }


 /** Convert a weight to a percentage in the range 0 to 100.
  *
  *  @param weight  The weight to convert.
  *
  *  @return 100 if percent_scale_factor is zero; otherwise 0 if weight is
  *          zero or negative; otherwise the scaled weight truncated to an
  *          integer and forced into the range 1 to 100.
  */
 int
 MSet::Internal::convert_to_percent(double weight) const
 {
     // For an unweighted search, give all matches 100%.
     if (percent_scale_factor == 0.0)
         return 100;

     // Some weighting schemes can return zero relevance while matching,
     // so give such matches 0%.
     if (weight <= 0.0)
         return 0;

     // Adding on 100 * DBL_EPSILON was a hack to work around excess
     // precision (e.g. on x86 when not using SSE), but this code seems like
     // it's generally asking for problems with floating point rounding
     // issues - maybe we ought to carry through the matching and total
     // number of subqueries and calculate using those instead.
     //
     // There are corresponding hacks in matcher/matcher.cc.
     const int scaled = int(weight * percent_scale_factor + 100.0 * DBL_EPSILON);

     // Make any non-zero weight give a non-zero percentage.
     if (scaled <= 0)
         return 1;

     // Make sure we don't ever exceed 100%.
     if (scaled > 100)
         return 100;

     // FIXME: Ideally we should also make sure any non-exact match gives
     // < 100%.
     return scaled;
 }


 /** Call unshard_docid() on every item in the MSet.
  *
  *  @param shard     Passed through to Result::unshard_docid().
  *  @param n_shards  Passed through to Result::unshard_docid().
  */
 void
 MSet::Internal::unshard_docids(Xapian::doccount shard,
                                Xapian::doccount n_shards)
 {
     for (auto it = items.begin(); it != items.end(); ++it) {
         it->unshard_docid(shard, n_shards);
     }
 }


 /** Merge the match statistics from another MSet::Internal into this one.
  *
  *  Only the statistics are merged here, not the items.
  *
  *  @param o           The object to merge statistics from.
  *  @param collapsing  If true, matches_lower_bound becomes the larger of the
  *                     two values rather than their sum.
  */
 void
 MSet::Internal::merge_stats(const Internal* o, bool collapsing)
 {
     // Adopt the other object's snippet background relevance data if we have
     // none, otherwise both are expected to be the same.
     if (!snippet_bg_relevance.empty()) {
         Assert(snippet_bg_relevance == o->snippet_bg_relevance);
     } else {
         snippet_bg_relevance = o->snippet_bg_relevance;
     }

     if (!collapsing) {
         matches_lower_bound += o->matches_lower_bound;
     } else {
         // matches_estimated will get adjusted later in this case.
         if (matches_lower_bound < o->matches_lower_bound) {
             matches_lower_bound = o->matches_lower_bound;
         }
     }

     // The remaining counts are simply summed.
     matches_estimated += o->matches_estimated;
     matches_upper_bound += o->matches_upper_bound;
     uncollapsed_lower_bound += o->uncollapsed_lower_bound;
     uncollapsed_estimated += o->uncollapsed_estimated;
     uncollapsed_upper_bound += o->uncollapsed_upper_bound;

     if (max_possible < o->max_possible) {
         max_possible = o->max_possible;
     }

     // The percentage scale factor is taken from whichever object has the
     // higher max_attained.
     if (o->max_attained > max_attained) {
         max_attained = o->max_attained;
         percent_scale_factor = o->percent_scale_factor;
     }
 }


 /** Serialise this object.
  *
  *  The layout is: max_possible, max_attained and percent_scale_factor as
  *  serialised doubles; first, the six match bounds/estimates and the item
  *  count as packed unsigned integers; then each item (weight, docid, sort
  *  key, collapse key, collapse count); and finally the serialised stats if
  *  there are any.
  */
 string
 MSet::Internal::serialise() const
 {
     string out = serialise_double(max_possible);
     out += serialise_double(max_attained);
     out += serialise_double(percent_scale_factor);

     pack_uint(out, first);

     // The matches_* values are sent raw.  MSet::get_matches_estimated()
     // rounds the estimate lazily, but when we merge MSet objects we really
     // want to merge based on the raw estimates.
     //
     // It is also cleaner that a round-trip through serialisation gives you an
     // object which is as close to the original as possible.
     pack_uint(out, matches_lower_bound);
     pack_uint(out, matches_estimated);
     pack_uint(out, matches_upper_bound);
     pack_uint(out, uncollapsed_lower_bound);
     pack_uint(out, uncollapsed_estimated);
     pack_uint(out, uncollapsed_upper_bound);

     // The item count, followed by the items themselves.
     pack_uint(out, items.size());
     for (const auto& entry : items) {
         out += serialise_double(entry.get_weight());
         pack_uint(out, entry.get_docid());
         pack_string(out, entry.get_sort_key());
         pack_string(out, entry.get_collapse_key());
         pack_uint(out, entry.get_collapse_count());
     }

     // The stats are optional and come last.
     if (stats) {
         out += serialise_stats(*stats);
     }

     return out;
 }


 /** Unserialise a serialised Xapian::MSet::Internal object.
  *
  *  Any existing items are discarded, and so are any existing stats if the
  *  serialised data doesn't include stats.
  *
  *  @param p      Start of the serialised data.
  *  @param p_end  One past the end of the serialised data.
  *
  *  unpack_throw_serialisation_error() is called if a packed value can't be
  *  decoded.
  */
 void
 MSet::Internal::unserialise(const char * p, const char * p_end)
 {
     items.clear();

     max_possible = unserialise_double(&p, p_end);
     max_attained = unserialise_double(&p, p_end);

     percent_scale_factor = unserialise_double(&p, p_end);

     size_t msize;
     if (!unpack_uint(&p, p_end, &first) ||
         !unpack_uint(&p, p_end, &matches_lower_bound) ||
         !unpack_uint(&p, p_end, &matches_estimated) ||
         !unpack_uint(&p, p_end, &matches_upper_bound) ||
         !unpack_uint(&p, p_end, &uncollapsed_lower_bound) ||
         !unpack_uint(&p, p_end, &uncollapsed_estimated) ||
         !unpack_uint(&p, p_end, &uncollapsed_upper_bound) ||
         !unpack_uint(&p, p_end, &msize)) {
         unpack_throw_serialisation_error(p);
     }
     // Read the msize items which follow, in the order serialise() wrote them.
     for ( ; msize; --msize) {
         double wt = unserialise_double(&p, p_end);
         Xapian::docid did;
         string sort_key, key;
         Xapian::doccount collapse_cnt;
         if (!unpack_uint(&p, p_end, &did) ||
             !unpack_string(&p, p_end, sort_key) ||
             !unpack_string(&p, p_end, key) ||
             !unpack_uint(&p, p_end, &collapse_cnt)) {
             unpack_throw_serialisation_error(p);
         }
         items.emplace_back(wt, did, std::move(key), collapse_cnt,
                            std::move(sort_key));
     }

     // Any remaining data is the optional serialised stats.
     if (p != p_end) {
         stats.reset(new Xapian::Weight::Internal());
         unserialise_stats(p, p_end, *stats);
     } else {
         // No stats were serialised, so make sure we don't keep stale stats
         // from any previous contents of this object (just as items is cleared
         // above).
         stats.reset();
     }
 }


 /** Return a string describing this object.
  *
  *  The three matches_* values are always reported.  Each uncollapsed_*
  *  value is only reported if it differs from the corresponding matches_*
  *  value, first only if non-zero, and max_possible and max_attained only if
  *  greater than zero.  The descriptions of the items follow in square
  *  brackets.
  */
 string
 MSet::Internal::get_description() const
 {
     string desc = "MSet(matches_lower_bound=";
     desc += str(matches_lower_bound);

     // Helper to append a label followed by the stringified value.
     auto append_field = [&desc](const char* label, const auto& value) {
         desc += label;
         desc += str(value);
     };

     append_field(", matches_estimated=", matches_estimated);
     append_field(", matches_upper_bound=", matches_upper_bound);
     if (uncollapsed_lower_bound != matches_lower_bound)
         append_field(", uncollapsed_lower_bound=", uncollapsed_lower_bound);
     if (uncollapsed_estimated != matches_estimated)
         append_field(", uncollapsed_estimated=", uncollapsed_estimated);
     if (uncollapsed_upper_bound != matches_upper_bound)
         append_field(", uncollapsed_upper_bound=", uncollapsed_upper_bound);
     if (first != 0)
         append_field(", first=", first);
     if (max_possible > 0)
         append_field(", max_possible=", max_possible);
     if (max_attained > 0)
         append_field(", max_attained=", max_attained);

     // Comma-separated list of the item descriptions.
     desc += ", [";
     const char* separator = "";
     for (const auto& entry : items) {
         desc += separator;
         desc += entry.get_description();
         separator = ", ";
     }
     desc += "])";
     return desc;
 }


 }

Result
A result in an MSet.
Definition: result.h:30

Xapian::ClusterSet
Class for storing the results returned by the Clusterer.
Definition: cluster.h:452

Xapian::ClusterSet::size
Xapian::doccount size() const
Return the number of clusters.

Xapian::CosineDistance
Class for calculating the cosine distance between two documents.
Definition: cluster.h:538

Xapian::CosineDistance::similarity
double similarity(const PointType &a, const PointType &b) const override
Calculates and returns the cosine similarity using the formula cos(theta) = a.b/(|a|*|b|)

Xapian::Document
Class representing a document.
Definition: document.h:64

Xapian::Document::internal
Xapian::Internal::intrusive_ptr_nonnull< Internal > internal
Definition: document.h:67

Xapian::Enquire::Internal::REL
@ REL
Definition: enquireinternal.h:49

Xapian::LCDClusterer
LCD clusterer: This clusterer implements the LCD clustering algorithm adapted from Modelling efficien...
Definition: cluster.h:662

Xapian::LCDClusterer::cluster
ClusterSet cluster(const MSet &mset) override
Implements the LCD clustering algorithm.

Xapian::MSet::Internal
Xapian::MSet internals.
Definition: msetinternal.h:44

Xapian::MSet::Internal::uncollapsed_upper_bound
Xapian::doccount uncollapsed_upper_bound
Definition: msetinternal.h:76

Xapian::MSet::Internal::serialise
std::string serialise() const
Serialise this object.
Definition: mset.cc:517

Xapian::MSet::Internal::convert_to_percent
int convert_to_percent(double weight) const
Definition: mset.cc:449

Xapian::MSet::Internal::enquire
Xapian::Internal::intrusive_ptr< const Enquire::Internal > enquire
Definition: msetinternal.h:64

Xapian::MSet::Internal::snippet_bg_relevance
std::unordered_map< std::string, double > snippet_bg_relevance
Relevance weights for non-query terms for generating snippets.
Definition: msetinternal.h:56

Xapian::MSet::Internal::get_description
std::string get_description() const
Return a string describing this object.
Definition: mset.cc:598

Xapian::MSet::Internal::stats
std::unique_ptr< Xapian::Weight::Internal > stats
For looking up query term frequencies and weights.
Definition: msetinternal.h:62

Xapian::MSet::Internal::max_attained
double max_attained
Definition: msetinternal.h:82

Xapian::MSet::Internal::items
std::vector< Result > items
The items in the MSet.
Definition: msetinternal.h:59

Xapian::MSet::Internal::uncollapsed_lower_bound
Xapian::doccount uncollapsed_lower_bound
Definition: msetinternal.h:72

Xapian::MSet::Internal::matches_estimated
Xapian::doccount matches_estimated
Definition: msetinternal.h:68

Xapian::MSet::Internal::unshard_docids
void unshard_docids(Xapian::doccount shard, Xapian::doccount n_shards)
Definition: mset.cc:482

Xapian::MSet::Internal::unserialise
void unserialise(const char *p, const char *p_end)
Unserialise a serialised Xapian::MSet::Internal object.
Definition: mset.cc:556

Xapian::MSet::Internal::get_document
Xapian::Document get_document(Xapian::doccount index) const
Definition: mset.cc:400

Xapian::MSet::Internal::merge_stats
void merge_stats(const Internal *o, bool collapsing)
Definition: mset.cc:491

Xapian::MSet::Internal::fetch
void fetch(Xapian::doccount first, Xapian::doccount last) const
Definition: mset.cc:414

Xapian::MSet::Internal::set_item_weight
void set_item_weight(Xapian::doccount i, double weight)
Definition: mset.cc:431

Xapian::MSet::Internal::matches_lower_bound
Xapian::doccount matches_lower_bound
Definition: msetinternal.h:66

Xapian::MSet::Internal::max_possible
double max_possible
Definition: msetinternal.h:80

Xapian::MSet::Internal::percent_scale_factor
double percent_scale_factor
Scale factor to convert weights to percentages.
Definition: msetinternal.h:85

Xapian::MSet::Internal::matches_upper_bound
Xapian::doccount matches_upper_bound
Definition: msetinternal.h:70

Xapian::MSet::Internal::uncollapsed_estimated
Xapian::doccount uncollapsed_estimated
Definition: msetinternal.h:74

Xapian::MSet
Class representing a list of search results.
Definition: mset.h:46

Xapian::MSet::internal
Xapian::Internal::intrusive_ptr_nonnull< Internal > internal
Definition: mset.h:78

Xapian::MSet::get_termfreq
Xapian::doccount get_termfreq(std::string_view term) const
Get the termfreq of a term.
Definition: mset.cc:281

Xapian::MSet::sort_by_relevance
void sort_by_relevance()
Sorts the list of documents in MSet according to their weights.
Definition: mset.cc:268

Xapian::MSet::set_item_weight
void set_item_weight(Xapian::doccount i, double wt)
Update the weight corresponding to the document indexed at position i with wt.
Definition: mset.cc:72

Xapian::MSet::size
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374

Xapian::MSet::MSet
MSet()
Default constructor.
Definition: mset.cc:59

Xapian::MSet::get_max_possible
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: mset.cc:368

Xapian::MSet::fetch_
void fetch_(Xapian::doccount first, Xapian::doccount last) const
Definition: mset.cc:66

Xapian::MSet::get_uncollapsed_matches_upper_bound
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: mset.cc:356

Xapian::MSet::MSetIterator
friend class MSetIterator
Definition: mset.h:47

Xapian::MSet::get_uncollapsed_matches_estimated
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: mset.cc:346

Xapian::MSet::get_uncollapsed_matches_lower_bound
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: mset.cc:340

Xapian::MSet::convert_to_percent
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: mset.cc:275

Xapian::MSet::get_description
std::string get_description() const
Return a string describing this object.
Definition: mset.cc:394

Xapian::MSet::~MSet
~MSet()
Destructor.
Definition: mset.cc:63

Xapian::MSet::get_firstitem
Xapian::doccount get_firstitem() const
Rank of first item in this MSet.
Definition: mset.cc:312

Xapian::MSet::get_termweight
double get_termweight(std::string_view term) const
Get the term weight of a term.
Definition: mset.cc:300

Xapian::MSet::get_matches_upper_bound
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: mset.cc:334

Xapian::MSet::begin
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:786

Xapian::MSet::snippet
std::string snippet(std::string_view text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, std::string_view hi_start="<b>", std::string_view hi_end="</b>", std::string_view omit="...") const
Generate a snippet.
Definition: mset.cc:380

Xapian::MSet::get_max_attained
double get_max_attained() const
The maximum weight attained by any document.
Definition: mset.cc:362

Xapian::MSet::get_matches_lower_bound
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: mset.cc:318

Xapian::MSet::end
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:791

Xapian::MSet::get_matches_estimated
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: mset.cc:324

Xapian::Point
Class to represent a document as a point in the Vector Space Model.
Definition: cluster.h:320

Xapian::RangeError
RangeError indicates an attempt to access outside the bounds of a container.
Definition: error.h:959

Xapian::Stem
Class representing a stemming algorithm.
Definition: stem.h:74

Xapian::Weight::Internal
Class to hold statistics for a given collection.
Definition: weightinternal.h:106

config.h

usual
#define usual(COND)
Definition: config.h:608

rare
#define rare(COND)
Definition: config.h:607

term
string term
Definition: databaseinternal.cc:439

p
PositionList * p
Definition: databaseinternal.cc:437

pos
Xapian::termpos pos
Definition: databaseinternal.cc:435

description_append.h
Append a string to an object description, escaping invalid UTF-8.

documentinternal.h
Abstract base class for a document.

mset.h
Class representing a list of search results.

get_msetcmp_function
MSetCmp get_msetcmp_function(Xapian::Enquire::Internal::sort_setting sort_by, bool sort_forward, bool sort_val_reverse)
Select the appropriate msetcmp function.
Definition: msetcmp.cc:100

msetcmp.h
Result comparison functions.

msetinternal.h
Xapian::MSet internals.

Heap::sort
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:277

Xapian::Internal::str
string str(int value)
Convert int to std::string.
Definition: str.cc:91

Xapian
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82

Xapian::doccount
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37

Xapian::docid
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51

omassert.h
Various assertion macros.

AssertRel
#define AssertRel(A, REL, B)
Definition: omassert.h:123

Assert
#define Assert(COND)
Definition: omassert.h:122

unpack_throw_serialisation_error
void unpack_throw_serialisation_error(const char *p)
Throw appropriate SerialisationError.
Definition: pack.cc:29

pack.h
Pack types into strings and unpack them again.

unpack_string
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
Definition: pack.h:468

unpack_uint
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:346

pack_uint
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:315

pack_string
void pack_string(std::string &s, std::string_view value)
Append an encoded std::string to a string.
Definition: pack.h:442

roundestimate.h
Round a bounded estimate to an appropriate number of S.F.

round_estimate
Xapian::doccount round_estimate(T lb, T ub, T est)
Round a bounded estimate to an appropriate number of S.F.
Definition: roundestimate.h:37

serialise_double
string serialise_double(double v)
Serialise a double to a string.
Definition: serialise-double.cc:83

unserialise_double
double unserialise_double(const char **p, const char *end)
Unserialise a double serialised by serialise_double.
Definition: serialise-double.cc:162

serialise-double.h
functions to serialise and unserialise a double

serialise_stats
string serialise_stats(const Xapian::Weight::Internal &stats)
Serialise a stats object.
Definition: serialise.cc:42

unserialise_stats
void unserialise_stats(const char *p, const char *p_end, Xapian::Weight::Internal &stat)
Unserialise a serialised stats object.
Definition: serialise.cc:92

serialise.h
functions to convert classes to strings and back

stemmer
static Xapian::Stem stemmer
Definition: stemtest.cc:42

str.h
Convert types to std::string.