xapian-core  2.0.0
localsubmatch.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2006-2026 Olly Betts
5  * Copyright (C) 2007 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_LOCALSUBMATCH_H
23 #define XAPIAN_INCLUDED_LOCALSUBMATCH_H
24 
25 #include "api/queryinternal.h"
27 #include "backends/leafpostlist.h"
28 #include "estimateop.h"
29 #include "weight/weightinternal.h"
30 #include "xapian/enquire.h"
31 #include "xapian/weight.h"
32 
33 class PostListTree;
34 
35 namespace Xapian {
36 namespace Internal {
37 class PostList;
38 }
39 }
40 
42 
46 
// Don't allow copying: the copy constructor is explicitly deleted.
48  LocalSubMatch(const LocalSubMatch &) = delete;
49 
52 
55 
58 
61 
64 
67 
68  public:
// Constructor: stores the query, the query length, the (sub-)database
// pointer, the weight factory reference and the 0-based shard index in the
// members of the same names.
// NOTE(review): the first line of the signature (source line 70) is missing
// from this rendered listing; the member index further down this page gives
// the first parameter as `const Xapian::Database::Internal *db_`.
71  const Xapian::Query& query_,
72  Xapian::termcount qlen_,
73  const Xapian::Weight& wt_factory_,
74  Xapian::doccount shard_index_)
75  : query(query_), qlen(qlen_), db(db_),
76  wt_factory(wt_factory_),
77  shard_index(shard_index_)
// All the work is done by the member initialisers; the body is empty.
78  {}
79 
// Resolve the match estimates for this shard.
//
// Reads the shard's document count and the lowest/highest docids actually in
// use from db, passes them to estimate_op->resolve() and returns its result.
//
// estimate_op must be non-null; this is checked with Assert().
80  Estimates resolve(EstimateOp* estimate_op) {
81  Assert(estimate_op);
82  auto db_size = db->get_doccount();
83  // We shortcut an empty shard and avoid creating a postlist tree for
84  // it so shouldn't need to resolve estimates for it.
85  Assert(db_size);
// db_first and db_last are output arguments filled in by
// get_used_docid_range().
86  Xapian::docid db_first, db_last;
87  db->get_used_docid_range(db_first, db_last);
88  return estimate_op->resolve(db_size, db_first, db_last);
89  }
90 
// Fetch and collate statistics: passes this shard's database and rset to
// stats.accumulate_stats().
// NOTE(review): the second parameter line (source line 100) is missing from
// this rendered listing; the member index further down this page gives it as
// `Xapian::Weight::Internal &stats`.
99  void prepare_match(const Xapian::RSet& rset,
101  {
102  stats.accumulate_stats(*db, rset);
103  }
104 
// Set the collated statistics.
// NOTE(review): the signature line is missing from this rendered listing; the
// member index further down this page gives it as
// `void start_match(Xapian::Weight::Internal &total_stats_)`.
110  {
// Only the address of total_stats_ is stored; no copy is made.
111  total_stats = &total_stats_;
112  }
113 
// Tail of the get_postlist() declaration ("Get PostList." in the member
// index). NOTE(review): the start of the declaration is missing from this
// rendered listing; the index gives the full signature as
// `PostListAndEstimate get_postlist(PostListTree *matcher,
//  Xapian::termcount *total_subqs_ptr)`.
116  Xapian::termcount* total_subqs_ptr);
117 
// Remaining parameters of the make_synonym_postlist() declaration ("Convert a
// postlist into a synonym postlist." in the member index).
// NOTE(review): the start of the declaration is missing from this rendered
// listing; the index gives the return type as PostListAndEstimate and the
// first parameter as `PostListTree *pltree`.
121  PostListAndEstimate or_pl,
122  double factor,
123  const TermFreqs& termfreqs);
124 
// Declaration of open_post_list(); the definition is not in this header.
// NOTE(review): two source lines are missing from this rendered listing: the
// return type (line 125) and one parameter (line 131). The member index
// further down this page gives the return type as PostListAndEstimate and the
// missing parameter as `Xapian::Internal::QueryOptimiser *qopt`, between
// compound_weight and lazy_weight.
126  open_post_list(const std::string& term,
127  Xapian::termcount wqf,
128  double factor,
129  bool need_positions,
130  bool compound_weight,
132  bool lazy_weight,
133  TermFreqs* termfreqs);
134 
// Register the term of a leaf postlist in the collated statistics.
//
// If the term is not yet present in total_stats->termfreqs, a new entry is
// inserted and filled with the postlist's term frequency and collection
// frequency. If termfreqs is non-null, the entry for the term (new or
// already existing) is copied to *termfreqs.
//
// NOTE(review): the first line of the signature (source line 135) is missing
// from this rendered listing; the member index further down this page gives
// it as `void register_lazy_postlist_for_stats(LeafPostList *pl,
// TermFreqs *termfreqs)`.
136  TermFreqs* termfreqs) {
// emplace() leaves an existing entry untouched; res.second is true only if
// a new entry was inserted, and res.first points at the entry either way.
137  auto res = total_stats->termfreqs.emplace(pl->get_term(), TermFreqs());
138  if (res.second) {
139  // Only register if the term isn't already registered - e.g. a term
140  // from a wildcard expansion which is also present in the query
141  // verbatim such as: foo* food
142  res.first->second.termfreq = pl->get_termfreq();
143  res.first->second.collfreq = pl->get_collfreq();
// In assertion-enabled builds, cross-check the values taken from the postlist
// against what the database itself reports for the term.
144 #ifdef XAPIAN_ASSERTIONS
145  Xapian::doccount tf;
// NOTE(review): the declaration of cf (source line 146) is missing from this
// rendered listing; get_freqs() takes a `termcount *` for this argument.
147  db->get_freqs(pl->get_term(), &tf, &cf);
148  AssertEq(res.first->second.termfreq, tf);
149  AssertEq(res.first->second.collfreq, cf);
150 #endif
151  }
// The caller may pass a null termfreqs when it doesn't need the frequencies.
152  if (termfreqs) *termfreqs = res.first->second;
153  }
154 
// Query whether the weighting scheme needs the wdf.
// NOTE(review): the body (source line 156) is missing from this rendered
// listing. The page's index lists Xapian::Weight::get_sumpart_needs_wdf_(), so
// the body presumably returns that for wt_factory - confirm against the real
// header.
155  bool weight_needs_wdf() const {
157  }
158 
// Accessor returning the total_stats pointer (the statistics for the
// collection).
// NOTE(review): the signature line is missing from this rendered listing; the
// member index further down this page gives it as
// `const Xapian::Weight::Internal * get_stats() const`.
160  return total_stats;
161  }
162 };
163 
164 #endif /* XAPIAN_INCLUDED_LOCALSUBMATCH_H */
Class for estimating the total number of matching documents.
Definition: estimateop.h:64
Estimates resolve(Xapian::doccount db_size, Xapian::docid db_first, Xapian::docid db_last)
Definition: estimateop.cc:34
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
Xapian::termcount get_collfreq() const
Get the collection frequency of the term.
Definition: leafpostlist.h:70
const std::string & get_term() const
Get the term name.
Definition: leafpostlist.h:148
bool weight_needs_wdf() const
const Xapian::Database::Internal * db
The (sub-)Database we're searching.
Definition: localsubmatch.h:60
const Xapian::Weight::Internal * get_stats() const
Xapian::doccount shard_index
0-based index for the subdatabase.
Definition: localsubmatch.h:66
LocalSubMatch & operator=(const LocalSubMatch &)=delete
Don't allow assignment.
PostListAndEstimate open_post_list(const std::string &term, Xapian::termcount wqf, double factor, bool need_positions, bool compound_weight, Xapian::Internal::QueryOptimiser *qopt, bool lazy_weight, TermFreqs *termfreqs)
LocalSubMatch(const Xapian::Database::Internal *db_, const Xapian::Query &query_, Xapian::termcount qlen_, const Xapian::Weight &wt_factory_, Xapian::doccount shard_index_)
Constructor.
Definition: localsubmatch.h:70
LocalSubMatch(const LocalSubMatch &)=delete
Don't allow copying.
Xapian::Query query
The query.
Definition: localsubmatch.h:54
void prepare_match(const Xapian::RSet &rset, Xapian::Weight::Internal &stats)
Fetch and collate statistics.
Definition: localsubmatch.h:99
void start_match(Xapian::Weight::Internal &total_stats_)
Set the collated statistics.
void register_lazy_postlist_for_stats(LeafPostList *pl, TermFreqs *termfreqs)
PostListAndEstimate get_postlist(PostListTree *matcher, Xapian::termcount *total_subqs_ptr)
Get PostList.
Xapian::Weight::Internal * total_stats
The statistics for the collection.
Definition: localsubmatch.h:51
Xapian::termcount qlen
The query length (used by some weighting schemes).
Definition: localsubmatch.h:57
PostListAndEstimate make_synonym_postlist(PostListTree *pltree, PostListAndEstimate or_pl, double factor, const TermFreqs &termfreqs)
Convert a postlist into a synonym postlist.
const Xapian::Weight & wt_factory
Weight object (used as a factory by calling create on it).
Definition: localsubmatch.h:63
Estimates resolve(EstimateOp *estimate_op)
Definition: localsubmatch.h:80
Virtual base class for Database internals.
virtual void get_used_docid_range(docid &first, docid &last) const
Find lowest and highest docids actually in use.
virtual void get_freqs(std::string_view term, doccount *termfreq_ptr, termcount *collfreq_ptr) const =0
Returns frequencies for a term.
virtual doccount get_doccount() const =0
Abstract base class for postlists.
Definition: postlist.h:40
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
Definition: postlist.h:67
Class representing a query.
Definition: query.h:45
Class representing a set of documents judged as relevant.
Definition: rset.h:39
Class to hold statistics for a given collection.
void accumulate_stats(const Xapian::Database::Internal &sub_db, const Xapian::RSet &rset)
Accumulate the rtermfreqs for terms in the query.
std::map< std::string, TermFreqs, std::less<> > termfreqs
Map of term frequencies and relevant term frequencies for the collection.
Abstract base class for weighting schemes.
Definition: weight.h:38
bool get_sumpart_needs_wdf_() const
Definition: weight.h:484
string term
Virtual base class for Database internals.
Querying session.
Calculated bounds on and estimate of number of matches.
Abstract base class for leaf postlists.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define AssertEq(A, B)
Definition: omassert.h:124
#define Assert(COND)
Definition: omassert.h:122
Xapian::Query internals.
The frequencies for a term.
Weighting scheme API.
Xapian::Weight::Internal class, holding database and term statistics.