xapian-core  1.4.19
weightinternal.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007 Lemur Consulting Ltd
5  * Copyright (C) 2009,2010,2011,2012,2013,2014,2015,2017,2020 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "weightinternal.h"
25 
26 #include "xapian/enquire.h"
27 
28 #include "omassert.h"
29 #include "api/omenquireinternal.h"
30 #include "str.h"
31 #include "api/termlist.h"
32 
33 #include "autoptr.h"
34 #include <set>
35 
36 using namespace std;
37 
38 string
40  string desc("TermFreqs(termfreq=");
41  desc += str(termfreq);
42  desc += ", reltermfreq=";
43  desc += str(reltermfreq);
44  desc += ", collfreq=";
45  desc += str(collfreq);
46  desc += ", max_part=";
47  desc += str(max_part);
48  desc += ")";
49  return desc;
50 }
51 
52 namespace Xapian {
53 
54 Weight::Internal &
55 Weight::Internal::operator+=(const Weight::Internal & inc)
56 {
57 #ifdef XAPIAN_ASSERTIONS
58  Assert(!finalised);
59  subdbs += inc.subdbs;
60 #endif
61  total_length += inc.total_length;
62  collection_size += inc.collection_size;
63  rset_size += inc.rset_size;
64 
65  // Add termfreqs and reltermfreqs
66  map<string, TermFreqs>::const_iterator i;
67  for (i = inc.termfreqs.begin(); i != inc.termfreqs.end(); ++i) {
68  termfreqs[i->first] += i->second;
69  }
70  return *this;
71 }
72 
73 void
74 Weight::Internal::accumulate_stats(const Xapian::Database::Internal &subdb,
75  const Xapian::RSet &rset)
76 {
77 #ifdef XAPIAN_ASSERTIONS
78  Assert(!finalised);
79  ++subdbs;
80 #endif
81  total_length += subdb.get_total_length();
82  collection_size += subdb.get_doccount();
83  rset_size += rset.size();
84 
86  for (t = query.get_unique_terms_begin(); t != Xapian::TermIterator(); ++t) {
87  const string & term = *t;
88 
89  Xapian::doccount sub_tf;
90  Xapian::termcount sub_cf;
91  subdb.get_freqs(term, &sub_tf, &sub_cf);
92  TermFreqs & tf = termfreqs[term];
93  tf.termfreq += sub_tf;
94  tf.collfreq += sub_cf;
95  }
96 
97  const set<Xapian::docid> & items(rset.internal->get_items());
98  set<Xapian::docid>::const_iterator d;
99  for (d = items.begin(); d != items.end(); ++d) {
100  Xapian::docid did = *d;
101  Assert(did);
102  // The query is likely to contain far fewer terms than the documents,
103  // and we can skip the document's termlist, so look for each query term
104  // in the document.
105  AutoPtr<TermList> tl(subdb.open_term_list(did));
106  map<string, TermFreqs>::iterator i;
107  for (i = termfreqs.begin(); i != termfreqs.end(); ++i) {
108  const string & term = i->first;
109  TermList * ret = tl->skip_to(term);
110  Assert(ret == NULL);
111  (void)ret;
112  if (tl->at_end())
113  break;
114  if (term == tl->get_termname())
115  ++i->second.reltermfreq;
116  }
117  }
118 }
119 
120 void
121 Weight::Internal::merge(const Weight::Internal& o)
122 {
123  if (!o.have_max_part) return;
124  for (auto i : o.termfreqs) {
125  double& max_part = termfreqs[i.first].max_part;
126  max_part = max(max_part, i.second.max_part);
127  }
128 }
129 
130 string
131 Weight::Internal::get_description() const
132 {
133  string desc = "Weight::Internal(totlen=";
134  desc += str(total_length);
135  desc += ", collection_size=";
136  desc += str(collection_size);
137  desc += ", rset_size=";
138  desc += str(rset_size);
139 #ifdef XAPIAN_ASSERTIONS
140  desc += ", subdbs=";
141  desc += str(subdbs);
142  desc += ", finalised=";
143  desc += str(finalised);
144 #endif
145  desc += ", termfreqs={";
146  map<string, TermFreqs>::const_iterator i;
147  for (i = termfreqs.begin(); i != termfreqs.end(); ++i) {
148  if (i != termfreqs.begin())
149  desc += ", ";
150  desc += i->first;
151  desc += " => ";
152  desc += i->second.get_description();
153  }
154  desc += "})";
155  return desc;
156 }
157 
158 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
#define Assert(COND)
Definition: omassert.h:122
Xapian::doccount size() const
The number of documents in this R-Set.
Definition: omenquire.cc:92
virtual TermList * open_term_list(Xapian::docid did) const =0
Open a term list.
std::string get_description() const
Return a std::string describing this object.
Base class for databases.
Definition: database.h:56
virtual Internal * skip_to(const std::string &term)=0
Skip forward to the specified term.
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
Convert types to std::string.
Xapian::doccount termfreq
const TermIterator get_unique_terms_begin() const
Begin iterator for unique terms in the query object.
Definition: query.cc:160
std::map< std::string, TermFreqs > termfreqs
Map of term frequencies and relevant term frequencies for the collection.
Xapian::doccount collection_size
Number of documents in the collection.
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: enquire.h:63
Xapian::doccount rset_size
Number of relevant documents in the collection.
API for running queries.
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool have_max_part
Has max_part been set for any term?
Xapian::Weight::Internal class, holding database and term statistics.
Class to hold statistics for a given collection.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
Xapian::termcount collfreq
virtual Xapian::totallength get_total_length() const =0
Return the total length of all documents in this database.
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
The frequencies for a term.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
virtual void get_freqs(const string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const =0
Returns frequencies for a term.
Abstract base class for termlists.
virtual Xapian::doccount get_doccount() const =0
Return the number of docs in this (sub) database.
Various assertion macros.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Xapian::totallength total_length
Total length of all documents in the collection.
Wrapper around standard unique_ptr template.
A relevance set (R-Set).
Definition: enquire.h:60