00001 00004 /* Copyright (C) 2007 Lemur Consulting Ltd 00005 * Copyright (C) 2009,2010 Olly Betts 00006 * 00007 * This program is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU General Public License as 00009 * published by the Free Software Foundation; either version 2 of the 00010 * License, or (at your option) any later version. 00011 * 00012 * This program is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #ifndef XAPIAN_INCLUDED_WEIGHTINTERNAL_H 00023 #define XAPIAN_INCLUDED_WEIGHTINTERNAL_H 00024 00025 #include "xapian/weight.h" 00026 00027 #include "xapian/database.h" 00028 #include "xapian/query.h" 00029 00030 #include "database.h" 00031 #include "internaltypes.h" 00032 00033 #include <map> 00034 #include <string> 00035 00037 struct TermFreqs { 00038 Xapian::doccount termfreq; 00039 Xapian::doccount reltermfreq; 00040 00041 TermFreqs() : termfreq(0), reltermfreq(0) {} 00042 TermFreqs(Xapian::doccount termfreq_, Xapian::doccount reltermfreq_) 00043 : termfreq(termfreq_), reltermfreq(reltermfreq_) {} 00044 00045 void operator +=(const TermFreqs & other) { 00046 termfreq += other.termfreq; 00047 reltermfreq += other.reltermfreq; 00048 } 00049 00051 std::string get_description() const; 00052 }; 00053 00054 namespace Xapian { 00055 00056 class RSet; 00057 00059 class Weight::Internal { 00060 public: 00062 totlen_t total_length; 00063 00065 Xapian::doccount collection_size; 00066 00068 Xapian::doccount rset_size; 00069 00071 Xapian::Database db; 00072 00075 std::map<std::string, TermFreqs> termfreqs; 00076 00077 Internal() : total_length(0), collection_size(0), rset_size(0) { } 00078 00084 Internal & operator +=(const Internal & inc); 00085 00087 void mark_wanted_terms(const Xapian::Query::Internal &query) { 00088 Xapian::TermIterator t; 00089 for (t = query.get_terms(); t != Xapian::TermIterator(); ++t) { 00090 termfreqs.insert(make_pair(*t, TermFreqs())); 00091 } 00092 } 00093 00095 void accumulate_stats(const Xapian::Database::Internal &sub_db, 00096 const Xapian::RSet &rset); 00097 00103 Xapian::doccount get_termfreq(const std::string & term) const; 00104 00110 Xapian::doccount get_reltermfreq(const std::string & term) const; 00111 00112 Xapian::doclength get_average_length() const { 00113 if (rare(collection_size == 0)) return 0; 00114 return Xapian::doclength(total_length) / collection_size; 00115 } 00116 00118 void set_bounds_from_db(const Xapian::Database &db_) { db = db_; } 00119 00121 std::string get_description() const; 00122 }; 00123 00124 } 00125 00126 #endif // XAPIAN_INCLUDED_WEIGHTINTERNAL_H