00001 00004 /* Copyright (C) 2007 Lemur Consulting Ltd 00005 * Copyright (C) 2009,2010,2012 Olly Betts 00006 * 00007 * This program is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU General Public License as 00009 * published by the Free Software Foundation; either version 2 of the 00010 * License, or (at your option) any later version. 00011 * 00012 * This program is distributed in the hope that it will be useful 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include <config.h> 00023 00024 #include "weightinternal.h" 00025 00026 #include "xapian/enquire.h" 00027 00028 #include "omassert.h" 00029 #include "omenquireinternal.h" 00030 #include "str.h" 00031 #include "termlist.h" 00032 00033 #include "autoptr.h" 00034 #include <set> 00035 00036 using namespace std; 00037 00038 string 00039 TermFreqs::get_description() const { 00040 string desc("TermFreqs("); 00041 desc += str(termfreq); 00042 desc += ", "; 00043 desc += str(reltermfreq); 00044 desc += ")"; 00045 return desc; 00046 } 00047 00048 namespace Xapian { 00049 00050 Weight::Internal & 00051 Weight::Internal::operator +=(const Weight::Internal & inc) 00052 { 00053 total_length += inc.total_length; 00054 collection_size += inc.collection_size; 00055 rset_size += inc.rset_size; 00056 00057 // Add termfreqs and reltermfreqs 00058 map<string, TermFreqs>::const_iterator i; 00059 for (i = inc.termfreqs.begin(); i != inc.termfreqs.end(); ++i) { 00060 termfreqs[i->first] += i->second; 00061 } 00062 return *this; 00063 } 00064 00065 Xapian::doccount 00066 Weight::Internal::get_termfreq(const string & term) const 00067 { 00068 // We pass an empty std::string for term when calculating the extra weight. 00069 if (term.empty()) return 0; 00070 00071 map<string, TermFreqs>::const_iterator tfreq = termfreqs.find(term); 00072 Assert(tfreq != termfreqs.end()); 00073 return tfreq->second.termfreq; 00074 } 00075 00076 void 00077 Weight::Internal::accumulate_stats(const Xapian::Database::Internal &subdb, 00078 const Xapian::RSet &rset) 00079 { 00080 total_length += subdb.get_total_length(); 00081 collection_size += subdb.get_doccount(); 00082 rset_size += rset.size(); 00083 00084 map<string, TermFreqs>::iterator t; 00085 for (t = termfreqs.begin(); t != termfreqs.end(); ++t) { 00086 const string & term = t->first; 00087 t->second.termfreq += subdb.get_termfreq(term); 00088 } 00089 00090 const set<Xapian::docid> & items(rset.internal->get_items()); 00091 set<Xapian::docid>::const_iterator d; 00092 for (d = items.begin(); d != items.end(); ++d) { 00093 Xapian::docid did = *d; 00094 Assert(did); 00095 // The query is likely to far fewer terms than the documents, and we 00096 // can skip the document's termlist, so look for each query term in the 00097 // document. 00098 AutoPtr<TermList> tl(subdb.open_term_list(did)); 00099 for (t = termfreqs.begin(); t != termfreqs.end(); ++t) { 00100 const string & term = t->first; 00101 TermList * ret = tl->skip_to(term); 00102 Assert(ret == NULL); 00103 (void)ret; 00104 if (tl->at_end()) 00105 break; 00106 if (term == tl->get_termname()) 00107 ++t->second.reltermfreq; 00108 } 00109 } 00110 } 00111 00112 Xapian::doccount 00113 Weight::Internal::get_reltermfreq(const string & term) const 00114 { 00115 // We pass an empty string for term when calculating the extra weight. 00116 if (term.empty()) return 0; 00117 00118 map<string, TermFreqs>::const_iterator tfreq = termfreqs.find(term); 00119 Assert(tfreq != termfreqs.end()); 00120 return tfreq->second.reltermfreq; 00121 } 00122 00123 string 00124 Weight::Internal::get_description() const 00125 { 00126 string desc = "Weight::Internal(totlen="; 00127 desc += str(total_length); 00128 desc += ", collection_size="; 00129 desc += str(collection_size); 00130 desc += ", rset_size="; 00131 desc += str(rset_size); 00132 desc += ')'; 00133 return desc; 00134 } 00135 00136 }