matcher/rset.cc

Go to the documentation of this file.
00001 /* rset.cc
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2003,2007,2009 Olly Betts
00006  * Copyright 2007 Lemur Consulting Ltd
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #include <config.h>
00025 
00026 #include "database.h"
00027 #include "rset.h"
00028 #include "stats.h"
00029 #include "omdebug.h"
00030 
00031 #include "autoptr.h"
00032 #include "termlist.h"
00033 
00034 void
00035 RSetI::calculate_stats()
00036 {
00037     DEBUGCALL(MATCH, void, "RSetI::calculate_stats", "");
00038     Assert(!calculated_reltermfreqs);
00039     std::set<Xapian::docid>::const_iterator doc;
00040     for (doc = documents.begin(); doc != documents.end(); doc++) {
00041         DEBUGLINE(WTCALC, "Counting reltermfreqs in document " << *doc << " [ ");
00042         if (dbroot) {
00043             AutoPtr<TermList> tl(dbroot->open_term_list(*doc));
00044             tl->next();
00045             while (!tl->at_end()) {
00046                 // FIXME - can this lookup be done faster?
00047                 // Store termnames in a hash for each document, rather than
00048                 // a list?
00049                 string tname = tl->get_termname();
00050                 if (reltermfreqs.find(tname) != reltermfreqs.end()) {
00051                     reltermfreqs[tname] ++;
00052                     DEBUGLINE(WTCALC, tname << " now has reltermfreq of " << reltermfreqs[tname]);
00053                 }
00054                 tl->next();
00055             }
00056         } else {
00057             Xapian::TermIterator tl = root.termlist_begin(*doc);
00058             Xapian::TermIterator tlend = root.termlist_end(*doc);
00059             while (tl != tlend) {
00060                 // FIXME - can this lookup be done faster?
00061                 // Store termnames in a hash for each document, rather than
00062                 // a list?
00063                 string tname = *tl;
00064                 if (reltermfreqs.find(tname) != reltermfreqs.end()) {
00065                     reltermfreqs[tname] ++;
00066                     DEBUGLINE(WTCALC, tname << " now has reltermfreq of " << reltermfreqs[tname]);
00067                 }
00068                 tl++;
00069             }
00070         }
00071         DEBUGLINE(WTCALC, "] ");
00072     }
00073     calculated_reltermfreqs = true;
00074 }
00075 
00076 void
00077 RSetI::contribute_stats(Stats & stats)
00078 {
00079     DEBUGCALL(MATCH, void, "RSetI::contribute_stats", stats);
00080     calculate_stats();
00081 
00082     std::map<string, Xapian::doccount>::const_iterator i;
00083     for (i = reltermfreqs.begin(); i != reltermfreqs.end(); i++) {
00084         stats.set_reltermfreq(i->first, i->second);
00085     }
00086     stats.rset_size += get_rsize();
00087 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.