api/omdocument.cc

Go to the documentation of this file.
00001 /* omdocument.cc: implementation of Xapian::Document and its internals
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2002 Ananova Ltd
00005  * Copyright 2003,2004,2006,2007,2009 Olly Betts
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License as
00009  * published by the Free Software Foundation; either version 2 of the
00010  * License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00020  * USA
00021  */
00022 
00023 #include <config.h>
00024 
00025 #include <xapian/document.h>
00026 #include <xapian/types.h>
00027 #include "document.h"
00028 #include "maptermlist.h"
00029 #include <xapian/error.h>
00030 #include <xapian/valueiterator.h>
00031 #include "utils.h"
00032 
00033 #include <algorithm>
00034 #include <string>
00035 
00036 using namespace std;
00037 
00038 namespace Xapian {
00039 
00040 // implementation of Document
00041 
00042 Document::Document(Document::Internal *internal_) : internal(internal_)
00043 {
00044 }
00045 
00046 Document::Document() : internal(new Xapian::Document::Internal())
00047 {
00048 }
00049 
00050 string
00051 Document::get_value(Xapian::valueno value) const
00052 {
00053     DEBUGAPICALL(string, "Document::get_value", value);
00054     RETURN(internal->get_value(value));
00055 }
00056 
00057 string
00058 Document::get_data() const
00059 {
00060     DEBUGAPICALL(string, "Document::get_data", "");
00061     RETURN(internal->get_data());
00062 }
00063 
00064 void
00065 Document::set_data(const string &data)
00066 {
00067     DEBUGAPICALL(void, "Document::set_data", data);
00068     internal->set_data(data);
00069 }
00070 
00071 void
00072 Document::operator=(const Document &other)
00073 {
00074     // pointers are reference counted.
00075     internal = other.internal;
00076 }
00077 
00078 Document::Document(const Document &other)
00079         : internal(other.internal)
00080 {
00081 }
00082 
00083 Document::~Document()
00084 {
00085 }
00086 
00087 string
00088 Document::get_description() const
00089 {
00090     return "Document(" + internal->get_description() + ")";
00091 }
00092 
00093 void
00094 Document::add_value(Xapian::valueno valueno, const string &value)
00095 {
00096     DEBUGAPICALL(void, "Document::add_value", valueno << ", " << value);
00097     internal->add_value(valueno, value);
00098 }
00099 
00100 void
00101 Document::remove_value(Xapian::valueno valueno)
00102 {
00103     DEBUGAPICALL(void, "Document::remove_value", valueno);
00104     internal->remove_value(valueno);
00105 }
00106 
00107 void
00108 Document::clear_values()
00109 {
00110     DEBUGAPICALL(void, "Document::clear_values", "");
00111     internal->clear_values();
00112 }
00113 
00114 void
00115 Document::add_posting(const string & tname,
00116                         Xapian::termpos tpos,
00117                         Xapian::termcount wdfinc)
00118 {
00119     DEBUGAPICALL(void, "Document::add_posting",
00120                  tname << ", " << tpos << ", " << wdfinc);
00121     if (tname.empty()) {
00122         throw InvalidArgumentError("Empty termnames aren't allowed.");
00123     }
00124     internal->add_posting(tname, tpos, wdfinc);
00125 }
00126 
00127 void
00128 Document::add_term(const string & tname, Xapian::termcount wdfinc)
00129 {
00130     DEBUGAPICALL(void, "Document::add_term", tname << ", " << wdfinc);
00131     if (tname.empty()) {
00132         throw InvalidArgumentError("Empty termnames aren't allowed.");
00133     }
00134     internal->add_term(tname, wdfinc);
00135 }
00136 
00137 void
00138 Document::remove_posting(const string & tname, Xapian::termpos tpos,
00139                          Xapian::termcount wdfdec)
00140 {
00141     DEBUGAPICALL(void, "Document::remove_posting",
00142                  tname << ", " << tpos << ", " << wdfdec);
00143     if (tname.empty()) {
00144         throw InvalidArgumentError("Empty termnames aren't allowed.");
00145     }
00146     internal->remove_posting(tname, tpos, wdfdec);
00147 }
00148 
00149 void
00150 Document::remove_term(const string & tname)
00151 {
00152     DEBUGAPICALL(void, "Document::remove_term", tname);
00153     internal->remove_term(tname);
00154 }
00155 
00156 void
00157 Document::clear_terms()
00158 {
00159     DEBUGAPICALL(void, "Document::clear_terms", "");
00160     internal->clear_terms();
00161 }
00162 
00163 Xapian::termcount
00164 Document::termlist_count() const {
00165     DEBUGAPICALL(Xapian::termcount, "Document::termlist_count", "");
00166     RETURN(internal->termlist_count());
00167 }
00168 
00169 TermIterator
00170 Document::termlist_begin() const
00171 {
00172     DEBUGAPICALL(TermIterator, "Document::termlist_begin", "");
00173     RETURN(TermIterator(internal->open_term_list()));
00174 }
00175 
00176 Xapian::termcount
00177 Document::values_count() const {
00178     DEBUGAPICALL(Xapian::termcount, "Document::values_count", "");
00179     RETURN(internal->values_count());
00180 }
00181 
00182 ValueIterator
00183 Document::values_begin() const
00184 {
00185     DEBUGAPICALL(ValueIterator, "Document::values_begin", "");
00186     // Force the values to be read and cached.
00187     internal->need_values();
00188     RETURN(ValueIterator(0, *this));
00189 }
00190 
00191 ValueIterator
00192 Document::values_end() const
00193 {
00194     DEBUGAPICALL(ValueIterator, "Document::values_end", "");
00195     RETURN(ValueIterator(internal->values_count(), *this));
00196 }
00197 
00198 docid
00199 Document::get_docid() const
00200 {
00201     DEBUGAPICALL(docid, "Document::get_docid", "");
00202     RETURN(internal->get_docid());
00203 }
00204 
00205 }
00206 
00208 
00209 void
00210 OmDocumentTerm::add_position(Xapian::termpos tpos)
00211 {
00212     DEBUGAPICALL(void, "OmDocumentTerm::add_position", tpos);
00213 
00214     // We generally expect term positions to be added in approximately
00215     // increasing order, so check the end first
00216     if (positions.empty() || tpos > positions.back()) {
00217         positions.push_back(tpos);
00218         return;
00219     }
00220 
00221     // Search for the position the term occurs at.  Use binary chop to
00222     // search, since this is a sorted list.
00223     vector<Xapian::termpos>::iterator i;
00224     i = lower_bound(positions.begin(), positions.end(), tpos);
00225     if (i == positions.end() || *i != tpos) {
00226         positions.insert(i, tpos);
00227     }
00228 }
00229 
00230 void
00231 OmDocumentTerm::remove_position(Xapian::termpos tpos)
00232 {
00233     DEBUGAPICALL(void, "OmDocumentTerm::remove_position", tpos);
00234     
00235     // Search for the position the term occurs at.  Use binary chop to
00236     // search, since this is a sorted list.
00237     vector<Xapian::termpos>::iterator i;
00238     i = lower_bound(positions.begin(), positions.end(), tpos);
00239     if (i == positions.end() || *i != tpos) {
00240         throw Xapian::InvalidArgumentError("Position `" + om_tostring(tpos) +
00241                                      "' not found in list of positions that `" +
00242                                      tname +
00243                                      "' occurs at,"
00244                                      " when removing position from list");
00245     }
00246     positions.erase(i);
00247 }
00248 
00249 string
00250 OmDocumentTerm::get_description() const
00251 {
00252     string description;
00253 
00254     description = "OmDocumentTerm(" + tname +
00255             ", wdf = " + om_tostring(wdf) +
00256             ", positions[" + om_tostring(positions.size()) + "]" +
00257             ")";
00258     return description;
00259 }
00260 
00261 string
00262 Xapian::Document::Internal::get_value(Xapian::valueno valueid) const
00263 {
00264     if (values_here) {
00265         map<Xapian::valueno, string>::const_iterator i;
00266         i = values.find(valueid);
00267         if (i == values.end()) return string();
00268         return i->second;
00269     }
00270     if (!database.get()) return string();
00271     return do_get_value(valueid);
00272 }
00273         
00274 string
00275 Xapian::Document::Internal::get_data() const
00276 {
00277     if (data_here) return data;
00278     if (!database.get()) return string();
00279     return do_get_data();
00280 }
00281 
00282 void
00283 Xapian::Document::Internal::set_data(const string &data_)
00284 {
00285     data = data_;
00286     data_here = true;
00287 }
00288 
00289 TermList *
00290 Xapian::Document::Internal::open_term_list() const
00291 {
00292     DEBUGCALL(MATCH, TermList *, "Document::Internal::open_term_list", "");
00293     if (terms_here) {
00294         RETURN(new MapTermList(terms.begin(), terms.end()));
00295     }
00296     if (!database.get()) RETURN(NULL);
00297     RETURN(database->open_term_list(did));
00298 }
00299 
00300 void
00301 Xapian::Document::Internal::add_value(Xapian::valueno valueno, const string &value)
00302 {
00303     need_values();
00304     if (!value.empty()) {
00305         values[valueno] = value;
00306     } else {
00307         // Empty values aren't stored, but replace any existing value by
00308         // removing it.
00309         values.erase(valueno);
00310     }
00311     value_nos.clear();
00312 }
00313 
00314 void
00315 Xapian::Document::Internal::remove_value(Xapian::valueno valueno)
00316 {
00317     need_values();
00318     map<Xapian::valueno, string>::iterator i = values.find(valueno);
00319     if (i == values.end()) {
00320         throw Xapian::InvalidArgumentError("Value #" + om_tostring(valueno) +
00321                 " is not present in document, in "
00322                 "Xapian::Document::Internal::remove_value()");
00323     }
00324     values.erase(i);
00325     value_nos.clear();
00326 }
00327 
00328 void
00329 Xapian::Document::Internal::clear_values()
00330 {
00331     values.clear();
00332     value_nos.clear();
00333     values_here = true;
00334 }
00335 
00336 void
00337 Xapian::Document::Internal::add_posting(const string & tname, Xapian::termpos tpos,
00338                               Xapian::termcount wdfinc)
00339 {
00340     need_terms();
00341 
00342     map<string, OmDocumentTerm>::iterator i;
00343     i = terms.find(tname);
00344     if (i == terms.end()) {
00345         OmDocumentTerm newterm(tname, wdfinc);
00346         newterm.add_position(tpos);
00347         terms.insert(make_pair(tname, newterm));
00348     } else {
00349         i->second.add_position(tpos);
00350         if (wdfinc) i->second.inc_wdf(wdfinc);
00351     }
00352 }
00353 
00354 void
00355 Xapian::Document::Internal::add_term(const string & tname, Xapian::termcount wdfinc)
00356 {
00357     need_terms();
00358 
00359     map<string, OmDocumentTerm>::iterator i;
00360     i = terms.find(tname);
00361     if (i == terms.end()) {
00362         OmDocumentTerm newterm(tname, wdfinc);
00363         terms.insert(make_pair(tname, newterm));
00364     } else {
00365         if (wdfinc) i->second.inc_wdf(wdfinc);
00366     }
00367 }
00368 
00369 void
00370 Xapian::Document::Internal::remove_posting(const string & tname,
00371                                            Xapian::termpos tpos,
00372                                            Xapian::termcount wdfdec)    
00373 {
00374     need_terms();
00375 
00376     map<string, OmDocumentTerm>::iterator i;
00377     i = terms.find(tname);
00378     if (i == terms.end()) {
00379         throw Xapian::InvalidArgumentError("Term `" + tname +
00380                 "' is not present in document, in "
00381                 "Xapian::Document::Internal::remove_posting()");
00382     }
00383     i->second.remove_position(tpos);
00384     if (wdfdec) i->second.dec_wdf(wdfdec);
00385 }
00386 
00387 void
00388 Xapian::Document::Internal::remove_term(const string & tname)
00389 {
00390     need_terms();
00391     map<string, OmDocumentTerm>::iterator i;
00392     i = terms.find(tname);
00393     if (i == terms.end()) {
00394         throw Xapian::InvalidArgumentError("Term `" + tname +
00395                 "' is not present in document, in "
00396                 "Xapian::Document::Internal::remove_term()");
00397     }
00398     terms.erase(i);
00399 }
00400         
00401 void
00402 Xapian::Document::Internal::clear_terms()
00403 {
00404     terms.clear();
00405     terms_here = true;
00406 }
00407 
00408 Xapian::termcount
00409 Xapian::Document::Internal::termlist_count() const
00410 {
00411     if (!terms_here) {
00412         // How equivalent is this line to the rest?
00413         // return database.get() ? database->open_term_list(did)->get_approx_size() : 0;
00414         need_terms();
00415     }
00416     Assert(terms_here);
00417     return terms.size();
00418 }
00419 
00420 void
00421 Xapian::Document::Internal::need_terms() const
00422 {
00423     if (terms_here) return;
00424     if (database.get()) {
00425         Xapian::TermIterator t(database->open_term_list(did));
00426         Xapian::TermIterator tend(NULL);
00427         for ( ; t != tend; ++t) {
00428             Xapian::PositionIterator p = t.positionlist_begin();
00429             Xapian::PositionIterator pend = t.positionlist_end();
00430             OmDocumentTerm term(*t, t.get_wdf());
00431             for ( ; p != pend; ++p) {
00432                 term.add_position(*p);
00433             }
00434             terms.insert(make_pair(*t, term));
00435         }
00436     }
00437     terms_here = true;
00438 }
00439 
00440 Xapian::valueno
00441 Xapian::Document::Internal::values_count() const
00442 {
00443     DEBUGLINE(UNKNOWN, "Xapian::Document::Internal::values_count() called");
00444     need_values();
00445     Assert(values_here);
00446     return values.size();
00447 }
00448 
00449 string
00450 Xapian::Document::Internal::get_description() const
00451 {
00452     string description = "Xapian::Document::Internal(";
00453 
00454     if (data_here) description += "data=`" + data + "'";
00455 
00456     if (values_here) {
00457         if (data_here) description += ", ";
00458         description += "values[" + om_tostring(values.size()) + "]";
00459     }
00460 
00461     if (terms_here) {
00462         if (data_here || values_here) description += ", ";
00463         description += "terms[" + om_tostring(terms.size()) + "]";
00464     }
00465 
00466     if (database.get()) {
00467         if (data_here || values_here || terms_here) description += ", ";
00468         description += "doc=";
00469         description += "?"; // do_get_description(); ?
00470     }
00471 
00472     description += ')';
00473 
00474     return description;
00475 }
00476 
00477 void
00478 Xapian::Document::Internal::need_values() const
00479 {
00480     if (!values_here) {
00481         if (database.get()) {
00482             values = do_get_all_values();
00483             value_nos.clear();
00484         }
00485         values_here = true;
00486     }
00487 }
00488 
00489 Xapian::Document::Internal::~Internal()
00490 {
00491     if (database.get())
00492         database->invalidate_doc_object(this);
00493 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.