00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025
00026 #include <xapian/document.h>
00027
00028 #include "document.h"
00029 #include "documentvaluelist.h"
00030 #include "maptermlist.h"
00031 #include "serialise.h"
00032 #include "str.h"
00033
00034 #include <xapian/error.h>
00035 #include <xapian/types.h>
00036 #include <xapian/valueiterator.h>
00037
00038 #include <algorithm>
00039 #include <string>
00040
00041 using namespace std;
00042
00043 namespace Xapian {
00044
00045
00046
00047 Document::Document(Document::Internal *internal_) : internal(internal_)
00048 {
00049 }
00050
00051 Document::Document() : internal(new Xapian::Document::Internal)
00052 {
00053 }
00054
00055 string
00056 Document::get_value(Xapian::valueno slot) const
00057 {
00058 LOGCALL(API, string, "Document::get_value", slot);
00059 RETURN(internal->get_value(slot));
00060 }
00061
00062 string
00063 Document::get_data() const
00064 {
00065 LOGCALL(API, string, "Document::get_data", NO_ARGS);
00066 RETURN(internal->get_data());
00067 }
00068
00069 void
00070 Document::set_data(const string &data)
00071 {
00072 LOGCALL_VOID(API, "Document::set_data", data);
00073 internal->set_data(data);
00074 }
00075
00076 void
00077 Document::operator=(const Document &other)
00078 {
00079
00080 internal = other.internal;
00081 }
00082
00083 Document::Document(const Document &other)
00084 : internal(other.internal)
00085 {
00086 }
00087
00088 Document::~Document()
00089 {
00090 }
00091
00092 string
00093 Document::get_description() const
00094 {
00095 return "Document(" + internal->get_description() + ")";
00096 }
00097
00098 void
00099 Document::add_value(Xapian::valueno slot, const string &value)
00100 {
00101 LOGCALL_VOID(API, "Document::add_value", slot | value);
00102 internal->add_value(slot, value);
00103 }
00104
00105 void
00106 Document::remove_value(Xapian::valueno slot)
00107 {
00108 LOGCALL_VOID(API, "Document::remove_value", slot);
00109 internal->remove_value(slot);
00110 }
00111
00112 void
00113 Document::clear_values()
00114 {
00115 LOGCALL_VOID(API, "Document::clear_values", NO_ARGS);
00116 internal->clear_values();
00117 }
00118
00119 void
00120 Document::add_posting(const string & tname,
00121 Xapian::termpos tpos,
00122 Xapian::termcount wdfinc)
00123 {
00124 LOGCALL_VOID(API, "Document::add_posting", tname | tpos | wdfinc);
00125 if (tname.empty()) {
00126 throw InvalidArgumentError("Empty termnames aren't allowed.");
00127 }
00128 internal->add_posting(tname, tpos, wdfinc);
00129 }
00130
00131 void
00132 Document::add_term(const string & tname, Xapian::termcount wdfinc)
00133 {
00134 LOGCALL_VOID(API, "Document::add_term", tname | wdfinc);
00135 if (tname.empty()) {
00136 throw InvalidArgumentError("Empty termnames aren't allowed.");
00137 }
00138 internal->add_term(tname, wdfinc);
00139 }
00140
00141 void
00142 Document::remove_posting(const string & tname, Xapian::termpos tpos,
00143 Xapian::termcount wdfdec)
00144 {
00145 LOGCALL_VOID(API, "Document::remove_posting", tname | tpos | wdfdec);
00146 if (tname.empty()) {
00147 throw InvalidArgumentError("Empty termnames aren't allowed.");
00148 }
00149 internal->remove_posting(tname, tpos, wdfdec);
00150 }
00151
00152 void
00153 Document::remove_term(const string & tname)
00154 {
00155 LOGCALL_VOID(API, "Document::remove_term", tname);
00156 internal->remove_term(tname);
00157 }
00158
00159 void
00160 Document::clear_terms()
00161 {
00162 LOGCALL_VOID(API, "Document::clear_terms", NO_ARGS);
00163 internal->clear_terms();
00164 }
00165
00166 Xapian::termcount
00167 Document::termlist_count() const {
00168 LOGCALL(API, Xapian::termcount, "Document::termlist_count", NO_ARGS);
00169 RETURN(internal->termlist_count());
00170 }
00171
00172 TermIterator
00173 Document::termlist_begin() const
00174 {
00175 LOGCALL(API, TermIterator, "Document::termlist_begin", NO_ARGS);
00176 RETURN(TermIterator(internal->open_term_list()));
00177 }
00178
00179 Xapian::termcount
00180 Document::values_count() const {
00181 LOGCALL(API, Xapian::termcount, "Document::values_count", NO_ARGS);
00182 RETURN(internal->values_count());
00183 }
00184
00185 ValueIterator
00186 Document::values_begin() const
00187 {
00188 LOGCALL(API, ValueIterator, "Document::values_begin", NO_ARGS);
00189
00190
00191 if (internal->values_count() == 0) RETURN(ValueIterator());
00192 RETURN(ValueIterator(new DocumentValueList(internal)));
00193 }
00194
00195 docid
00196 Document::get_docid() const
00197 {
00198 LOGCALL(API, docid, "Document::get_docid", NO_ARGS);
00199 RETURN(internal->get_docid());
00200 }
00201
00202 std::string
00203 Document::serialise() const
00204 {
00205 LOGCALL(API, std::string, "Document::serialise", NO_ARGS);
00206 return serialise_document(*this);
00207 }
00208
00209 Document
00210 Document::unserialise(const std::string &s)
00211 {
00212 LOGCALL_STATIC(API, Document, "Document::unserialise", s);
00213 return unserialise_document(s);
00214 }
00215
00216 }
00217
00219
00220 void
00221 OmDocumentTerm::add_position(Xapian::termpos tpos)
00222 {
00223 LOGCALL_VOID(DB, "OmDocumentTerm::add_position", tpos);
00224
00225
00226
00227 if (positions.empty() || tpos > positions.back()) {
00228 positions.push_back(tpos);
00229 return;
00230 }
00231
00232
00233
00234 vector<Xapian::termpos>::iterator i;
00235 i = lower_bound(positions.begin(), positions.end(), tpos);
00236 if (i == positions.end() || *i != tpos) {
00237 positions.insert(i, tpos);
00238 }
00239 }
00240
00241 void
00242 OmDocumentTerm::remove_position(Xapian::termpos tpos)
00243 {
00244 LOGCALL_VOID(DB, "OmDocumentTerm::remove_position", tpos);
00245
00246
00247
00248 vector<Xapian::termpos>::iterator i;
00249 i = lower_bound(positions.begin(), positions.end(), tpos);
00250 if (i == positions.end() || *i != tpos) {
00251 throw Xapian::InvalidArgumentError("Position `" + str(tpos) +
00252 "' not found in list of positions that `" +
00253 tname +
00254 "' occurs at,"
00255 " when removing position from list");
00256 }
00257 positions.erase(i);
00258 }
00259
00260 string
00261 OmDocumentTerm::get_description() const
00262 {
00263 string description;
00264
00265 description = "OmDocumentTerm(" + tname +
00266 ", wdf = " + str(wdf) +
00267 ", positions[" + str(positions.size()) + "]" +
00268 ")";
00269 return description;
00270 }
00271
00272 string
00273 Xapian::Document::Internal::get_value(Xapian::valueno slot) const
00274 {
00275 if (values_here) {
00276 map<Xapian::valueno, string>::const_iterator i;
00277 i = values.find(slot);
00278 if (i == values.end()) return string();
00279 return i->second;
00280 }
00281 if (!database.get()) return string();
00282 return do_get_value(slot);
00283 }
00284
00285 string
00286 Xapian::Document::Internal::get_data() const
00287 {
00288 if (data_here) return data;
00289 if (!database.get()) return string();
00290 return do_get_data();
00291 }
00292
00293 void
00294 Xapian::Document::Internal::set_data(const string &data_)
00295 {
00296 data = data_;
00297 data_here = true;
00298 }
00299
00300 TermList *
00301 Xapian::Document::Internal::open_term_list() const
00302 {
00303 LOGCALL(DB, TermList *, "Document::Internal::open_term_list", NO_ARGS);
00304 if (terms_here) {
00305 RETURN(new MapTermList(terms.begin(), terms.end()));
00306 }
00307 if (!database.get()) RETURN(NULL);
00308 RETURN(database->open_term_list(did));
00309 }
00310
00311 void
00312 Xapian::Document::Internal::add_value(Xapian::valueno slot, const string &value)
00313 {
00314 need_values();
00315 if (!value.empty()) {
00316 values[slot] = value;
00317 } else {
00318
00319
00320 values.erase(slot);
00321 }
00322 }
00323
00324 void
00325 Xapian::Document::Internal::remove_value(Xapian::valueno slot)
00326 {
00327 need_values();
00328 map<Xapian::valueno, string>::iterator i = values.find(slot);
00329 if (i == values.end()) {
00330 throw Xapian::InvalidArgumentError("Value #" + str(slot) +
00331 " is not present in document, in "
00332 "Xapian::Document::Internal::remove_value()");
00333 }
00334 values.erase(i);
00335 }
00336
00337 void
00338 Xapian::Document::Internal::clear_values()
00339 {
00340 values.clear();
00341 values_here = true;
00342 }
00343
00344 void
00345 Xapian::Document::Internal::add_posting(const string & tname, Xapian::termpos tpos,
00346 Xapian::termcount wdfinc)
00347 {
00348 need_terms();
00349 positions_modified = true;
00350
00351 map<string, OmDocumentTerm>::iterator i;
00352 i = terms.find(tname);
00353 if (i == terms.end()) {
00354 OmDocumentTerm newterm(tname, wdfinc);
00355 newterm.add_position(tpos);
00356 terms.insert(make_pair(tname, newterm));
00357 } else {
00358 i->second.add_position(tpos);
00359 if (wdfinc) i->second.inc_wdf(wdfinc);
00360 }
00361 }
00362
00363 void
00364 Xapian::Document::Internal::add_term(const string & tname, Xapian::termcount wdfinc)
00365 {
00366 need_terms();
00367
00368 map<string, OmDocumentTerm>::iterator i;
00369 i = terms.find(tname);
00370 if (i == terms.end()) {
00371 OmDocumentTerm newterm(tname, wdfinc);
00372 terms.insert(make_pair(tname, newterm));
00373 } else {
00374 if (wdfinc) i->second.inc_wdf(wdfinc);
00375 }
00376 }
00377
00378 void
00379 Xapian::Document::Internal::remove_posting(const string & tname,
00380 Xapian::termpos tpos,
00381 Xapian::termcount wdfdec)
00382 {
00383 need_terms();
00384
00385 map<string, OmDocumentTerm>::iterator i;
00386 i = terms.find(tname);
00387 if (i == terms.end()) {
00388 throw Xapian::InvalidArgumentError("Term `" + tname +
00389 "' is not present in document, in "
00390 "Xapian::Document::Internal::remove_posting()");
00391 }
00392 i->second.remove_position(tpos);
00393 if (wdfdec) i->second.dec_wdf(wdfdec);
00394 positions_modified = true;
00395 }
00396
00397 void
00398 Xapian::Document::Internal::remove_term(const string & tname)
00399 {
00400 need_terms();
00401 map<string, OmDocumentTerm>::iterator i;
00402 i = terms.find(tname);
00403 if (i == terms.end()) {
00404 throw Xapian::InvalidArgumentError("Term `" + tname +
00405 "' is not present in document, in "
00406 "Xapian::Document::Internal::remove_term()");
00407 }
00408 positions_modified = !i->second.positions.empty();
00409 terms.erase(i);
00410 }
00411
00412 void
00413 Xapian::Document::Internal::clear_terms()
00414 {
00415 terms.clear();
00416 terms_here = true;
00417
00418
00419 positions_modified = true;
00420 }
00421
00422 Xapian::termcount
00423 Xapian::Document::Internal::termlist_count() const
00424 {
00425 if (!terms_here) {
00426
00427
00428 need_terms();
00429 }
00430 Assert(terms_here);
00431 return terms.size();
00432 }
00433
00434 void
00435 Xapian::Document::Internal::need_terms() const
00436 {
00437 if (terms_here) return;
00438 if (database.get()) {
00439 Xapian::TermIterator t(database->open_term_list(did));
00440 Xapian::TermIterator tend(NULL);
00441 for ( ; t != tend; ++t) {
00442 Xapian::PositionIterator p = t.positionlist_begin();
00443 Xapian::PositionIterator pend = t.positionlist_end();
00444 OmDocumentTerm term(*t, t.get_wdf());
00445 for ( ; p != pend; ++p) {
00446 term.add_position(*p);
00447 }
00448 terms.insert(make_pair(*t, term));
00449 }
00450 }
00451 terms_here = true;
00452 }
00453
00454 Xapian::valueno
00455 Xapian::Document::Internal::values_count() const
00456 {
00457 LOGCALL(DB, Xapian::valueno, "Document::Internal::values_count", NO_ARGS);
00458 need_values();
00459 Assert(values_here);
00460 RETURN(values.size());
00461 }
00462
00463 string
00464 Xapian::Document::Internal::get_description() const
00465 {
00466 string description = "Xapian::Document::Internal(";
00467
00468 if (data_here) description += "data=`" + data + "'";
00469
00470 if (values_here) {
00471 if (data_here) description += ", ";
00472 description += "values[" + str(values.size()) + "]";
00473 }
00474
00475 if (terms_here) {
00476 if (data_here || values_here) description += ", ";
00477 description += "terms[" + str(terms.size()) + "]";
00478 }
00479
00480 if (database.get()) {
00481 if (data_here || values_here || terms_here) description += ", ";
00482 description += "doc=";
00483 description += "?";
00484 }
00485
00486 description += ')';
00487
00488 return description;
00489 }
00490
00491 void
00492 Xapian::Document::Internal::need_values() const
00493 {
00494 if (!values_here) {
00495 if (database.get()) {
00496 Assert(values.empty());
00497 do_get_all_values(values);
00498 }
00499 values_here = true;
00500 }
00501 }
00502
00503 Xapian::Document::Internal::~Internal()
00504 {
00505 if (database.get())
00506 database->invalidate_doc_object(this);
00507 }