00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <config.h>
00025
00026 #include "inmemory_database.h"
00027
00028 #include "debuglog.h"
00029
00030 #include "expandweight.h"
00031 #include "inmemory_document.h"
00032 #include "inmemory_alltermslist.h"
00033 #include "str.h"
00034 #include "valuestats.h"
00035
00036 #include <string>
00037 #include <vector>
00038 #include <map>
00039
00040 #include <xapian/error.h>
00041 #include <xapian/valueiterator.h>
00042
00043 using std::make_pair;
00044
00045 inline void
00046 InMemoryTerm::add_posting(const InMemoryPosting & post)
00047 {
00048
00049 vector<InMemoryPosting>::iterator p;
00050 p = lower_bound(docs.begin(), docs.end(),
00051 post, InMemoryPostingLessThan());
00052 if (p == docs.end() || InMemoryPostingLessThan()(post, *p)) {
00053 docs.insert(p, post);
00054 } else if (!p->valid) {
00055 *p = post;
00056 } else {
00057 (*p).merge(post);
00058 }
00059 }
00060
00061 inline void
00062 InMemoryDoc::add_posting(const InMemoryTermEntry & post)
00063 {
00064
00065 vector<InMemoryTermEntry>::iterator p;
00066 p = lower_bound(terms.begin(), terms.end(),
00067 post, InMemoryTermEntryLessThan());
00068 if (p == terms.end() || InMemoryTermEntryLessThan()(post, *p)) {
00069 terms.insert(p, post);
00070 } else {
00071 (*p).merge(post);
00072 }
00073 }
00074
00076
00078
00079 InMemoryPostList::InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_,
00080 const InMemoryTerm & imterm,
00081 const std::string & term_)
00082 : LeafPostList(term_),
00083 pos(imterm.docs.begin()),
00084 end(imterm.docs.end()),
00085 termfreq(imterm.term_freq),
00086 started(false),
00087 db(db_)
00088 {
00089 while (pos != end && !pos->valid) ++pos;
00090 }
00091
00092 Xapian::doccount
00093 InMemoryPostList::get_termfreq() const
00094 {
00095 return termfreq;
00096 }
00097
00098 Xapian::docid
00099 InMemoryPostList::get_docid() const
00100 {
00101 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00102 Assert(started);
00103 Assert(!at_end());
00104 return (*pos).did;
00105 }
00106
00107 PostList *
00108 InMemoryPostList::next(Xapian::weight )
00109 {
00110 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00111 if (started) {
00112 Assert(!at_end());
00113 ++pos;
00114 while (pos != end && !pos->valid) ++pos;
00115 } else {
00116 started = true;
00117 }
00118 return NULL;
00119 }
00120
00121 PostList *
00122 InMemoryPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
00123 {
00124 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00125
00126
00127
00128
00129
00130
00131 started = true;
00132 Assert(!at_end());
00133 while (!at_end() && (*pos).did < did) {
00134 (void) next(w_min);
00135 }
00136 return NULL;
00137 }
00138
00139 bool
00140 InMemoryPostList::at_end() const
00141 {
00142 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00143 return (pos == end);
00144 }
00145
00146 string
00147 InMemoryPostList::get_description() const
00148 {
00149 return "InMemoryPostList " + str(termfreq);
00150 }
00151
00152 Xapian::termcount
00153 InMemoryPostList::get_doclength() const
00154 {
00155 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00156 return db->get_doclength(get_docid());
00157 }
00158
00159 PositionList *
00160 InMemoryPostList::read_position_list()
00161 {
00162 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00163 mypositions.set_data(pos->positions);
00164 return &mypositions;
00165 }
00166
00167 PositionList *
00168 InMemoryPostList::open_position_list() const
00169 {
00170 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00171 return new InMemoryPositionList(pos->positions);
00172 }
00173
00174 Xapian::termcount
00175 InMemoryPostList::get_wdf() const
00176 {
00177 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00178 return (*pos).wdf;
00179 }
00180
00182
00184
00185 InMemoryTermList::InMemoryTermList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_,
00186 Xapian::docid did_,
00187 const InMemoryDoc & doc,
00188 Xapian::termcount len)
00189 : pos(doc.terms.begin()), end(doc.terms.end()), terms(doc.terms.size()),
00190 started(false), db(db_), did(did_), document_length(len)
00191 {
00192 LOGLINE(DB, "InMemoryTermList::InMemoryTermList(): " <<
00193 terms << " terms starting from " << pos->tname);
00194 }
00195
00196 Xapian::termcount
00197 InMemoryTermList::get_wdf() const
00198 {
00199 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00200 Assert(started);
00201 Assert(!at_end());
00202 return (*pos).wdf;
00203 }
00204
00205 Xapian::doccount
00206 InMemoryTermList::get_termfreq() const
00207 {
00208 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00209 Assert(started);
00210 Assert(!at_end());
00211
00212 return db->get_termfreq((*pos).tname);
00213 }
00214
00215 Xapian::termcount
00216 InMemoryTermList::get_approx_size() const
00217 {
00218 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00219 return terms;
00220 }
00221
00222 void
00223 InMemoryTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
00224 {
00225 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00226 Assert(started);
00227 Assert(!at_end());
00228 stats.accumulate(InMemoryTermList::get_wdf(), document_length,
00229 InMemoryTermList::get_termfreq(),
00230 db->get_doccount());
00231 }
00232
00233 string
00234 InMemoryTermList::get_termname() const
00235 {
00236 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00237 Assert(started);
00238 Assert(!at_end());
00239 return (*pos).tname;
00240 }
00241
00242 TermList *
00243 InMemoryTermList::next()
00244 {
00245 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00246 if (started) {
00247 Assert(!at_end());
00248 ++pos;
00249 } else {
00250 started = true;
00251 }
00252 return NULL;
00253 }
00254
00255 TermList *
00256 InMemoryTermList::skip_to(const string & term)
00257 {
00258 if (rare(db->is_closed()))
00259 InMemoryDatabase::throw_database_closed();
00260
00261 while (pos != end && pos->tname < term) {
00262 ++pos;
00263 }
00264
00265 started = true;
00266 return NULL;
00267 }
00268
00269 bool
00270 InMemoryTermList::at_end() const
00271 {
00272 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00273 Assert(started);
00274 return (pos == end);
00275 }
00276
00277 Xapian::termcount
00278 InMemoryTermList::positionlist_count() const
00279 {
00280 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00281 return db->positionlist_count(did, (*pos).tname);
00282 }
00283
00284 Xapian::PositionIterator
00285 InMemoryTermList::positionlist_begin() const
00286 {
00287 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00288 return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname));
00289 }
00290
00292
00294
00295 InMemoryAllDocsPostList::InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_)
00296 : LeafPostList(std::string()), did(0), db(db_)
00297 {
00298 }
00299
00300 Xapian::doccount
00301 InMemoryAllDocsPostList::get_termfreq() const
00302 {
00303 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00304 return db->totdocs;
00305 }
00306
00307 Xapian::docid
00308 InMemoryAllDocsPostList::get_docid() const
00309 {
00310 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00311 Assert(did > 0);
00312 Assert(did <= db->termlists.size());
00313 Assert(db->termlists[did - 1].is_valid);
00314 return did;
00315 }
00316
00317 Xapian::termcount
00318 InMemoryAllDocsPostList::get_doclength() const
00319 {
00320 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00321 return db->get_doclength(did);
00322 }
00323
00324 Xapian::termcount
00325 InMemoryAllDocsPostList::get_wdf() const
00326 {
00327 return 1;
00328 }
00329
00330 PositionList *
00331 InMemoryAllDocsPostList::read_position_list()
00332 {
00333 throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
00334 }
00335
00336 PositionList *
00337 InMemoryAllDocsPostList::open_position_list() const
00338 {
00339 throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
00340 }
00341
00342 PostList *
00343 InMemoryAllDocsPostList::next(Xapian::weight )
00344 {
00345 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00346 Assert(!at_end());
00347 do {
00348 ++did;
00349 } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
00350 return NULL;
00351 }
00352
00353 PostList *
00354 InMemoryAllDocsPostList::skip_to(Xapian::docid did_, Xapian::weight )
00355 {
00356 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00357 Assert(!at_end());
00358 if (did <= did_) {
00359 did = did_;
00360 while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
00361 ++did;
00362 }
00363 }
00364 return NULL;
00365 }
00366
00367 bool
00368 InMemoryAllDocsPostList::at_end() const
00369 {
00370 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
00371 return (did > db->termlists.size());
00372 }
00373
00374 string
00375 InMemoryAllDocsPostList::get_description() const
00376 {
00377 return "InMemoryAllDocsPostList " + str(did);
00378 }
00379
00381
00383
00384 InMemoryDatabase::InMemoryDatabase()
00385 : totdocs(0), totlen(0), positions_present(false), closed(false)
00386 {
00387
00388 transaction_state = TRANSACTION_UNIMPLEMENTED;
00389
00390
00391
00392 postlists.insert(make_pair(string(), InMemoryTerm()));
00393 }
00394
00395 InMemoryDatabase::~InMemoryDatabase()
00396 {
00397 dtor_called();
00398 }
00399
00400 void
00401 InMemoryDatabase::reopen()
00402 {
00403 if (closed) InMemoryDatabase::throw_database_closed();
00404 }
00405
00406 void
00407 InMemoryDatabase::close()
00408 {
00409
00410 postlists.clear();
00411 termlists.clear();
00412 doclists.clear();
00413 valuelists.clear();
00414 valuestats.clear();
00415 doclengths.clear();
00416 metadata.clear();
00417 closed = true;
00418 }
00419
00420 LeafPostList *
00421 InMemoryDatabase::open_post_list(const string & tname) const
00422 {
00423 if (closed) InMemoryDatabase::throw_database_closed();
00424 if (tname.empty()) {
00425 Xapian::Internal::RefCntPtr<const InMemoryDatabase> ptrtothis(this);
00426 return new InMemoryAllDocsPostList(ptrtothis);
00427 }
00428 map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
00429 if (i == postlists.end() || i->second.term_freq == 0) {
00430 i = postlists.begin();
00431
00432 Assert(i->first.empty());
00433 }
00434 Xapian::Internal::RefCntPtr<const InMemoryDatabase> ptrtothis(this);
00435 return new InMemoryPostList(ptrtothis, i->second, tname);
00436 }
00437
00438 bool
00439 InMemoryDatabase::doc_exists(Xapian::docid did) const
00440 {
00441 if (closed) InMemoryDatabase::throw_database_closed();
00442 return (did > 0 && did <= termlists.size() && termlists[did - 1].is_valid);
00443 }
00444
00445 Xapian::doccount
00446 InMemoryDatabase::get_termfreq(const string & tname) const
00447 {
00448 if (closed) InMemoryDatabase::throw_database_closed();
00449 map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
00450 if (i == postlists.end()) return 0;
00451 return i->second.term_freq;
00452 }
00453
00454 Xapian::termcount
00455 InMemoryDatabase::get_collection_freq(const string &tname) const
00456 {
00457 if (closed) InMemoryDatabase::throw_database_closed();
00458 map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
00459 if (i == postlists.end()) return 0;
00460 return i->second.collection_freq;
00461 }
00462
00463 Xapian::doccount
00464 InMemoryDatabase::get_value_freq(Xapian::valueno slot) const
00465 {
00466 if (closed) InMemoryDatabase::throw_database_closed();
00467 map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
00468 if (i == valuestats.end()) return 0;
00469 return i->second.freq;
00470 }
00471
00472 std::string
00473 InMemoryDatabase::get_value_lower_bound(Xapian::valueno slot) const
00474 {
00475 if (closed) InMemoryDatabase::throw_database_closed();
00476 map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
00477 if (i == valuestats.end()) return string();
00478 return i->second.lower_bound;
00479 }
00480
00481 std::string
00482 InMemoryDatabase::get_value_upper_bound(Xapian::valueno slot) const
00483 {
00484 if (closed) InMemoryDatabase::throw_database_closed();
00485 map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
00486 if (i == valuestats.end()) return string();
00487 return i->second.upper_bound;
00488 }
00489
00490 Xapian::doccount
00491 InMemoryDatabase::get_doccount() const
00492 {
00493 if (closed) InMemoryDatabase::throw_database_closed();
00494 return totdocs;
00495 }
00496
00497 Xapian::docid
00498 InMemoryDatabase::get_lastdocid() const
00499 {
00500 if (closed) InMemoryDatabase::throw_database_closed();
00501 return termlists.size();
00502 }
00503
00504 totlen_t
00505 InMemoryDatabase::get_total_length() const
00506 {
00507 return totlen;
00508 }
00509
00510 Xapian::doclength
00511 InMemoryDatabase::get_avlength() const
00512 {
00513 if (closed) InMemoryDatabase::throw_database_closed();
00514 if (totdocs == 0) return 0;
00515 return Xapian::doclength(totlen) / totdocs;
00516 }
00517
00518 Xapian::termcount
00519 InMemoryDatabase::get_doclength(Xapian::docid did) const
00520 {
00521 if (closed) InMemoryDatabase::throw_database_closed();
00522 if (!doc_exists(did)) {
00523 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
00524 string(" not found"));
00525 }
00526 return doclengths[did - 1];
00527 }
00528
00529 TermList *
00530 InMemoryDatabase::open_term_list(Xapian::docid did) const
00531 {
00532 if (closed) InMemoryDatabase::throw_database_closed();
00533 Assert(did != 0);
00534 if (!doc_exists(did)) {
00535
00536 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
00537 string(" not found"));
00538 }
00539 return new InMemoryTermList(Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this), did,
00540 termlists[did - 1], doclengths[did - 1]);
00541 }
00542
00543 Xapian::Document::Internal *
00544 InMemoryDatabase::open_document(Xapian::docid did, bool lazy) const
00545 {
00546 if (closed) InMemoryDatabase::throw_database_closed();
00547 Assert(did != 0);
00548 if (!doc_exists(did)) {
00549 if (lazy) return NULL;
00550
00551 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
00552 string(" not found"));
00553 }
00554 return new InMemoryDocument(this, did);
00555 }
00556
00557 std::string
00558 InMemoryDatabase::get_metadata(const std::string & key) const
00559 {
00560 if (closed) InMemoryDatabase::throw_database_closed();
00561 map<string, string>::const_iterator i = metadata.find(key);
00562 if (i == metadata.end())
00563 return string();
00564 return i->second;
00565 }
00566
00567 TermList *
00568 InMemoryDatabase::open_metadata_keylist(const string &) const
00569 {
00570 if (!closed && metadata.empty()) return NULL;
00571
00572 throw Xapian::UnimplementedError("InMemory backend doesn't currently implement Database::metadata_keys_begin()");
00573 }
00574
00575 void
00576 InMemoryDatabase::set_metadata(const std::string & key,
00577 const std::string & value)
00578 {
00579 if (closed) InMemoryDatabase::throw_database_closed();
00580 if (!value.empty()) {
00581 metadata[key] = value;
00582 } else {
00583 metadata.erase(key);
00584 }
00585 }
00586
00587 Xapian::termcount
00588 InMemoryDatabase::positionlist_count(Xapian::docid did,
00589 const string & tname) const
00590 {
00591 if (closed) InMemoryDatabase::throw_database_closed();
00592 if (!doc_exists(did)) {
00593 return 0;
00594 }
00595 const InMemoryDoc &doc = termlists[did-1];
00596
00597 vector<InMemoryTermEntry>::const_iterator i;
00598 for (i = doc.terms.begin(); i != doc.terms.end(); ++i) {
00599 if (i->tname == tname) {
00600 return i->positions.size();
00601 }
00602 }
00603 return 0;
00604 }
00605
00606 PositionList *
00607 InMemoryDatabase::open_position_list(Xapian::docid did,
00608 const string & tname) const
00609 {
00610 if (closed) InMemoryDatabase::throw_database_closed();
00611 if (usual(doc_exists(did))) {
00612 const InMemoryDoc &doc = termlists[did-1];
00613
00614 vector<InMemoryTermEntry>::const_iterator i;
00615 for (i = doc.terms.begin(); i != doc.terms.end(); ++i) {
00616 if (i->tname == tname) {
00617 return new InMemoryPositionList(i->positions);
00618 }
00619 }
00620 }
00621 return new InMemoryPositionList(false);
00622 }
00623
00624 void
00625 InMemoryDatabase::add_values(Xapian::docid did,
00626 const map<Xapian::valueno, string> &values_)
00627 {
00628 if (closed) InMemoryDatabase::throw_database_closed();
00629 if (did > valuelists.size()) {
00630 valuelists.resize(did);
00631 }
00632 valuelists[did-1] = values_;
00633
00634
00635 map<Xapian::valueno, string>::const_iterator j;
00636 for (j = values_.begin(); j != values_.end(); ++j) {
00637 std::pair<map<Xapian::valueno, ValueStats>::iterator, bool> i;
00638 i = valuestats.insert(make_pair(j->first, ValueStats()));
00639
00640
00641 if ((i.first->second.freq)++ == 0) {
00642
00643
00644 i.first->second.lower_bound = j->second;
00645 i.first->second.upper_bound = j->second;
00646 } else {
00647
00648 if (j->second < i.first->second.lower_bound) {
00649 i.first->second.lower_bound = j->second;
00650 }
00651 if (j->second > i.first->second.upper_bound) {
00652 i.first->second.upper_bound = j->second;
00653 }
00654 }
00655 }
00656 }
00657
00658
00659 void
00660 InMemoryDatabase::commit()
00661 {
00662 }
00663
00664
00665 void
00666 InMemoryDatabase::cancel()
00667 {
00668 }
00669
00670 void
00671 InMemoryDatabase::delete_document(Xapian::docid did)
00672 {
00673 if (closed) InMemoryDatabase::throw_database_closed();
00674 if (!doc_exists(did)) {
00675 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
00676 string(" not found"));
00677 }
00678 termlists[did-1].is_valid = false;
00679 doclists[did-1] = string();
00680 map<Xapian::valueno, string>::const_iterator j;
00681 for (j = valuelists[did-1].begin(); j != valuelists[did-1].end(); ++j) {
00682 map<Xapian::valueno, ValueStats>::iterator i;
00683 i = valuestats.find(j->first);
00684 if (--(i->second.freq) == 0) {
00685 i->second.lower_bound.resize(0);
00686 i->second.upper_bound.resize(0);
00687 }
00688 }
00689 valuelists[did-1].clear();
00690
00691 totlen -= doclengths[did-1];
00692 doclengths[did-1] = 0;
00693 totdocs--;
00694
00695
00696 if (totdocs == 0) positions_present = false;
00697
00698 vector<InMemoryTermEntry>::const_iterator i;
00699 for (i = termlists[did - 1].terms.begin();
00700 i != termlists[did - 1].terms.end();
00701 ++i) {
00702 map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
00703 Assert(t != postlists.end());
00704 t->second.collection_freq -= i->wdf;
00705 --t->second.term_freq;
00706 vector<InMemoryPosting>::iterator posting = t->second.docs.begin();
00707 while (posting != t->second.docs.end()) {
00708
00709
00710
00711 if (posting->did == did) posting->valid = false;
00712 ++posting;
00713 }
00714 }
00715 termlists[did-1].terms.clear();
00716 }
00717
00718 void
00719 InMemoryDatabase::replace_document(Xapian::docid did,
00720 const Xapian::Document & document)
00721 {
00722 LOGCALL_VOID(DB, "InMemoryDatabase::replace_document", did | document);
00723
00724 if (closed) InMemoryDatabase::throw_database_closed();
00725
00726 if (doc_exists(did)) {
00727 map<Xapian::valueno, string>::const_iterator j;
00728 for (j = valuelists[did-1].begin(); j != valuelists[did-1].end(); ++j) {
00729 map<Xapian::valueno, ValueStats>::iterator i;
00730 i = valuestats.find(j->first);
00731 if (--(i->second.freq) == 0) {
00732 i->second.lower_bound.resize(0);
00733 i->second.upper_bound.resize(0);
00734 }
00735 }
00736
00737 totlen -= doclengths[did - 1];
00738 totdocs--;
00739 } else if (did > termlists.size()) {
00740 termlists.resize(did);
00741 termlists[did - 1].is_valid = true;
00742 doclengths.resize(did);
00743 doclists.resize(did);
00744 valuelists.resize(did);
00745 } else {
00746 termlists[did - 1].is_valid = true;
00747 }
00748
00749 vector<InMemoryTermEntry>::const_iterator i;
00750 for (i = termlists[did - 1].terms.begin();
00751 i != termlists[did - 1].terms.end();
00752 ++i) {
00753 map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
00754 Assert(t != postlists.end());
00755 t->second.collection_freq -= i->wdf;
00756 --t->second.term_freq;
00757 vector<InMemoryPosting>::iterator posting = t->second.docs.begin();
00758 while (posting != t->second.docs.end()) {
00759
00760
00761
00762 if (posting->did == did) posting->valid = false;
00763 ++posting;
00764 }
00765 }
00766
00767 doclengths[did - 1] = 0;
00768 doclists[did - 1] = document.get_data();
00769
00770 finish_add_doc(did, document);
00771 }
00772
00773 Xapian::docid
00774 InMemoryDatabase::add_document(const Xapian::Document & document)
00775 {
00776 LOGCALL(DB, Xapian::docid, "InMemoryDatabase::add_document", document);
00777 if (closed) InMemoryDatabase::throw_database_closed();
00778
00779 Xapian::docid did = make_doc(document.get_data());
00780
00781 finish_add_doc(did, document);
00782
00783 RETURN(did);
00784 }
00785
00786 void
00787 InMemoryDatabase::finish_add_doc(Xapian::docid did, const Xapian::Document &document)
00788 {
00789 {
00790 map<Xapian::valueno, string> values;
00791 Xapian::ValueIterator k = document.values_begin();
00792 for ( ; k != document.values_end(); ++k) {
00793 values.insert(make_pair(k.get_valueno(), *k));
00794 LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding value " <<
00795 k.get_valueno() << " -> " << *k);
00796 }
00797 add_values(did, values);
00798 }
00799
00800 InMemoryDoc doc(true);
00801 Xapian::TermIterator i = document.termlist_begin();
00802 Xapian::TermIterator i_end = document.termlist_end();
00803 for ( ; i != i_end; ++i) {
00804 make_term(*i);
00805
00806 LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding term " << *i);
00807 Xapian::PositionIterator j = i.positionlist_begin();
00808 Xapian::PositionIterator j_end = i.positionlist_end();
00809
00810 if (j == j_end) {
00811
00812 make_posting(&doc, *i, did, 0, i.get_wdf(), false);
00813 } else {
00814 positions_present = true;
00815 for ( ; j != j_end; ++j) {
00816 make_posting(&doc, *i, did, *j, i.get_wdf());
00817 }
00818 }
00819
00820 Assert(did > 0 && did <= doclengths.size());
00821 doclengths[did - 1] += i.get_wdf();
00822 totlen += i.get_wdf();
00823 postlists[*i].collection_freq += i.get_wdf();
00824 ++postlists[*i].term_freq;
00825 }
00826 swap(termlists[did - 1], doc);
00827
00828 totdocs++;
00829 }
00830
00831 void
00832 InMemoryDatabase::make_term(const string & tname)
00833 {
00834 postlists[tname];
00835 }
00836
00837 Xapian::docid
00838 InMemoryDatabase::make_doc(const string & docdata)
00839 {
00840 termlists.push_back(InMemoryDoc(true));
00841 doclengths.push_back(0);
00842 doclists.push_back(docdata);
00843
00844 AssertEqParanoid(termlists.size(), doclengths.size());
00845
00846 return termlists.size();
00847 }
00848
00849 void InMemoryDatabase::make_posting(InMemoryDoc * doc,
00850 const string & tname,
00851 Xapian::docid did,
00852 Xapian::termpos position,
00853 Xapian::termcount wdf,
00854 bool use_position)
00855 {
00856 Assert(doc);
00857 Assert(postlists.find(tname) != postlists.end());
00858 Assert(did > 0 && did <= termlists.size());
00859 Assert(did > 0 && did <= doclengths.size());
00860 Assert(doc_exists(did));
00861
00862
00863 InMemoryPosting posting;
00864 posting.did = did;
00865 if (use_position) {
00866 posting.positions.push_back(position);
00867 }
00868 posting.wdf = wdf;
00869 posting.valid = true;
00870
00871
00872 postlists[tname].add_posting(posting);
00873
00874
00875 InMemoryTermEntry termentry;
00876 termentry.tname = tname;
00877 if (use_position) {
00878 termentry.positions.push_back(position);
00879 }
00880 termentry.wdf = wdf;
00881
00882
00883 doc->add_posting(termentry);
00884 }
00885
00886 bool
00887 InMemoryDatabase::term_exists(const string & tname) const
00888 {
00889 if (closed) InMemoryDatabase::throw_database_closed();
00890 Assert(!tname.empty());
00891 map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
00892 if (i == postlists.end()) return false;
00893 return (i->second.term_freq != 0);
00894 }
00895
00896 bool
00897 InMemoryDatabase::has_positions() const
00898 {
00899 if (closed) InMemoryDatabase::throw_database_closed();
00900 return positions_present;
00901 }
00902
00903 TermList *
00904 InMemoryDatabase::open_allterms(const string & prefix) const
00905 {
00906 if (closed) InMemoryDatabase::throw_database_closed();
00907 return new InMemoryAllTermsList(&postlists,
00908 Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this),
00909 prefix);
00910 }
00911
00912 void
00913 InMemoryDatabase::throw_database_closed()
00914 {
00915 throw Xapian::DatabaseError("Database has been closed");
00916 }