00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include "brass_postlist.h"
00026
00027 #include "brass_cursor.h"
00028 #include "brass_database.h"
00029 #include "debuglog.h"
00030 #include "noreturn.h"
00031 #include "pack.h"
00032 #include "str.h"
00033
00034 Xapian::doccount
00035 BrassPostListTable::get_termfreq(const string & term) const
00036 {
00037 string key = make_key(term);
00038 string tag;
00039 if (!get_exact_entry(key, tag)) return 0;
00040
00041 Xapian::doccount termfreq;
00042 const char * p = tag.data();
00043 BrassPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
00044 return termfreq;
00045 }
00046
00047 Xapian::termcount
00048 BrassPostListTable::get_collection_freq(const string & term) const
00049 {
00050 string key = make_key(term);
00051 string tag;
00052 if (!get_exact_entry(key, tag)) return 0;
00053
00054 Xapian::termcount collfreq;
00055 const char * p = tag.data();
00056 BrassPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
00057 return collfreq;
00058 }
00059
00060 Xapian::termcount
00061 BrassPostListTable::get_doclength(Xapian::docid did,
00062 Xapian::Internal::RefCntPtr<const BrassDatabase> db) const {
00063 if (!doclen_pl.get()) {
00064
00065
00066 doclen_pl.reset(new BrassPostList(db, string(), false));
00067 }
00068 if (!doclen_pl->jump_to(did))
00069 throw Xapian::DocNotFoundError("Document " + str(did) + " not found");
00070 return doclen_pl->get_wdf();
00071 }
00072
00073 bool
00074 BrassPostListTable::document_exists(Xapian::docid did,
00075 Xapian::Internal::RefCntPtr<const BrassDatabase> db) const
00076 {
00077 if (!doclen_pl.get()) {
00078
00079
00080 doclen_pl.reset(new BrassPostList(db, string(), false));
00081 }
00082 return (doclen_pl->jump_to(did));
00083 }
00084
00085
00086
00087
00088
00089
/// Once a chunk's encoded data reaches this many bytes,
/// PostlistChunkWriter::append() flushes it and starts a new chunk.
const unsigned int CHUNKSIZE = 2000u;
00091
00098 class Brass::PostlistChunkWriter {
00099 public:
00100 PostlistChunkWriter(const string &orig_key_,
00101 bool is_first_chunk_,
00102 const string &tname_,
00103 bool is_last_chunk_);
00104
00106 void append(BrassTable * table, Xapian::docid did,
00107 Xapian::termcount wdf);
00108
00110 void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
00111 const string & s) {
00112 Assert(!started);
00113 first_did = first_did_;
00114 current_did = current_did_;
00115 if (!s.empty()) {
00116 chunk.append(s);
00117 started = true;
00118 }
00119 }
00120
00125 void flush(BrassTable *table);
00126
00127 private:
00128 string orig_key;
00129 string tname;
00130 bool is_first_chunk;
00131 bool is_last_chunk;
00132 bool started;
00133
00134 Xapian::docid first_did;
00135 Xapian::docid current_did;
00136
00137 string chunk;
00138 };
00139
00140 using Brass::PostlistChunkWriter;
00141
00142
00143
00145 XAPIAN_NORETURN(static void report_read_error(const char * position));
00146 static void report_read_error(const char * position)
00147 {
00148 if (position == 0) {
00149
00150 LOGLINE(DB, "BrassPostList data ran out");
00151 throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
00152 }
00153
00154 LOGLINE(DB, "BrassPostList value too large");
00155 throw Xapian::RangeError("Value in posting list too large.");
00156 }
00157
00158 static inline bool get_tname_from_key(const char **src, const char *end,
00159 string &tname)
00160 {
00161 return unpack_string_preserving_sort(src, end, tname);
00162 }
00163
00164 static inline bool
00165 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
00166 {
00167 string tname_in_key;
00168
00169 if (keyend - *keypos >= 2 && (*keypos)[0] == '\0' && (*keypos)[1] == '\xe0') {
00170 *keypos += 2;
00171 } else {
00172
00173 if (!get_tname_from_key(keypos, keyend, tname_in_key))
00174 report_read_error(*keypos);
00175 }
00176
00177
00178 return tname_in_key == tname;
00179 }
00180
00181 static inline bool
00182 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
00183 {
00184 if (*keypos == keyend) return false;
00185
00186 return check_tname_in_key_lite(keypos, keyend, tname);
00187 }
00188
00190 static Xapian::docid
00191 read_start_of_first_chunk(const char ** posptr,
00192 const char * end,
00193 Xapian::doccount * number_of_entries_ptr,
00194 Xapian::termcount * collection_freq_ptr)
00195 {
00196 LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void *)posptr | (const void *)end | (void *)number_of_entries_ptr | (void *)collection_freq_ptr);
00197
00198 BrassPostList::read_number_of_entries(posptr, end,
00199 number_of_entries_ptr, collection_freq_ptr);
00200 if (number_of_entries_ptr)
00201 LOGVALUE(DB, *number_of_entries_ptr);
00202 if (collection_freq_ptr)
00203 LOGVALUE(DB, *collection_freq_ptr);
00204
00205 Xapian::docid did;
00206
00207 if (!unpack_uint(posptr, end, &did))
00208 report_read_error(*posptr);
00209 ++did;
00210 LOGVALUE(DB, did);
00211 RETURN(did);
00212 }
00213
00214 static inline void
00215 read_did_increase(const char ** posptr, const char * end,
00216 Xapian::docid * did_ptr)
00217 {
00218 Xapian::docid did_increase;
00219 if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
00220 *did_ptr += did_increase + 1;
00221 }
00222
00224 static inline void
00225 read_wdf(const char ** posptr, const char * end, Xapian::termcount * wdf_ptr)
00226 {
00227 if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
00228 }
00229
00231 static Xapian::docid
00232 read_start_of_chunk(const char ** posptr,
00233 const char * end,
00234 Xapian::docid first_did_in_chunk,
00235 bool * is_last_chunk_ptr)
00236 {
00237 LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
00238 Assert(is_last_chunk_ptr);
00239
00240
00241 if (!unpack_bool(posptr, end, is_last_chunk_ptr))
00242 report_read_error(*posptr);
00243 LOGVALUE(DB, *is_last_chunk_ptr);
00244
00245
00246 Xapian::docid increase_to_last;
00247 if (!unpack_uint(posptr, end, &increase_to_last))
00248 report_read_error(*posptr);
00249 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
00250 LOGVALUE(DB, last_did_in_chunk);
00251 RETURN(last_did_in_chunk);
00252 }
00253
00258 class Brass::PostlistChunkReader {
00259 string data;
00260
00261 const char *pos;
00262 const char *end;
00263
00264 bool at_end;
00265
00266 Xapian::docid did;
00267 Xapian::termcount wdf;
00268
00269 public:
00275 PostlistChunkReader(Xapian::docid first_did, const string & data_)
00276 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
00277 {
00278 if (!at_end) read_wdf(&pos, end, &wdf);
00279 }
00280
00281 Xapian::docid get_docid() const {
00282 return did;
00283 }
00284 Xapian::termcount get_wdf() const {
00285 return wdf;
00286 }
00287
00288 bool is_at_end() const {
00289 return at_end;
00290 }
00291
00294 void next();
00295 };
00296
00297 using Brass::PostlistChunkReader;
00298
00299 void
00300 PostlistChunkReader::next()
00301 {
00302 if (pos == end) {
00303 at_end = true;
00304 } else {
00305 read_did_increase(&pos, end, &did);
00306 read_wdf(&pos, end, &wdf);
00307 }
00308 }
00309
00310 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
00311 bool is_first_chunk_,
00312 const string &tname_,
00313 bool is_last_chunk_)
00314 : orig_key(orig_key_),
00315 tname(tname_), is_first_chunk(is_first_chunk_),
00316 is_last_chunk(is_last_chunk_),
00317 started(false)
00318 {
00319 LOGCALL_VOID(DB, "PostlistChunkWriter::PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
00320 }
00321
00322 void
00323 PostlistChunkWriter::append(BrassTable * table, Xapian::docid did,
00324 Xapian::termcount wdf)
00325 {
00326 if (!started) {
00327 started = true;
00328 first_did = did;
00329 } else {
00330 Assert(did > current_did);
00331
00332 if (chunk.size() >= CHUNKSIZE) {
00333 bool save_is_last_chunk = is_last_chunk;
00334 is_last_chunk = false;
00335 flush(table);
00336 is_last_chunk = save_is_last_chunk;
00337 is_first_chunk = false;
00338 first_did = did;
00339 chunk.resize(0);
00340 orig_key = BrassPostListTable::make_key(tname, first_did);
00341 } else {
00342 pack_uint(chunk, did - current_did - 1);
00343 }
00344 }
00345 current_did = did;
00346 pack_uint(chunk, wdf);
00347 }
00348
00351 static inline string
00352 make_start_of_first_chunk(Xapian::doccount entries,
00353 Xapian::termcount collectionfreq,
00354 Xapian::docid new_did)
00355 {
00356 string chunk;
00357 pack_uint(chunk, entries);
00358 pack_uint(chunk, collectionfreq);
00359 pack_uint(chunk, new_did - 1);
00360 return chunk;
00361 }
00362
00365 static inline string
00366 make_start_of_chunk(bool new_is_last_chunk,
00367 Xapian::docid new_first_did,
00368 Xapian::docid new_final_did)
00369 {
00370 Assert(new_final_did >= new_first_did);
00371 string chunk;
00372 pack_bool(chunk, new_is_last_chunk);
00373 pack_uint(chunk, new_final_did - new_first_did);
00374 return chunk;
00375 }
00376
00377 static void
00378 write_start_of_chunk(string & chunk,
00379 unsigned int start_of_chunk_header,
00380 unsigned int end_of_chunk_header,
00381 bool is_last_chunk,
00382 Xapian::docid first_did_in_chunk,
00383 Xapian::docid last_did_in_chunk)
00384 {
00385 Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
00386
00387 chunk.replace(start_of_chunk_header,
00388 end_of_chunk_header - start_of_chunk_header,
00389 make_start_of_chunk(is_last_chunk, first_did_in_chunk,
00390 last_did_in_chunk));
00391 }
00392
00393 void
00394 PostlistChunkWriter::flush(BrassTable *table)
00395 {
00396 LOGCALL_VOID(DB, "PostlistChunkWriter::flush", table);
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406 if (!started) {
00407
00408
00409
00410
00411
00412
00413
00414
00415 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
00416 Assert(!orig_key.empty());
00417 if (is_first_chunk) {
00418 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
00419 if (is_last_chunk) {
00420
00421
00422
00423 table->del(orig_key);
00424 return;
00425 }
00426
00427
00428
00429
00430
00431
00432 AutoPtr<BrassCursor> cursor(table->cursor_get());
00433
00434 if (!cursor->find_entry(orig_key)) {
00435 throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
00436 }
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447 Xapian::doccount num_ent;
00448 Xapian::termcount coll_freq;
00449 {
00450 cursor->read_tag();
00451 const char *tagpos = cursor->current_tag.data();
00452 const char *tagend = tagpos + cursor->current_tag.size();
00453
00454 (void)read_start_of_first_chunk(&tagpos, tagend,
00455 &num_ent, &coll_freq);
00456 }
00457
00458
00459 cursor->next();
00460 if (cursor->after_end()) {
00461 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00462 }
00463 const char *kpos = cursor->current_key.data();
00464 const char *kend = kpos + cursor->current_key.size();
00465 if (!check_tname_in_key(&kpos, kend, tname)) {
00466 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00467 }
00468
00469
00470 Xapian::docid new_first_did;
00471 if (!unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
00472 report_read_error(kpos);
00473 }
00474
00475 cursor->read_tag();
00476 const char *tagpos = cursor->current_tag.data();
00477 const char *tagend = tagpos + cursor->current_tag.size();
00478
00479
00480 bool new_is_last_chunk;
00481 Xapian::docid new_last_did_in_chunk =
00482 read_start_of_chunk(&tagpos, tagend, new_first_did,
00483 &new_is_last_chunk);
00484
00485 string chunk_data(tagpos, tagend);
00486
00487
00488 table->del(cursor->current_key);
00489
00490
00491 string tag;
00492 tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
00493 tag += make_start_of_chunk(new_is_last_chunk,
00494 new_first_did,
00495 new_last_did_in_chunk);
00496 tag += chunk_data;
00497 table->add(orig_key, tag);
00498 return;
00499 }
00500
00501 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
00502
00503
00504
00505 table->del(orig_key);
00506
00507 if (is_last_chunk) {
00508 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
00509
00510 AutoPtr<BrassCursor> cursor(table->cursor_get());
00511
00512
00513
00514 if (cursor->find_entry(orig_key)) {
00515 throw Xapian::DatabaseCorruptError("Brass key not deleted as we expected");
00516 }
00517
00518 const char * keypos = cursor->current_key.data();
00519 const char * keyend = keypos + cursor->current_key.size();
00520 if (!check_tname_in_key(&keypos, keyend, tname)) {
00521 throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
00522 }
00523
00524 bool is_prev_first_chunk = (keypos == keyend);
00525
00526
00527 cursor->read_tag();
00528 string tag = cursor->current_tag;
00529
00530 const char *tagpos = tag.data();
00531 const char *tagend = tagpos + tag.size();
00532
00533
00534 Xapian::docid first_did_in_chunk;
00535 if (is_prev_first_chunk) {
00536 first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
00537 0, 0);
00538 } else {
00539 if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk))
00540 report_read_error(keypos);
00541 }
00542 bool wrong_is_last_chunk;
00543 string::size_type start_of_chunk_header = tagpos - tag.data();
00544 Xapian::docid last_did_in_chunk =
00545 read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
00546 &wrong_is_last_chunk);
00547 string::size_type end_of_chunk_header = tagpos - tag.data();
00548
00549
00550 write_start_of_chunk(tag,
00551 start_of_chunk_header,
00552 end_of_chunk_header,
00553 true,
00554 first_did_in_chunk,
00555 last_did_in_chunk);
00556 table->add(cursor->current_key, tag);
00557 }
00558 } else {
00559 LOGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
00560
00561
00562
00563
00564
00565
00566 string tag;
00567
00568
00569
00570
00571 if (is_first_chunk) {
00572
00573
00574
00575 LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
00576 string key = BrassPostListTable::make_key(tname);
00577 bool ok = table->get_exact_entry(key, tag);
00578 (void)ok;
00579 Assert(ok);
00580 Assert(!tag.empty());
00581
00582 Xapian::doccount num_ent;
00583 Xapian::termcount coll_freq;
00584 {
00585 const char * tagpos = tag.data();
00586 const char * tagend = tagpos + tag.size();
00587 (void)read_start_of_first_chunk(&tagpos, tagend,
00588 &num_ent, &coll_freq);
00589 }
00590
00591 tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
00592
00593 tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
00594 tag += chunk;
00595 table->add(key, tag);
00596 return;
00597 }
00598
00599 LOGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611 const char *keypos = orig_key.data();
00612 const char *keyend = keypos + orig_key.size();
00613 if (!check_tname_in_key(&keypos, keyend, tname)) {
00614 throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
00615 }
00616 Xapian::docid initial_did;
00617 if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
00618 report_read_error(keypos);
00619 }
00620 string new_key;
00621 if (initial_did != first_did) {
00622
00623
00624
00625
00626 new_key = BrassPostListTable::make_key(tname, first_did);
00627 table->del(orig_key);
00628 } else {
00629 new_key = orig_key;
00630 }
00631
00632
00633 tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
00634
00635 tag += chunk;
00636 table->add(new_key, tag);
00637 }
00638 }
00639
00644 void BrassPostList::read_number_of_entries(const char ** posptr,
00645 const char * end,
00646 Xapian::doccount * number_of_entries_ptr,
00647 Xapian::termcount * collection_freq_ptr)
00648 {
00649 if (!unpack_uint(posptr, end, number_of_entries_ptr))
00650 report_read_error(*posptr);
00651 if (!unpack_uint(posptr, end, collection_freq_ptr))
00652 report_read_error(*posptr);
00653 }
00654
00674 BrassPostList::BrassPostList(Xapian::Internal::RefCntPtr<const BrassDatabase> this_db_,
00675 const string & term_,
00676 bool keep_reference)
00677 : LeafPostList(term_),
00678 this_db(keep_reference ? this_db_ : NULL),
00679 have_started(false),
00680 is_at_end(false),
00681 cursor(this_db_->postlist_table.cursor_get())
00682 {
00683 LOGCALL_VOID(DB, "BrassPostList::BrassPostList", this_db_.get() | term_ | keep_reference);
00684 string key = BrassPostListTable::make_key(term);
00685 int found = cursor->find_entry(key);
00686 if (!found) {
00687 LOGLINE(DB, "postlist for term not found");
00688 number_of_entries = 0;
00689 is_at_end = true;
00690 pos = 0;
00691 end = 0;
00692 first_did_in_chunk = 0;
00693 last_did_in_chunk = 0;
00694 return;
00695 }
00696 cursor->read_tag();
00697 pos = cursor->current_tag.data();
00698 end = pos + cursor->current_tag.size();
00699
00700 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00701 first_did_in_chunk = did;
00702 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00703 &is_last_chunk);
00704 read_wdf(&pos, end, &wdf);
00705 LOGLINE(DB, "Initial docid " << did);
00706 }
00707
00708 BrassPostList::~BrassPostList()
00709 {
00710 LOGCALL_VOID(DB, "BrassPostList::~BrassPostList", NO_ARGS);
00711 }
00712
00713 Xapian::termcount
00714 BrassPostList::get_doclength() const
00715 {
00716 LOGCALL(DB, Xapian::termcount, "BrassPostList::get_doclength", NO_ARGS);
00717 Assert(have_started);
00718 Assert(this_db.get());
00719 RETURN(this_db->get_doclength(did));
00720 }
00721
00722 bool
00723 BrassPostList::next_in_chunk()
00724 {
00725 LOGCALL(DB, bool, "BrassPostList::next_in_chunk", NO_ARGS);
00726 if (pos == end) RETURN(false);
00727
00728 read_did_increase(&pos, end, &did);
00729 read_wdf(&pos, end, &wdf);
00730
00731
00732 Assert(did <= last_did_in_chunk);
00733 Assert(did < last_did_in_chunk || pos == end);
00734 Assert(pos != end || did == last_did_in_chunk);
00735
00736 RETURN(true);
00737 }
00738
00739 void
00740 BrassPostList::next_chunk()
00741 {
00742 LOGCALL_VOID(DB, "BrassPostList::next_chunk", NO_ARGS);
00743 if (is_last_chunk) {
00744 is_at_end = true;
00745 return;
00746 }
00747
00748 cursor->next();
00749 if (cursor->after_end()) {
00750 is_at_end = true;
00751 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00752 term + "'");
00753 }
00754 const char * keypos = cursor->current_key.data();
00755 const char * keyend = keypos + cursor->current_key.size();
00756
00757 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00758 is_at_end = true;
00759 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00760 term + "'");
00761 }
00762
00763 Xapian::docid newdid;
00764 if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
00765 report_read_error(keypos);
00766 }
00767 if (newdid <= did) {
00768 throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
00769 str(newdid) +
00770 ") is not greater than final document ID in previous chunk (" +
00771 str(did) + ")");
00772 }
00773 did = newdid;
00774
00775 cursor->read_tag();
00776 pos = cursor->current_tag.data();
00777 end = pos + cursor->current_tag.size();
00778
00779 first_did_in_chunk = did;
00780 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00781 &is_last_chunk);
00782 read_wdf(&pos, end, &wdf);
00783 }
00784
00785 PositionList *
00786 BrassPostList::read_position_list()
00787 {
00788 LOGCALL(DB, PositionList *, "BrassPostList::read_position_list", NO_ARGS);
00789 Assert(this_db.get());
00790 positionlist.read_data(&this_db->position_table, did, term);
00791 RETURN(&positionlist);
00792 }
00793
00794 PositionList *
00795 BrassPostList::open_position_list() const
00796 {
00797 LOGCALL(DB, PositionList *, "BrassPostList::open_position_list", NO_ARGS);
00798 Assert(this_db.get());
00799 RETURN(new BrassPositionList(&this_db->position_table, did, term));
00800 }
00801
00802 PostList *
00803 BrassPostList::next(Xapian::weight w_min)
00804 {
00805 LOGCALL(DB, PostList *, "BrassPostList::next", w_min);
00806 (void)w_min;
00807
00808 if (!have_started) {
00809 have_started = true;
00810 } else {
00811 if (!next_in_chunk()) next_chunk();
00812 }
00813
00814 if (is_at_end) {
00815 LOGLINE(DB, "Moved to end");
00816 } else {
00817 LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf);
00818 }
00819
00820 RETURN(NULL);
00821 }
00822
00823 bool
00824 BrassPostList::current_chunk_contains(Xapian::docid desired_did)
00825 {
00826 LOGCALL(DB, bool, "BrassPostList::current_chunk_contains", desired_did);
00827 if (desired_did >= first_did_in_chunk &&
00828 desired_did <= last_did_in_chunk) {
00829 RETURN(true);
00830 }
00831 RETURN(false);
00832 }
00833
00834 void
00835 BrassPostList::move_to_chunk_containing(Xapian::docid desired_did)
00836 {
00837 LOGCALL_VOID(DB, "BrassPostList::move_to_chunk_containing", desired_did);
00838 (void)cursor->find_entry(BrassPostListTable::make_key(term, desired_did));
00839 Assert(!cursor->after_end());
00840
00841 const char * keypos = cursor->current_key.data();
00842 const char * keyend = keypos + cursor->current_key.size();
00843
00844 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00845
00846 is_at_end = true;
00847 is_last_chunk = true;
00848 return;
00849 }
00850 is_at_end = false;
00851
00852 cursor->read_tag();
00853 pos = cursor->current_tag.data();
00854 end = pos + cursor->current_tag.size();
00855
00856 if (keypos == keyend) {
00857
00858 #ifdef XAPIAN_ASSERTIONS
00859 Xapian::doccount old_number_of_entries = number_of_entries;
00860 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00861 Assert(old_number_of_entries == number_of_entries);
00862 #else
00863 did = read_start_of_first_chunk(&pos, end, NULL, NULL);
00864 #endif
00865 } else {
00866
00867 if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
00868 report_read_error(keypos);
00869 }
00870 }
00871
00872 first_did_in_chunk = did;
00873 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00874 &is_last_chunk);
00875 read_wdf(&pos, end, &wdf);
00876
00877
00878
00879 if (desired_did > last_did_in_chunk) next_chunk();
00880 }
00881
00882 bool
00883 BrassPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
00884 {
00885 LOGCALL(DB, bool, "BrassPostList::move_forward_in_chunk_to_at_least", desired_did);
00886 if (did >= desired_did)
00887 RETURN(true);
00888
00889 if (desired_did <= last_did_in_chunk) {
00890 while (pos != end) {
00891 read_did_increase(&pos, end, &did);
00892 if (did >= desired_did) {
00893 read_wdf(&pos, end, &wdf);
00894 RETURN(true);
00895 }
00896
00897 read_wdf(&pos, end, NULL);
00898 }
00899
00900
00901 Assert(false);
00902 }
00903
00904 pos = end;
00905 RETURN(false);
00906 }
00907
00908 PostList *
00909 BrassPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
00910 {
00911 LOGCALL(DB, PostList *, "BrassPostList::skip_to", desired_did | w_min);
00912 (void)w_min;
00913
00914
00915 have_started = true;
00916
00917
00918 if (is_at_end || desired_did <= did) RETURN(NULL);
00919
00920
00921 if (!current_chunk_contains(desired_did)) {
00922 move_to_chunk_containing(desired_did);
00923
00924
00925 if (is_at_end) RETURN(NULL);
00926 }
00927
00928
00929 bool have_document = move_forward_in_chunk_to_at_least(desired_did);
00930 (void)have_document;
00931 Assert(have_document);
00932
00933 if (is_at_end) {
00934 LOGLINE(DB, "Skipped to end");
00935 } else {
00936 LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf);
00937 }
00938
00939 RETURN(NULL);
00940 }
00941
00942
00943 bool
00944 BrassPostList::jump_to(Xapian::docid desired_did)
00945 {
00946 LOGCALL(DB, bool, "BrassPostList::jump_to", desired_did);
00947
00948
00949 have_started = true;
00950
00951
00952 if (pos == 0) RETURN(false);
00953
00954
00955
00956
00957 if (is_at_end || !current_chunk_contains(desired_did) || desired_did < did) {
00958
00959 is_at_end = false;
00960
00961 move_to_chunk_containing(desired_did);
00962
00963
00964 if (is_at_end) RETURN(false);
00965 }
00966
00967
00968 if (!move_forward_in_chunk_to_at_least(desired_did)) RETURN(false);
00969 RETURN(desired_did == did);
00970 }
00971
00972 string
00973 BrassPostList::get_description() const
00974 {
00975 return term + ":" + str(number_of_entries);
00976 }
00977
00978
00979 Xapian::docid
00980 BrassPostListTable::get_chunk(const string &tname,
00981 Xapian::docid did, bool adding,
00982 PostlistChunkReader ** from, PostlistChunkWriter **to)
00983 {
00984 LOGCALL(DB, Xapian::docid, "BrassPostListTable::get_chunk", tname | did | adding | from | to);
00985
00986 string key = make_key(tname, did);
00987
00988
00989 AutoPtr<BrassCursor> cursor(cursor_get());
00990
00991 (void)cursor->find_entry(key);
00992 Assert(!cursor->after_end());
00993
00994 const char * keypos = cursor->current_key.data();
00995 const char * keyend = keypos + cursor->current_key.size();
00996
00997 if (!check_tname_in_key(&keypos, keyend, tname)) {
00998
00999
01000
01001
01002 if (!adding)
01003 throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
01004
01005 *from = NULL;
01006 *to = new PostlistChunkWriter(string(), true, tname, true);
01007 RETURN(Xapian::docid(-1));
01008 }
01009
01010
01011
01012 bool is_first_chunk = (keypos == keyend);
01013 LOGVALUE(DB, is_first_chunk);
01014
01015 cursor->read_tag();
01016 const char * pos = cursor->current_tag.data();
01017 const char * end = pos + cursor->current_tag.size();
01018 Xapian::docid first_did_in_chunk;
01019 if (is_first_chunk) {
01020 first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
01021 } else {
01022 if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk)) {
01023 report_read_error(keypos);
01024 }
01025 }
01026
01027 bool is_last_chunk;
01028 Xapian::docid last_did_in_chunk;
01029 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
01030 *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
01031 is_last_chunk);
01032 if (did > last_did_in_chunk) {
01033
01034
01035
01036 *from = NULL;
01037 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
01038 string(pos, end));
01039 } else {
01040 *from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
01041 }
01042 if (is_last_chunk) RETURN(Xapian::docid(-1));
01043
01044
01045 cursor->next();
01046 if (cursor->after_end()) {
01047 throw Xapian::DatabaseCorruptError("Expected another key but found none");
01048 }
01049 const char *kpos = cursor->current_key.data();
01050 const char *kend = kpos + cursor->current_key.size();
01051 if (!check_tname_in_key(&kpos, kend, tname)) {
01052 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
01053 }
01054
01055
01056 Xapian::docid first_did_of_next_chunk;
01057 if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
01058 report_read_error(kpos);
01059 }
01060 RETURN(first_did_of_next_chunk - 1);
01061 }
01062
01063 void
01064 BrassPostListTable::merge_doclen_changes(const map<Xapian::docid, Xapian::termcount> & doclens)
01065 {
01066 LOGCALL_VOID(DB, "BrassPostListTable::merge_doclen_changes", doclens);
01067
01068
01069 doclen_pl.reset(0);
01070
01071 LOGVALUE(DB, doclens.size());
01072 if (doclens.empty()) return;
01073
01074
01075 string current_key = make_key(string());
01076 if (!key_exists(current_key)) {
01077 LOGLINE(DB, "Adding dummy first chunk");
01078 string newtag = make_start_of_first_chunk(0, 0, 0);
01079 newtag += make_start_of_chunk(true, 0, 0);
01080 add(current_key, newtag);
01081 }
01082
01083 map<Xapian::docid, Xapian::termcount>::const_iterator j;
01084 j = doclens.begin();
01085 Assert(j != doclens.end());
01086
01087 Xapian::docid max_did;
01088 PostlistChunkReader *from;
01089 PostlistChunkWriter *to;
01090 max_did = get_chunk(string(), j->first, true, &from, &to);
01091 LOGVALUE(DB, max_did);
01092 for ( ; j != doclens.end(); ++j) {
01093 Xapian::docid did = j->first;
01094
01095 next_doclen_chunk:
01096 LOGLINE(DB, "Updating doclens, did=" << did);
01097 if (from) while (!from->is_at_end()) {
01098 Xapian::docid copy_did = from->get_docid();
01099 if (copy_did >= did) {
01100 if (copy_did == did) from->next();
01101 break;
01102 }
01103 to->append(this, copy_did, from->get_wdf());
01104 from->next();
01105 }
01106 if ((!from || from->is_at_end()) && did > max_did) {
01107 delete from;
01108 to->flush(this);
01109 delete to;
01110 max_did = get_chunk(string(), did, false, &from, &to);
01111 goto next_doclen_chunk;
01112 }
01113
01114 Xapian::termcount new_doclen = j->second;
01115 if (new_doclen != static_cast<Xapian::termcount>(-1)) {
01116 to->append(this, did, new_doclen);
01117 }
01118 }
01119
01120 if (from) {
01121 while (!from->is_at_end()) {
01122 to->append(this, from->get_docid(), from->get_wdf());
01123 from->next();
01124 }
01125 delete from;
01126 }
01127 to->flush(this);
01128 delete to;
01129 }
01130
01131 void
01132 BrassPostListTable::merge_changes(const string &term,
01133 const Inverter::PostingChanges & changes)
01134 {
01135 {
01136
01137
01138 string current_key = make_key(term);
01139 string tag;
01140 (void)get_exact_entry(current_key, tag);
01141
01142
01143 const char *pos = tag.data();
01144 const char *end = pos + tag.size();
01145 Xapian::doccount termfreq;
01146 Xapian::termcount collfreq;
01147 Xapian::docid firstdid, lastdid;
01148 bool islast;
01149 if (pos == end) {
01150 termfreq = 0;
01151 collfreq = 0;
01152 firstdid = 0;
01153 lastdid = 0;
01154 islast = true;
01155 } else {
01156 firstdid = read_start_of_first_chunk(&pos, end,
01157 &termfreq, &collfreq);
01158
01159 lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
01160 }
01161
01162 termfreq += changes.get_tfdelta();
01163 if (termfreq == 0) {
01164
01165
01166 if (islast) {
01167
01168 del(current_key);
01169 return;
01170 }
01171 MutableBrassCursor cursor(this);
01172 bool found = cursor.find_entry(current_key);
01173 Assert(found);
01174 if (!found) return;
01175 while (cursor.del()) {
01176 const char *kpos = cursor.current_key.data();
01177 const char *kend = kpos + cursor.current_key.size();
01178 if (!check_tname_in_key_lite(&kpos, kend, term)) break;
01179 }
01180 return;
01181 }
01182 collfreq += changes.get_cfdelta();
01183
01184
01185 string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
01186 newhdr += make_start_of_chunk(islast, firstdid, lastdid);
01187 if (pos == end) {
01188 add(current_key, newhdr);
01189 } else {
01190 Assert((size_t)(pos - tag.data()) <= tag.size());
01191 tag.replace(0, pos - tag.data(), newhdr);
01192 add(current_key, tag);
01193 }
01194 }
01195 map<Xapian::docid, Xapian::termcount>::const_iterator j;
01196 j = changes.pl_changes.begin();
01197 Assert(j != changes.pl_changes.end());
01198
01199 Xapian::docid max_did;
01200 PostlistChunkReader *from;
01201 PostlistChunkWriter *to;
01202 max_did = get_chunk(term, j->first, false, &from, &to);
01203 for ( ; j != changes.pl_changes.end(); ++j) {
01204 Xapian::docid did = j->first;
01205
01206 next_chunk:
01207 LOGLINE(DB, "Updating term=" << term << ", did=" << did);
01208 if (from) while (!from->is_at_end()) {
01209 Xapian::docid copy_did = from->get_docid();
01210 if (copy_did >= did) {
01211 if (copy_did == did) {
01212 from->next();
01213 }
01214 break;
01215 }
01216 to->append(this, copy_did, from->get_wdf());
01217 from->next();
01218 }
01219 if ((!from || from->is_at_end()) && did > max_did) {
01220 delete from;
01221 to->flush(this);
01222 delete to;
01223 max_did = get_chunk(term, did, false, &from, &to);
01224 goto next_chunk;
01225 }
01226
01227 Xapian::termcount new_wdf = j->second;
01228 if (new_wdf != Xapian::termcount(-1)) {
01229 to->append(this, did, new_wdf);
01230 }
01231 }
01232
01233 if (from) {
01234 while (!from->is_at_end()) {
01235 to->append(this, from->get_docid(), from->get_wdf());
01236 from->next();
01237 }
01238 delete from;
01239 }
01240 to->flush(this);
01241 delete to;
01242 }