00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include "chert_postlist.h"
00026
00027 #include "chert_cursor.h"
00028 #include "chert_database.h"
00029 #include "debuglog.h"
00030 #include "noreturn.h"
00031 #include "pack.h"
00032 #include "str.h"
00033
00034 Xapian::doccount
00035 ChertPostListTable::get_termfreq(const string & term) const
00036 {
00037 string key = make_key(term);
00038 string tag;
00039 if (!get_exact_entry(key, tag)) return 0;
00040
00041 Xapian::doccount termfreq;
00042 const char * p = tag.data();
00043 ChertPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
00044 return termfreq;
00045 }
00046
00047 Xapian::termcount
00048 ChertPostListTable::get_collection_freq(const string & term) const
00049 {
00050 string key = make_key(term);
00051 string tag;
00052 if (!get_exact_entry(key, tag)) return 0;
00053
00054 Xapian::termcount collfreq;
00055 const char * p = tag.data();
00056 ChertPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
00057 return collfreq;
00058 }
00059
00060 Xapian::termcount
00061 ChertPostListTable::get_doclength(Xapian::docid did,
00062 Xapian::Internal::RefCntPtr<const ChertDatabase> db) const {
00063 if (!doclen_pl.get()) {
00064
00065
00066 doclen_pl.reset(new ChertPostList(db, string(), false));
00067 }
00068 if (!doclen_pl->jump_to(did))
00069 throw Xapian::DocNotFoundError("Document " + str(did) + " not found");
00070 return doclen_pl->get_wdf();
00071 }
00072
00073 bool
00074 ChertPostListTable::document_exists(Xapian::docid did,
00075 Xapian::Internal::RefCntPtr<const ChertDatabase> db) const
00076 {
00077 if (!doclen_pl.get()) {
00078
00079
00080 doclen_pl.reset(new ChertPostList(db, string(), false));
00081 }
00082 return (doclen_pl->jump_to(did));
00083 }
00084
00085
00086
00087
00088
00089
// Size threshold, in bytes of encoded chunk data: once a chunk being built
// by PostlistChunkWriter::append() has reached this size, it is flushed and
// a new chunk is started.
00090 const unsigned int CHUNKSIZE = 2000;
00091
00098 class Chert::PostlistChunkWriter {
00099 public:
00100 PostlistChunkWriter(const string &orig_key_,
00101 bool is_first_chunk_,
00102 const string &tname_,
00103 bool is_last_chunk_);
00104
00106 void append(ChertTable * table, Xapian::docid did,
00107 Xapian::termcount wdf);
00108
00110 void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
00111 const string & s) {
00112 Assert(!started);
00113 first_did = first_did_;
00114 current_did = current_did_;
00115 if (!s.empty()) {
00116 chunk.append(s);
00117 started = true;
00118 }
00119 }
00120
00125 void flush(ChertTable *table);
00126
00127 private:
00128 string orig_key;
00129 string tname;
00130 bool is_first_chunk;
00131 bool is_last_chunk;
00132 bool started;
00133
00134 Xapian::docid first_did;
00135 Xapian::docid current_did;
00136
00137 string chunk;
00138 };
00139
00140 using Chert::PostlistChunkWriter;
00141
00142
00143
00145 XAPIAN_NORETURN(static void report_read_error(const char * position));
00146 static void report_read_error(const char * position)
00147 {
00148 if (position == 0) {
00149
00150 LOGLINE(DB, "ChertPostList data ran out");
00151 throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
00152 }
00153
00154 LOGLINE(DB, "ChertPostList value too large");
00155 throw Xapian::RangeError("Value in posting list too large.");
00156 }
00157
00158 static inline bool get_tname_from_key(const char **src, const char *end,
00159 string &tname)
00160 {
00161 return unpack_string_preserving_sort(src, end, tname);
00162 }
00163
00164 static inline bool
00165 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
00166 {
00167 string tname_in_key;
00168
00169 if (keyend - *keypos >= 2 && (*keypos)[0] == '\0' && (*keypos)[1] == '\xe0') {
00170 *keypos += 2;
00171 } else {
00172
00173 if (!get_tname_from_key(keypos, keyend, tname_in_key))
00174 report_read_error(*keypos);
00175 }
00176
00177
00178 return tname_in_key == tname;
00179 }
00180
00181 static inline bool
00182 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
00183 {
00184 if (*keypos == keyend) return false;
00185
00186 return check_tname_in_key_lite(keypos, keyend, tname);
00187 }
00188
00190 static Xapian::docid
00191 read_start_of_first_chunk(const char ** posptr,
00192 const char * end,
00193 Xapian::doccount * number_of_entries_ptr,
00194 Xapian::termcount * collection_freq_ptr)
00195 {
00196 LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void *)posptr | (const void *)end | (void *)number_of_entries_ptr | (void *)collection_freq_ptr);
00197
00198 ChertPostList::read_number_of_entries(posptr, end,
00199 number_of_entries_ptr, collection_freq_ptr);
00200 if (number_of_entries_ptr)
00201 LOGVALUE(DB, *number_of_entries_ptr);
00202 if (collection_freq_ptr)
00203 LOGVALUE(DB, *collection_freq_ptr);
00204
00205 Xapian::docid did;
00206
00207 if (!unpack_uint(posptr, end, &did))
00208 report_read_error(*posptr);
00209 ++did;
00210 LOGVALUE(DB, did);
00211 RETURN(did);
00212 }
00213
00214 static inline void
00215 read_did_increase(const char ** posptr, const char * end,
00216 Xapian::docid * did_ptr)
00217 {
00218 Xapian::docid did_increase;
00219 if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
00220 *did_ptr += did_increase + 1;
00221 }
00222
00224 static inline void
00225 read_wdf(const char ** posptr, const char * end, Xapian::termcount * wdf_ptr)
00226 {
00227 if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
00228 }
00229
00231 static Xapian::docid
00232 read_start_of_chunk(const char ** posptr,
00233 const char * end,
00234 Xapian::docid first_did_in_chunk,
00235 bool * is_last_chunk_ptr)
00236 {
00237 LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
00238 Assert(is_last_chunk_ptr);
00239
00240
00241 if (!unpack_bool(posptr, end, is_last_chunk_ptr))
00242 report_read_error(*posptr);
00243 LOGVALUE(DB, *is_last_chunk_ptr);
00244
00245
00246 Xapian::docid increase_to_last;
00247 if (!unpack_uint(posptr, end, &increase_to_last))
00248 report_read_error(*posptr);
00249 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
00250 LOGVALUE(DB, last_did_in_chunk);
00251 RETURN(last_did_in_chunk);
00252 }
00253
00258 class Chert::PostlistChunkReader {
00259 string data;
00260
00261 const char *pos;
00262 const char *end;
00263
00264 bool at_end;
00265
00266 Xapian::docid did;
00267 Xapian::termcount wdf;
00268
00269 public:
00275 PostlistChunkReader(Xapian::docid first_did, const string & data_)
00276 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
00277 {
00278 if (!at_end) read_wdf(&pos, end, &wdf);
00279 }
00280
00281 Xapian::docid get_docid() const {
00282 return did;
00283 }
00284 Xapian::termcount get_wdf() const {
00285 return wdf;
00286 }
00287
00288 bool is_at_end() const {
00289 return at_end;
00290 }
00291
00294 void next();
00295 };
00296
00297 using Chert::PostlistChunkReader;
00298
00299 void
00300 PostlistChunkReader::next()
00301 {
00302 if (pos == end) {
00303 at_end = true;
00304 } else {
00305 read_did_increase(&pos, end, &did);
00306 read_wdf(&pos, end, &wdf);
00307 }
00308 }
00309
00310 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
00311 bool is_first_chunk_,
00312 const string &tname_,
00313 bool is_last_chunk_)
00314 : orig_key(orig_key_),
00315 tname(tname_), is_first_chunk(is_first_chunk_),
00316 is_last_chunk(is_last_chunk_),
00317 started(false)
00318 {
00319 LOGCALL_VOID(DB, "PostlistChunkWriter::PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
00320 }
00321
00322 void
00323 PostlistChunkWriter::append(ChertTable * table, Xapian::docid did,
00324 Xapian::termcount wdf)
00325 {
00326 if (!started) {
00327 started = true;
00328 first_did = did;
00329 } else {
00330 Assert(did > current_did);
00331
00332 if (chunk.size() >= CHUNKSIZE) {
00333 bool save_is_last_chunk = is_last_chunk;
00334 is_last_chunk = false;
00335 flush(table);
00336 is_last_chunk = save_is_last_chunk;
00337 is_first_chunk = false;
00338 first_did = did;
00339 chunk.resize(0);
00340 orig_key = ChertPostListTable::make_key(tname, first_did);
00341 } else {
00342 pack_uint(chunk, did - current_did - 1);
00343 }
00344 }
00345 current_did = did;
00346 pack_uint(chunk, wdf);
00347 }
00348
00351 static inline string
00352 make_start_of_first_chunk(Xapian::doccount entries,
00353 Xapian::termcount collectionfreq,
00354 Xapian::docid new_did)
00355 {
00356 string chunk;
00357 pack_uint(chunk, entries);
00358 pack_uint(chunk, collectionfreq);
00359 pack_uint(chunk, new_did - 1);
00360 return chunk;
00361 }
00362
00365 static inline string
00366 make_start_of_chunk(bool new_is_last_chunk,
00367 Xapian::docid new_first_did,
00368 Xapian::docid new_final_did)
00369 {
00370 Assert(new_final_did >= new_first_did);
00371 string chunk;
00372 pack_bool(chunk, new_is_last_chunk);
00373 pack_uint(chunk, new_final_did - new_first_did);
00374 return chunk;
00375 }
00376
00377 static void
00378 write_start_of_chunk(string & chunk,
00379 unsigned int start_of_chunk_header,
00380 unsigned int end_of_chunk_header,
00381 bool is_last_chunk,
00382 Xapian::docid first_did_in_chunk,
00383 Xapian::docid last_did_in_chunk)
00384 {
00385 Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
00386
00387 chunk.replace(start_of_chunk_header,
00388 end_of_chunk_header - start_of_chunk_header,
00389 make_start_of_chunk(is_last_chunk, first_did_in_chunk,
00390 last_did_in_chunk));
00391 }
00392
// Write the state of this writer back to `table`.
//
// Two main cases, selected by `started`:
//  * Nothing was appended (`!started`): the chunk stored under orig_key is
//    now empty and has to be removed, fixing up neighbouring chunks so the
//    posting list stays well-formed.
//  * Something was appended: the chunk is (re)written, with a first-chunk
//    header in front if it is the first chunk, and under a new key if its
//    first docid no longer matches the one encoded in orig_key.
//
// Throws Xapian::DatabaseCorruptError when the table contents are not as
// expected, and whatever report_read_error() throws on decoding failures.
00393 void
00394 PostlistChunkWriter::flush(ChertTable *table)
00395 {
00396 LOGCALL_VOID(DB, "PostlistChunkWriter::flush", table);
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406 if (!started) {
// ---- Case 1: the chunk ended up empty, so delete it. ----
00407
00408
00409
00410
00411
00412
00413
00414
00415 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
00416 Assert(!orig_key.empty());
00417 if (is_first_chunk) {
00418 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
00419 if (is_last_chunk) {
// Case 1a: the only chunk of the posting list - just remove the entry.
00420
00421
00422
00423 table->del(orig_key);
00424 return;
00425 }
00426
// Case 1b: first chunk of several.  The following chunk is promoted to be
// the new first chunk: its data is re-stored under orig_key with a
// first-chunk header prepended, and its old entry is deleted.
00427
00428
00429
00430
00431
00432 AutoPtr<ChertCursor> cursor(table->cursor_get());
00433
00434 if (!cursor->find_entry(orig_key)) {
00435 throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
00436 }
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
// Recover the entry count and collection frequency from the existing
// first-chunk header; the docid it returns is not needed.
00447 Xapian::doccount num_ent;
00448 Xapian::termcount coll_freq;
00449 {
00450 cursor->read_tag();
00451 const char *tagpos = cursor->current_tag.data();
00452 const char *tagend = tagpos + cursor->current_tag.size();
00453
00454 (void)read_start_of_first_chunk(&tagpos, tagend,
00455 &num_ent, &coll_freq);
00456 }
00457
00458
// Step to the next entry, which must be another chunk of the same term.
00459 cursor->next();
00460 if (cursor->after_end()) {
00461 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00462 }
00463 const char *kpos = cursor->current_key.data();
00464 const char *kend = kpos + cursor->current_key.size();
00465 if (!check_tname_in_key(&kpos, kend, tname)) {
00466 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00467 }
00468
00469
// The rest of that key encodes the chunk's first docid.
00470 Xapian::docid new_first_did;
00471 if (!unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
00472 report_read_error(kpos);
00473 }
00474
00475 cursor->read_tag();
00476 const char *tagpos = cursor->current_tag.data();
00477 const char *tagend = tagpos + cursor->current_tag.size();
00478
00479
00480 bool new_is_last_chunk;
00481 Xapian::docid new_last_did_in_chunk =
00482 read_start_of_chunk(&tagpos, tagend, new_first_did,
00483 &new_is_last_chunk);
00484
// Everything after the chunk header is the encoded entry data.
00485 string chunk_data(tagpos, tagend);
00486
00487
// Remove the promoted chunk's old entry ...
00488 table->del(cursor->current_key);
00489
00490
// ... and store it under the first-chunk key with both headers in front.
00491 string tag;
00492 tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
00493 tag += make_start_of_chunk(new_is_last_chunk,
00494 new_first_did,
00495 new_last_did_in_chunk);
00496 tag += chunk_data;
00497 table->add(orig_key, tag);
00498 return;
00499 }
00500
00501 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
00502
00503
00504
// Case 1c: a non-first chunk is empty - delete its entry.
00505 table->del(orig_key);
00506
00507 if (is_last_chunk) {
00508 LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
00509
// The deleted chunk was the last one, so the chunk before it must now be
// flagged as the last chunk.
00510 AutoPtr<ChertCursor> cursor(table->cursor_get());
00511
00512
00513
// The key was just deleted, so an exact match would be an error.
// NOTE(review): presumably a failed find_entry() leaves the cursor on the
// nearest preceding entry - confirm against ChertCursor::find_entry().
00514 if (cursor->find_entry(orig_key)) {
00515 throw Xapian::DatabaseCorruptError("Chert key not deleted as we expected");
00516 }
00517
00518 const char * keypos = cursor->current_key.data();
00519 const char * keyend = keypos + cursor->current_key.size();
00520 if (!check_tname_in_key(&keypos, keyend, tname)) {
00521 throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
00522 }
00523
// A key with nothing after the term name is treated as the first chunk.
00524 bool is_prev_first_chunk = (keypos == keyend);
00525
00526
// Work on a copy of the tag so its header can be replaced in place.
00527 cursor->read_tag();
00528 string tag = cursor->current_tag;
00529
00530 const char *tagpos = tag.data();
00531 const char *tagend = tagpos + tag.size();
00532
00533
// The first docid comes from the first-chunk header or from the key.
00534 Xapian::docid first_did_in_chunk;
00535 if (is_prev_first_chunk) {
00536 first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
00537 0, 0);
00538 } else {
00539 if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk))
00540 report_read_error(keypos);
00541 }
// Record where the chunk header starts and ends within the tag; the flag
// read here is discarded because it is about to be overwritten with true.
00542 bool wrong_is_last_chunk;
00543 string::size_type start_of_chunk_header = tagpos - tag.data();
00544 Xapian::docid last_did_in_chunk =
00545 read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
00546 &wrong_is_last_chunk);
00547 string::size_type end_of_chunk_header = tagpos - tag.data();
00548
00549
// Rewrite the header with is_last_chunk = true and store the tag back.
00550 write_start_of_chunk(tag,
00551 start_of_chunk_header,
00552 end_of_chunk_header,
00553 true,
00554 first_did_in_chunk,
00555 last_did_in_chunk);
00556 table->add(cursor->current_key, tag);
00557 }
00558 } else {
// ---- Case 2: the chunk still has entries, so write it out. ----
00559 LOGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
00560
00561
00562
00563
00564
00565
00566 string tag;
00567
00568
00569
00570
00571 if (is_first_chunk) {
00572
00573
00574
// Case 2a: first chunk.  The entry count and collection frequency are
// re-read from the stored first-chunk header and carried over unchanged.
00575 LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
00576 string key = ChertPostListTable::make_key(tname);
00577 bool ok = table->get_exact_entry(key, tag);
00578 (void)ok;
00579 Assert(ok);
00580 Assert(!tag.empty());
00581
00582 Xapian::doccount num_ent;
00583 Xapian::termcount coll_freq;
00584 {
00585 const char * tagpos = tag.data();
00586 const char * tagend = tagpos + tag.size();
00587 (void)read_start_of_first_chunk(&tagpos, tagend,
00588 &num_ent, &coll_freq);
00589 }
00590
// Rebuild the tag: first-chunk header, chunk header, then entry data.
00591 tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
00592
00593 tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
00594 tag += chunk;
00595 table->add(key, tag);
00596 return;
00597 }
00598
00599 LOGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
// Case 2b: non-first chunk.  Its key encodes the first docid, so decode the
// docid from orig_key to see whether the key is still correct.
00611 const char *keypos = orig_key.data();
00612 const char *keyend = keypos + orig_key.size();
00613 if (!check_tname_in_key(&keypos, keyend, tname)) {
00614 throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
00615 }
00616 Xapian::docid initial_did;
00617 if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
00618 report_read_error(keypos);
00619 }
00620 string new_key;
00621 if (initial_did != first_did) {
00622
00623
00624
00625
// The first docid changed: drop the old entry and use a fresh key.
00626 new_key = ChertPostListTable::make_key(tname, first_did);
00627 table->del(orig_key);
00628 } else {
00629 new_key = orig_key;
00630 }
00631
00632
// Chunk header followed by the encoded entries.
00633 tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
00634
00635 tag += chunk;
00636 table->add(new_key, tag);
00637 }
00638 }
00639
00644 void ChertPostList::read_number_of_entries(const char ** posptr,
00645 const char * end,
00646 Xapian::doccount * number_of_entries_ptr,
00647 Xapian::termcount * collection_freq_ptr)
00648 {
00649 if (!unpack_uint(posptr, end, number_of_entries_ptr))
00650 report_read_error(*posptr);
00651 if (!unpack_uint(posptr, end, collection_freq_ptr))
00652 report_read_error(*posptr);
00653 }
00654
00674 ChertPostList::ChertPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> this_db_,
00675 const string & term_,
00676 bool keep_reference)
00677 : LeafPostList(term_),
00678 this_db(keep_reference ? this_db_ : NULL),
00679 have_started(false),
00680 is_at_end(false),
00681 cursor(this_db_->postlist_table.cursor_get())
00682 {
00683 LOGCALL_VOID(DB, "ChertPostList::ChertPostList", this_db_.get() | term_ | keep_reference);
00684 string key = ChertPostListTable::make_key(term);
00685 int found = cursor->find_entry(key);
00686 if (!found) {
00687 LOGLINE(DB, "postlist for term not found");
00688 number_of_entries = 0;
00689 is_at_end = true;
00690 pos = 0;
00691 end = 0;
00692 first_did_in_chunk = 0;
00693 last_did_in_chunk = 0;
00694 return;
00695 }
00696 cursor->read_tag();
00697 pos = cursor->current_tag.data();
00698 end = pos + cursor->current_tag.size();
00699
00700 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00701 first_did_in_chunk = did;
00702 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00703 &is_last_chunk);
00704 read_wdf(&pos, end, &wdf);
00705 LOGLINE(DB, "Initial docid " << did);
00706 }
00707
00708 ChertPostList::~ChertPostList()
00709 {
00710 LOGCALL_VOID(DB, "ChertPostList::~ChertPostList", NO_ARGS);
00711 }
00712
00713 Xapian::termcount
00714 ChertPostList::get_doclength() const
00715 {
00716 LOGCALL(DB, Xapian::termcount, "ChertPostList::get_doclength", NO_ARGS);
00717 Assert(have_started);
00718 Assert(this_db.get());
00719 RETURN(this_db->get_doclength(did));
00720 }
00721
00722 bool
00723 ChertPostList::next_in_chunk()
00724 {
00725 LOGCALL(DB, bool, "ChertPostList::next_in_chunk", NO_ARGS);
00726 if (pos == end) RETURN(false);
00727
00728 read_did_increase(&pos, end, &did);
00729 read_wdf(&pos, end, &wdf);
00730
00731
00732 Assert(did <= last_did_in_chunk);
00733 Assert(did < last_did_in_chunk || pos == end);
00734 Assert(pos != end || did == last_did_in_chunk);
00735
00736 RETURN(true);
00737 }
00738
00739 void
00740 ChertPostList::next_chunk()
00741 {
00742 LOGCALL_VOID(DB, "ChertPostList::next_chunk", NO_ARGS);
00743 if (is_last_chunk) {
00744 is_at_end = true;
00745 return;
00746 }
00747
00748 cursor->next();
00749 if (cursor->after_end()) {
00750 is_at_end = true;
00751 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00752 term + "'");
00753 }
00754 const char * keypos = cursor->current_key.data();
00755 const char * keyend = keypos + cursor->current_key.size();
00756
00757 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00758 is_at_end = true;
00759 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00760 term + "'");
00761 }
00762
00763 Xapian::docid newdid;
00764 if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
00765 report_read_error(keypos);
00766 }
00767 if (newdid <= did) {
00768 throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
00769 str(newdid) +
00770 ") is not greater than final document ID in previous chunk (" +
00771 str(did) + ")");
00772 }
00773 did = newdid;
00774
00775 cursor->read_tag();
00776 pos = cursor->current_tag.data();
00777 end = pos + cursor->current_tag.size();
00778
00779 first_did_in_chunk = did;
00780 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00781 &is_last_chunk);
00782 read_wdf(&pos, end, &wdf);
00783 }
00784
00785 PositionList *
00786 ChertPostList::read_position_list()
00787 {
00788 LOGCALL(DB, PositionList *, "ChertPostList::read_position_list", NO_ARGS);
00789 Assert(this_db.get());
00790 positionlist.read_data(&this_db->position_table, did, term);
00791 RETURN(&positionlist);
00792 }
00793
00794 PositionList *
00795 ChertPostList::open_position_list() const
00796 {
00797 LOGCALL(DB, PositionList *, "ChertPostList::open_position_list", NO_ARGS);
00798 Assert(this_db.get());
00799 RETURN(new ChertPositionList(&this_db->position_table, did, term));
00800 }
00801
00802 PostList *
00803 ChertPostList::next(Xapian::weight w_min)
00804 {
00805 LOGCALL(DB, PostList *, "ChertPostList::next", w_min);
00806 (void)w_min;
00807
00808 if (!have_started) {
00809 have_started = true;
00810 } else {
00811 if (!next_in_chunk()) next_chunk();
00812 }
00813
00814 if (is_at_end) {
00815 LOGLINE(DB, "Moved to end");
00816 } else {
00817 LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf);
00818 }
00819
00820 RETURN(NULL);
00821 }
00822
00823 bool
00824 ChertPostList::current_chunk_contains(Xapian::docid desired_did)
00825 {
00826 LOGCALL(DB, bool, "ChertPostList::current_chunk_contains", desired_did);
00827 if (desired_did >= first_did_in_chunk &&
00828 desired_did <= last_did_in_chunk) {
00829 RETURN(true);
00830 }
00831 RETURN(false);
00832 }
00833
00834 void
00835 ChertPostList::move_to_chunk_containing(Xapian::docid desired_did)
00836 {
00837 LOGCALL_VOID(DB, "ChertPostList::move_to_chunk_containing", desired_did);
00838 (void)cursor->find_entry(ChertPostListTable::make_key(term, desired_did));
00839 Assert(!cursor->after_end());
00840
00841 const char * keypos = cursor->current_key.data();
00842 const char * keyend = keypos + cursor->current_key.size();
00843
00844 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00845
00846 is_at_end = true;
00847 is_last_chunk = true;
00848 return;
00849 }
00850 is_at_end = false;
00851
00852 cursor->read_tag();
00853 pos = cursor->current_tag.data();
00854 end = pos + cursor->current_tag.size();
00855
00856 if (keypos == keyend) {
00857
00858 #ifdef XAPIAN_ASSERTIONS
00859 Xapian::doccount old_number_of_entries = number_of_entries;
00860 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00861 Assert(old_number_of_entries == number_of_entries);
00862 #else
00863 did = read_start_of_first_chunk(&pos, end, NULL, NULL);
00864 #endif
00865 } else {
00866
00867 if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
00868 report_read_error(keypos);
00869 }
00870 }
00871
00872 first_did_in_chunk = did;
00873 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00874 &is_last_chunk);
00875 read_wdf(&pos, end, &wdf);
00876
00877
00878
00879 if (desired_did > last_did_in_chunk) next_chunk();
00880 }
00881
00882 bool
00883 ChertPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
00884 {
00885 LOGCALL(DB, bool, "ChertPostList::move_forward_in_chunk_to_at_least", desired_did);
00886 if (did >= desired_did)
00887 RETURN(true);
00888
00889 if (desired_did <= last_did_in_chunk) {
00890 while (pos != end) {
00891 read_did_increase(&pos, end, &did);
00892 if (did >= desired_did) {
00893 read_wdf(&pos, end, &wdf);
00894 RETURN(true);
00895 }
00896
00897 read_wdf(&pos, end, NULL);
00898 }
00899
00900
00901 Assert(false);
00902 }
00903
00904 pos = end;
00905 RETURN(false);
00906 }
00907
00908 PostList *
00909 ChertPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
00910 {
00911 LOGCALL(DB, PostList *, "ChertPostList::skip_to", desired_did | w_min);
00912 (void)w_min;
00913
00914
00915 have_started = true;
00916
00917
00918 if (is_at_end || desired_did <= did) RETURN(NULL);
00919
00920
00921 if (!current_chunk_contains(desired_did)) {
00922 move_to_chunk_containing(desired_did);
00923
00924
00925 if (is_at_end) RETURN(NULL);
00926 }
00927
00928
00929 bool have_document = move_forward_in_chunk_to_at_least(desired_did);
00930 (void)have_document;
00931 Assert(have_document);
00932
00933 if (is_at_end) {
00934 LOGLINE(DB, "Skipped to end");
00935 } else {
00936 LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf);
00937 }
00938
00939 RETURN(NULL);
00940 }
00941
00942
00943 bool
00944 ChertPostList::jump_to(Xapian::docid desired_did)
00945 {
00946 LOGCALL(DB, bool, "ChertPostList::jump_to", desired_did);
00947
00948
00949 have_started = true;
00950
00951
00952 if (pos == 0) RETURN(false);
00953
00954
00955
00956
00957 if (is_at_end || !current_chunk_contains(desired_did) || desired_did < did) {
00958
00959 is_at_end = false;
00960
00961 move_to_chunk_containing(desired_did);
00962
00963
00964 if (is_at_end) RETURN(false);
00965 }
00966
00967
00968 if (!move_forward_in_chunk_to_at_least(desired_did)) RETURN(false);
00969 RETURN(desired_did == did);
00970 }
00971
00972 string
00973 ChertPostList::get_description() const
00974 {
00975 return term + ":" + str(number_of_entries);
00976 }
00977
00978
00979 Xapian::docid
00980 ChertPostListTable::get_chunk(const string &tname,
00981 Xapian::docid did, bool adding,
00982 PostlistChunkReader ** from, PostlistChunkWriter **to)
00983 {
00984 LOGCALL(DB, Xapian::docid, "ChertPostListTable::get_chunk", tname | did | adding | from | to);
00985
00986 string key = make_key(tname, did);
00987
00988
00989 AutoPtr<ChertCursor> cursor(cursor_get());
00990
00991 (void)cursor->find_entry(key);
00992 Assert(!cursor->after_end());
00993
00994 const char * keypos = cursor->current_key.data();
00995 const char * keyend = keypos + cursor->current_key.size();
00996
00997 if (!check_tname_in_key(&keypos, keyend, tname)) {
00998
00999 if (!adding)
01000 throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
01001
01002 *from = NULL;
01003 *to = new PostlistChunkWriter(string(), true, tname, true);
01004 RETURN(Xapian::docid(-1));
01005 }
01006
01007
01008
01009 bool is_first_chunk = (keypos == keyend);
01010 LOGVALUE(DB, is_first_chunk);
01011
01012 cursor->read_tag();
01013 const char * pos = cursor->current_tag.data();
01014 const char * end = pos + cursor->current_tag.size();
01015 Xapian::docid first_did_in_chunk;
01016 if (is_first_chunk) {
01017 first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
01018 } else {
01019 if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk)) {
01020 report_read_error(keypos);
01021 }
01022 }
01023
01024 bool is_last_chunk;
01025 Xapian::docid last_did_in_chunk;
01026 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
01027 *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
01028 is_last_chunk);
01029 if (did > last_did_in_chunk) {
01030
01031
01032
01033 *from = NULL;
01034 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
01035 string(pos, end));
01036 } else {
01037 *from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
01038 }
01039 if (is_last_chunk) RETURN(Xapian::docid(-1));
01040
01041
01042 cursor->next();
01043 if (cursor->after_end()) {
01044 throw Xapian::DatabaseCorruptError("Expected another key but found none");
01045 }
01046 const char *kpos = cursor->current_key.data();
01047 const char *kend = kpos + cursor->current_key.size();
01048 if (!check_tname_in_key(&kpos, kend, tname)) {
01049 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
01050 }
01051
01052
01053 Xapian::docid first_did_of_next_chunk;
01054 if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
01055 report_read_error(kpos);
01056 }
01057 RETURN(first_did_of_next_chunk - 1);
01058 }
01059
// Apply a batch of buffered changes to the posting list table.
//
// Parameters:
//   mod_plists   term name -> (docid -> (action, wdf)).  As used below, an
//                action of 'A' is passed to get_chunk() as "adding", and an
//                entry whose action is 'D' is dropped rather than
//                re-appended; any other action has its wdf appended.
//   doclens      docid -> new document length, merged into the posting list
//                stored under make_key(string()).  A length equal to
//                termcount(-1) means the entry is dropped.
//   freq_deltas  term name -> (delta, delta); the first is added to the
//                stored entry count and the second to the stored collection
//                frequency.  Must contain every term in mod_plists which
//                has a non-empty change map.
//
// NOTE(review): `from` and `to` are raw owning pointers which are deleted
// manually; if flush()/append()/get_chunk() throws part-way through, the
// objects currently held are not freed.
01060 void
01061 ChertPostListTable::merge_changes(
01062 const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
01063 const map<Xapian::docid, Xapian::termcount> & doclens,
01064 const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
01065 {
01066 LOGCALL_VOID(DB, "ChertPostListTable::merge_changes", mod_plists | doclens | freq_deltas);
01067
01068
// Drop the cached doclength posting list (presumably because it would be
// stale once the table has been modified).
01069 doclen_pl.reset(0);
01070
// ---- Part 1: merge the document length changes. ----
01071 LOGVALUE(DB, doclens.size());
01072 if (!doclens.empty()) {
01073
// Make sure the doclength list has a first chunk, creating an empty one
// (no entries, flagged as last chunk) if necessary.
01074 string current_key = make_key(string());
01075 if (!key_exists(current_key)) {
01076 LOGLINE(DB, "Adding dummy first chunk");
01077 string newtag = make_start_of_first_chunk(0, 0, 0);
01078 newtag += make_start_of_chunk(true, 0, 0);
01079 add(current_key, newtag);
01080 }
01081
01082 map<Xapian::docid, Xapian::termcount>::const_iterator j;
01083 j = doclens.begin();
01084 Assert(j != doclens.end());
01085
// max_did is the largest docid belonging in the chunk currently open.
01086 Xapian::docid max_did;
01087 PostlistChunkReader *from;
01088 PostlistChunkWriter *to;
01089 max_did = get_chunk(string(), j->first, true, &from, &to);
01090 LOGVALUE(DB, max_did);
01091 for ( ; j != doclens.end(); ++j) {
01092 Xapian::docid did = j->first;
01093
01094 next_doclen_chunk:
01095 LOGLINE(DB, "Updating doclens, did=" << did);
// Copy across existing entries which come before did.  An existing entry
// for did itself is skipped so that it is replaced (or removed) below.
01096 if (from) while (!from->is_at_end()) {
01097 Xapian::docid copy_did = from->get_docid();
01098 if (copy_did >= did) {
01099 if (copy_did == did) from->next();
01100 break;
01101 }
01102 to->append(this, copy_did, from->get_wdf());
01103 from->next();
01104 }
// The old chunk is used up and did belongs in a later chunk: finish this
// chunk, open the one for did and retry the same did.
01105 if ((!from || from->is_at_end()) && did > max_did) {
01106 delete from;
01107 to->flush(this);
01108 delete to;
01109 max_did = get_chunk(string(), did, false, &from, &to);
01110 goto next_doclen_chunk;
01111 }
01112
// A length of termcount(-1) marks a deletion: nothing is appended.
01113 Xapian::termcount new_doclen = j->second;
01114 if (new_doclen != static_cast<Xapian::termcount>(-1)) {
01115 to->append(this, did, new_doclen);
01116 }
01117 }
01118
// Copy any remaining entries of the last chunk touched, then write it out.
01119 if (from) {
01120 while (!from->is_at_end()) {
01121 to->append(this, from->get_docid(), from->get_wdf());
01122 from->next();
01123 }
01124 delete from;
01125 }
01126 to->flush(this);
01127 delete to;
01128 }
01129
// ---- Part 2: merge the per-term posting list changes. ----
01130 map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
01131 for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
01132 if (i->second.empty()) continue;
01133 string tname = i->first;
01134 {
01135
01136
// Step 2a: update the counts stored in the term's first-chunk header.
01137 map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
01138 Assert(deltas != freq_deltas.end());
01139
01140 string current_key = make_key(tname);
01141 string tag;
01142 (void)get_exact_entry(current_key, tag);
01143
01144
// An empty tag means the term has no posting list yet, so start from zero.
01145 const char *pos = tag.data();
01146 const char *end = pos + tag.size();
01147 Xapian::doccount termfreq;
01148 Xapian::termcount collfreq;
01149 Xapian::docid firstdid, lastdid;
01150 bool islast;
01151 if (pos == end) {
01152 termfreq = 0;
01153 collfreq = 0;
01154 firstdid = 0;
01155 lastdid = 0;
01156 islast = true;
01157 } else {
01158 firstdid = read_start_of_first_chunk(&pos, end,
01159 &termfreq, &collfreq);
01160
01161 lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
01162 }
01163
01164 termfreq += deltas->second.first;
01165 if (termfreq == 0) {
01166
01167
// No entries remain for this term, so remove its whole posting list.
01168 if (islast) {
01169
// Single chunk: one delete is enough.
01170 del(current_key);
01171 continue;
01172 }
// Several chunks: delete entries one after another for as long as the
// cursor's current key still belongs to this term.
01173 MutableChertCursor cursor(this);
01174 bool found = cursor.find_entry(current_key);
01175 Assert(found);
01176 if (!found) continue;
01177 while (cursor.del()) {
01178 const char *kpos = cursor.current_key.data();
01179 const char *kend = kpos + cursor.current_key.size();
01180 if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
01181 }
01182 continue;
01183 }
01184 collfreq += deltas->second.second;
01185
01186
// Store the updated header: as a brand new entry if there was no tag
// (pos == end), otherwise replacing the old headers at the front of the
// existing tag.
01187 string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
01188 newhdr += make_start_of_chunk(islast, firstdid, lastdid);
01189 if (pos == end) {
01190 add(current_key, newhdr);
01191 } else {
01192 Assert((size_t)(pos - tag.data()) <= tag.size());
01193 tag.replace(0, pos - tag.data(), newhdr);
01194 add(current_key, tag);
01195 }
01196 }
// Step 2b: merge the individual entry changes, chunk by chunk (same scheme
// as for the document lengths above).
01197 map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
01198 j = i->second.begin();
01199 Assert(j != i->second.end());
01200
01201 Xapian::docid max_did;
01202 PostlistChunkReader *from;
01203 PostlistChunkWriter *to;
01204 max_did = get_chunk(tname, j->first, j->second.first == 'A',
01205 &from, &to);
01206 for ( ; j != i->second.end(); ++j) {
01207 Xapian::docid did = j->first;
01208
01209 next_chunk:
01210 LOGLINE(DB, "Updating tname=" << tname << ", did=" << did);
// Copy across existing entries before did; an existing entry for did is
// skipped (and must not exist if the action is 'A').
01211 if (from) while (!from->is_at_end()) {
01212 Xapian::docid copy_did = from->get_docid();
01213 if (copy_did >= did) {
01214 if (copy_did == did) {
01215 Assert(j->second.first != 'A');
01216 from->next();
01217 }
01218 break;
01219 }
01220 to->append(this, copy_did, from->get_wdf());
01221 from->next();
01222 }
// Old chunk used up and did belongs in a later chunk: finish this chunk,
// open the one for did and retry the same did.
01223 if ((!from || from->is_at_end()) && did > max_did) {
01224 delete from;
01225 to->flush(this);
01226 delete to;
01227 max_did = get_chunk(tname, did, false, &from, &to);
01228 goto next_chunk;
01229 }
01230
// Anything other than a 'D' action writes the new wdf for did.
01231 if (j->second.first != 'D') {
01232 Xapian::termcount new_wdf = j->second.second;
01233 to->append(this, did, new_wdf);
01234 }
01235 }
01236
// Copy any remaining entries of the last chunk touched, then write it out.
01237 if (from) {
01238 while (!from->is_at_end()) {
01239 to->append(this, from->get_docid(), from->get_wdf());
01240 from->next();
01241 }
01242 delete from;
01243 }
01244 to->flush(this);
01245 delete to;
01246 }
01247 }