00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024 #include "flint_postlist.h"
00025
00026 #include "flint_cursor.h"
00027 #include "flint_database.h"
00028 #include "flint_utils.h"
00029 #include "debuglog.h"
00030 #include "noreturn.h"
00031 #include "str.h"
00032
00033 Xapian::doccount
00034 FlintPostListTable::get_termfreq(const string & term) const
00035 {
00036 string key = make_key(term);
00037 string tag;
00038 if (!get_exact_entry(key, tag)) return 0;
00039
00040 Xapian::doccount termfreq;
00041 const char * p = tag.data();
00042 FlintPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
00043 return termfreq;
00044 }
00045
00046 Xapian::termcount
00047 FlintPostListTable::get_collection_freq(const string & term) const
00048 {
00049 string key = make_key(term);
00050 string tag;
00051 if (!get_exact_entry(key, tag)) return 0;
00052
00053 Xapian::termcount collfreq;
00054 const char * p = tag.data();
00055 FlintPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
00056 return collfreq;
00057 }
00058
00059
00060
00061
00062
00063
/// Size threshold (in bytes of encoded chunk data) compared against
/// chunk.size() by FlintPostlistChunkWriter::append() to decide when to
/// flush the current chunk and start a new one.
const unsigned CHUNKSIZE = 2000;
00065
00072 class FlintPostlistChunkWriter {
00073 public:
00074 FlintPostlistChunkWriter(const string &orig_key_,
00075 bool is_first_chunk_,
00076 const string &tname_,
00077 bool is_last_chunk_);
00078
00080 void append(FlintTable * table, Xapian::docid did,
00081 Xapian::termcount wdf, flint_doclen_t doclen);
00082
00084 void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
00085 const string & s) {
00086 Assert(!started);
00087 first_did = first_did_;
00088 current_did = current_did_;
00089 if (!s.empty()) {
00090 chunk.append(s);
00091 started = true;
00092 }
00093 }
00094
00099 void flush(FlintTable *table);
00100
00101 private:
00102 string orig_key;
00103 string tname;
00104 bool is_first_chunk;
00105 bool is_last_chunk;
00106 bool started;
00107
00108 Xapian::docid first_did;
00109 Xapian::docid current_did;
00110
00111 string chunk;
00112 };
00113
00114
00115
00117 XAPIAN_NORETURN(static void report_read_error(const char * position));
00118 static void report_read_error(const char * position)
00119 {
00120 if (position == 0) {
00121
00122 throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
00123 }
00124
00125 throw Xapian::RangeError("Value in posting list too large.");
00126 }
00127
00128 static inline bool get_tname_from_key(const char **src, const char *end,
00129 string &tname)
00130 {
00131 return F_unpack_string_preserving_sort(src, end, tname);
00132 }
00133
00134 static inline bool
00135 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
00136 {
00137 string tname_in_key;
00138
00139
00140 if (!get_tname_from_key(keypos, keyend, tname_in_key)) {
00141 report_read_error(*keypos);
00142 }
00143
00144
00145 return tname_in_key == tname;
00146 }
00147
00148 static inline bool
00149 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
00150 {
00151 if (*keypos == keyend) return false;
00152
00153 return check_tname_in_key_lite(keypos, keyend, tname);
00154 }
00155
00157 static Xapian::docid
00158 read_start_of_first_chunk(const char ** posptr,
00159 const char * end,
00160 Xapian::doccount * number_of_entries_ptr,
00161 Xapian::termcount * collection_freq_ptr)
00162 {
00163 LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void *)posptr | (const void *)end | (void *)number_of_entries_ptr | (void *)collection_freq_ptr);
00164
00165 FlintPostList::read_number_of_entries(posptr, end,
00166 number_of_entries_ptr, collection_freq_ptr);
00167 if (number_of_entries_ptr)
00168 LOGVALUE(DB, *number_of_entries_ptr);
00169 if (collection_freq_ptr)
00170 LOGVALUE(DB, *collection_freq_ptr);
00171
00172 Xapian::docid did;
00173
00174 if (!F_unpack_uint(posptr, end, &did))
00175 report_read_error(*posptr);
00176 ++did;
00177 LOGVALUE(DB, did);
00178 RETURN(did);
00179 }
00180
00181 static inline void read_did_increase(const char ** posptr,
00182 const char * end,
00183 Xapian::docid * did_ptr)
00184 {
00185 Xapian::docid did_increase;
00186 if (!F_unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
00187 *did_ptr += did_increase + 1;
00188 }
00189
00191 static inline void read_wdf_and_length(const char ** posptr,
00192 const char * end,
00193 Xapian::termcount * wdf_ptr,
00194 flint_doclen_t * doclength_ptr)
00195 {
00196 if (!F_unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
00197 if (!F_unpack_uint(posptr, end, doclength_ptr)) report_read_error(*posptr);
00198 }
00199
00201 static Xapian::docid
00202 read_start_of_chunk(const char ** posptr,
00203 const char * end,
00204 Xapian::docid first_did_in_chunk,
00205 bool * is_last_chunk_ptr)
00206 {
00207 LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
00208
00209
00210 if (!F_unpack_bool(posptr, end, is_last_chunk_ptr))
00211 report_read_error(*posptr);
00212 if (is_last_chunk_ptr)
00213 LOGVALUE(DB, *is_last_chunk_ptr);
00214
00215
00216 Xapian::docid increase_to_last;
00217 if (!F_unpack_uint(posptr, end, &increase_to_last))
00218 report_read_error(*posptr);
00219 ++increase_to_last;
00220 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
00221 LOGVALUE(DB, last_did_in_chunk);
00222 RETURN(last_did_in_chunk);
00223 }
00224
00225 static string make_wdf_and_length(Xapian::termcount wdf, flint_doclen_t doclength)
00226 {
00227 return F_pack_uint(wdf) + F_pack_uint(doclength);
00228 }
00229
00230 static void write_start_of_chunk(string & chunk,
00231 unsigned int start_of_chunk_header,
00232 unsigned int end_of_chunk_header,
00233 bool is_last_chunk,
00234 Xapian::docid first_did_in_chunk,
00235 Xapian::docid last_did_in_chunk)
00236 {
00237 Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
00238 Assert(last_did_in_chunk >= first_did_in_chunk);
00239 Xapian::docid increase_to_last = last_did_in_chunk - first_did_in_chunk;
00240
00241 chunk.replace(start_of_chunk_header,
00242 end_of_chunk_header - start_of_chunk_header,
00243 F_pack_bool(is_last_chunk) + F_pack_uint(increase_to_last - 1));
00244
00245
00246
00247 }
00248
00253 class FlintPostlistChunkReader {
00254 public:
00260 FlintPostlistChunkReader(Xapian::docid first_did, const string & data_)
00261 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
00262 {
00263 if (!at_end) read_wdf_and_length(&pos, end, &wdf, &doclength);
00264 }
00265
00266 Xapian::docid get_docid() const {
00267 return did;
00268 }
00269 Xapian::termcount get_wdf() const {
00270 return wdf;
00271 }
00272 flint_doclen_t get_doclength() const {
00273 LOGCALL(DB, flint_doclen_t, "FlintPostlistChunkReader::get_doclength", NO_ARGS);
00274 RETURN(doclength);
00275 }
00276
00277 bool is_at_end() const {
00278 return at_end;
00279 }
00280
00283 void next();
00284
00285 private:
00286 string data;
00287
00288 const char *pos;
00289 const char *end;
00290
00291 bool at_end;
00292
00293 Xapian::docid did;
00294 Xapian::termcount wdf;
00295 flint_doclen_t doclength;
00296 };
00297
00298 void
00299 FlintPostlistChunkReader::next()
00300 {
00301 if (pos == end) {
00302 at_end = true;
00303 } else {
00304 read_did_increase(&pos, end, &did);
00305 read_wdf_and_length(&pos, end, &wdf, &doclength);
00306 }
00307 }
00308
00309 FlintPostlistChunkWriter::FlintPostlistChunkWriter(const string &orig_key_,
00310 bool is_first_chunk_,
00311 const string &tname_,
00312 bool is_last_chunk_)
00313 : orig_key(orig_key_),
00314 tname(tname_), is_first_chunk(is_first_chunk_),
00315 is_last_chunk(is_last_chunk_),
00316 started(false)
00317 {
00318 LOGCALL_VOID(DB, "FlintPostlistChunkWriter::FlintPostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
00319 }
00320
00321 void
00322 FlintPostlistChunkWriter::append(FlintTable * table, Xapian::docid did,
00323 Xapian::termcount wdf, flint_doclen_t doclen)
00324 {
00325 if (!started) {
00326 started = true;
00327 first_did = did;
00328 } else {
00329 Assert(did > current_did);
00330
00331 if (chunk.size() >= CHUNKSIZE) {
00332 bool save_is_last_chunk = is_last_chunk;
00333 is_last_chunk = false;
00334 flush(table);
00335 is_last_chunk = save_is_last_chunk;
00336 is_first_chunk = false;
00337 first_did = did;
00338 chunk.resize(0);
00339 orig_key = FlintPostListTable::make_key(tname, first_did);
00340 } else {
00341 chunk.append(F_pack_uint(did - current_did - 1));
00342 }
00343 }
00344 current_did = did;
00345 chunk.append(make_wdf_and_length(wdf, doclen));
00346 }
00347
00350 static inline string
00351 make_start_of_first_chunk(Xapian::doccount entries,
00352 Xapian::termcount collectionfreq,
00353 Xapian::docid new_did)
00354 {
00355 return F_pack_uint(entries) + F_pack_uint(collectionfreq) + F_pack_uint(new_did - 1);
00356 }
00357
00360 static inline string
00361 make_start_of_chunk(bool new_is_last_chunk,
00362 Xapian::docid new_first_did,
00363 Xapian::docid new_final_did)
00364 {
00365 Assert(new_final_did >= new_first_did);
00366 return F_pack_bool(new_is_last_chunk) +
00367 F_pack_uint(new_final_did - new_first_did - 1);
00368 }
00369
00370 void
00371 FlintPostlistChunkWriter::flush(FlintTable *table)
00372 {
00373 LOGCALL_VOID(DB, "FlintPostlistChunkWriter::flush", table);
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383 if (!started) {
00384
00385
00386
00387
00388
00389
00390
00391
00392 LOGLINE(DB, "FlintPostlistChunkWriter::flush(): deleting chunk");
00393 Assert(!orig_key.empty());
00394 if (is_first_chunk) {
00395 LOGLINE(DB, "FlintPostlistChunkWriter::flush(): deleting first chunk");
00396 if (is_last_chunk) {
00397
00398
00399
00400 table->del(orig_key);
00401 return;
00402 }
00403
00404
00405
00406
00407
00408
00409 AutoPtr<FlintCursor> cursor(table->cursor_get());
00410
00411 if (!cursor->find_entry(orig_key)) {
00412 throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
00413 }
00414
00415
00416
00417 Xapian::doccount num_ent;
00418 Xapian::termcount coll_freq;
00419 {
00420 cursor->read_tag();
00421 const char *tagpos = cursor->current_tag.data();
00422 const char *tagend = tagpos + cursor->current_tag.size();
00423
00424 (void)read_start_of_first_chunk(&tagpos, tagend,
00425 &num_ent, &coll_freq);
00426 }
00427
00428
00429 cursor->next();
00430 if (cursor->after_end()) {
00431 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00432 }
00433 const char *kpos = cursor->current_key.data();
00434 const char *kend = kpos + cursor->current_key.size();
00435 if (!check_tname_in_key(&kpos, kend, tname)) {
00436 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00437 }
00438
00439
00440 Xapian::docid new_first_did;
00441 if (!F_unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
00442 report_read_error(kpos);
00443 }
00444
00445 cursor->read_tag();
00446 const char *tagpos = cursor->current_tag.data();
00447 const char *tagend = tagpos + cursor->current_tag.size();
00448
00449
00450 bool new_is_last_chunk;
00451 Xapian::docid new_last_did_in_chunk =
00452 read_start_of_chunk(&tagpos, tagend, new_first_did,
00453 &new_is_last_chunk);
00454
00455 string chunk_data(tagpos, tagend);
00456
00457
00458 table->del(cursor->current_key);
00459
00460
00461 string tag;
00462 tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
00463 tag += make_start_of_chunk(new_is_last_chunk,
00464 new_first_did,
00465 new_last_did_in_chunk);
00466 tag += chunk_data;
00467 table->add(orig_key, tag);
00468 return;
00469 }
00470
00471 LOGLINE(DB, "FlintPostlistChunkWriter::flush(): deleting secondary chunk");
00472
00473
00474
00475
00476
00477 table->del(orig_key);
00478
00479 if (is_last_chunk) {
00480 LOGLINE(DB, "FlintPostlistChunkWriter::flush(): deleting secondary last chunk");
00481
00482 AutoPtr<FlintCursor> cursor(table->cursor_get());
00483
00484
00485
00486 if (cursor->find_entry(orig_key)) {
00487 throw Xapian::DatabaseCorruptError("Flint key not deleted as we expected");
00488 }
00489
00490 const char * keypos = cursor->current_key.data();
00491 const char * keyend = keypos + cursor->current_key.size();
00492 if (!check_tname_in_key(&keypos, keyend, tname)) {
00493 throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
00494 }
00495
00496 bool is_prev_first_chunk = (keypos == keyend);
00497
00498
00499 cursor->read_tag();
00500 string tag = cursor->current_tag;
00501
00502 const char *tagpos = tag.data();
00503 const char *tagend = tagpos + tag.size();
00504
00505
00506 Xapian::docid first_did_in_chunk;
00507 if (is_prev_first_chunk) {
00508 first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
00509 0, 0);
00510 } else {
00511 if (!F_unpack_uint_preserving_sort(&keypos, keyend,
00512 &first_did_in_chunk))
00513 report_read_error(keypos);
00514 }
00515 bool wrong_is_last_chunk;
00516 string::size_type start_of_chunk_header = tagpos - tag.data();
00517 Xapian::docid last_did_in_chunk =
00518 read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
00519 &wrong_is_last_chunk);
00520 string::size_type end_of_chunk_header = tagpos - tag.data();
00521
00522
00523 write_start_of_chunk(tag,
00524 start_of_chunk_header,
00525 end_of_chunk_header,
00526 true,
00527 first_did_in_chunk,
00528 last_did_in_chunk);
00529 table->add(cursor->current_key, tag);
00530 }
00531 } else {
00532 LOGLINE(DB, "FlintPostlistChunkWriter::flush(): updating chunk which still has items in it");
00533
00534
00535
00536
00537
00538
00539 string tag;
00540
00541
00542
00543
00544 if (is_first_chunk) {
00545
00546
00547
00548 LOGLINE(DB, "FlintPostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
00549 string key = FlintPostListTable::make_key(tname);
00550 bool ok = table->get_exact_entry(key, tag);
00551 (void)ok;
00552 Assert(ok);
00553 Assert(!tag.empty());
00554
00555 Xapian::doccount num_ent;
00556 Xapian::termcount coll_freq;
00557 {
00558 const char * tagpos = tag.data();
00559 const char * tagend = tagpos + tag.size();
00560 (void)read_start_of_first_chunk(&tagpos, tagend,
00561 &num_ent, &coll_freq);
00562 }
00563
00564 tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
00565
00566 tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
00567 tag += chunk;
00568 table->add(key, tag);
00569 return;
00570 }
00571
00572 LOGLINE(DB, "FlintPostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584 const char *keypos = orig_key.data();
00585 const char *keyend = keypos + orig_key.size();
00586 if (!check_tname_in_key(&keypos, keyend, tname)) {
00587 throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
00588 }
00589 Xapian::docid initial_did;
00590 if (!F_unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
00591 report_read_error(keypos);
00592 }
00593 string new_key;
00594 if (initial_did != first_did) {
00595
00596
00597
00598
00599 new_key = FlintPostListTable::make_key(tname, first_did);
00600 table->del(orig_key);
00601 } else {
00602 new_key = orig_key;
00603 }
00604
00605
00606 tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
00607
00608 tag += chunk;
00609 table->add(new_key, tag);
00610 }
00611 }
00612
00617 void FlintPostList::read_number_of_entries(const char ** posptr,
00618 const char * end,
00619 Xapian::doccount * number_of_entries_ptr,
00620 Xapian::termcount * collection_freq_ptr)
00621 {
00622 if (!F_unpack_uint(posptr, end, number_of_entries_ptr))
00623 report_read_error(*posptr);
00624 if (!F_unpack_uint(posptr, end, collection_freq_ptr))
00625 report_read_error(*posptr);
00626 }
00627
00647 FlintPostList::FlintPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> this_db_,
00648 const string & term_)
00649 : LeafPostList(term_),
00650 this_db(this_db_),
00651 have_started(false),
00652 cursor(this_db->postlist_table.cursor_get()),
00653 is_at_end(false)
00654 {
00655 LOGCALL_VOID(DB, "FlintPostList::FlintPostList", this_db_.get() | term_);
00656 string key = FlintPostListTable::make_key(term);
00657 int found = cursor->find_entry(key);
00658 if (!found) {
00659 number_of_entries = 0;
00660 is_at_end = true;
00661 pos = 0;
00662 end = 0;
00663 first_did_in_chunk = 0;
00664 last_did_in_chunk = 0;
00665 return;
00666 }
00667 cursor->read_tag();
00668 pos = cursor->current_tag.data();
00669 end = pos + cursor->current_tag.size();
00670
00671 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00672 first_did_in_chunk = did;
00673 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00674 &is_last_chunk);
00675 read_wdf_and_length(&pos, end, &wdf, &doclength);
00676 }
00677
00678 FlintPostList::~FlintPostList()
00679 {
00680 LOGCALL_VOID(DB, "FlintPostList::~FlintPostList", NO_ARGS);
00681 }
00682
00683 bool
00684 FlintPostList::next_in_chunk()
00685 {
00686 LOGCALL(DB, bool, "FlintPostList::next_in_chunk", NO_ARGS);
00687 if (pos == end) RETURN(false);
00688
00689 read_did_increase(&pos, end, &did);
00690 read_wdf_and_length(&pos, end, &wdf, &doclength);
00691
00692
00693 Assert(did <= last_did_in_chunk);
00694 Assert(did < last_did_in_chunk || pos == end);
00695 Assert(pos != end || did == last_did_in_chunk);
00696
00697 RETURN(true);
00698 }
00699
00700 void
00701 FlintPostList::next_chunk()
00702 {
00703 LOGCALL_VOID(DB, "FlintPostList::next_chunk", NO_ARGS);
00704 if (is_last_chunk) {
00705 is_at_end = true;
00706 return;
00707 }
00708
00709 cursor->next();
00710 if (cursor->after_end()) {
00711 is_at_end = true;
00712 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00713 term + "'");
00714 }
00715 const char * keypos = cursor->current_key.data();
00716 const char * keyend = keypos + cursor->current_key.size();
00717
00718 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00719 is_at_end = true;
00720 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00721 term + "'");
00722 }
00723
00724 Xapian::docid newdid;
00725 if (!F_unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
00726 report_read_error(keypos);
00727 }
00728 if (newdid <= did) {
00729 throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
00730 str(newdid) +
00731 ") is not greater than final document ID in previous chunk (" +
00732 str(did) + ")");
00733 }
00734 did = newdid;
00735
00736 cursor->read_tag();
00737 pos = cursor->current_tag.data();
00738 end = pos + cursor->current_tag.size();
00739
00740 first_did_in_chunk = did;
00741 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00742 &is_last_chunk);
00743 read_wdf_and_length(&pos, end, &wdf, &doclength);
00744 }
00745
00746 Xapian::termcount
00747 FlintPostList::get_doclength() const
00748 {
00749 LOGCALL(DB, Xapian::termcount, "FlintPostList::get_doclength", NO_ARGS);
00750 Assert(have_started);
00751 RETURN(static_cast<Xapian::termcount>(doclength));
00752 }
00753
00754 PositionList *
00755 FlintPostList::read_position_list()
00756 {
00757 LOGCALL(DB, PositionList *, "FlintPostList::read_position_list", NO_ARGS);
00758 positionlist.read_data(&this_db->position_table, did, term);
00759 RETURN(&positionlist);
00760 }
00761
00762 PositionList *
00763 FlintPostList::open_position_list() const
00764 {
00765 LOGCALL(DB, PositionList *, "FlintPostList::open_position_list", NO_ARGS);
00766 RETURN(new FlintPositionList(&this_db->position_table, did, term));
00767 }
00768
00769 PostList *
00770 FlintPostList::next(Xapian::weight w_min)
00771 {
00772 LOGCALL(DB, PostList *, "FlintPostList::next", w_min);
00773 (void)w_min;
00774
00775 if (!have_started) {
00776 have_started = true;
00777 } else {
00778 if (!next_in_chunk()) next_chunk();
00779 }
00780
00781 if (is_at_end) {
00782 LOGLINE(DB, "Moved to end");
00783 } else {
00784 LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf <<
00785 ", doclength = " << doclength);
00786 }
00787
00788 RETURN(NULL);
00789 }
00790
00791 bool
00792 FlintPostList::current_chunk_contains(Xapian::docid desired_did)
00793 {
00794 LOGCALL(DB, bool, "FlintPostList::current_chunk_contains", desired_did);
00795 if (desired_did >= first_did_in_chunk &&
00796 desired_did <= last_did_in_chunk) {
00797 RETURN(true);
00798 }
00799 RETURN(false);
00800 }
00801
00802 void
00803 FlintPostList::move_to_chunk_containing(Xapian::docid desired_did)
00804 {
00805 LOGCALL_VOID(DB, "FlintPostList::move_to_chunk_containing", desired_did);
00806 (void)cursor->find_entry(FlintPostListTable::make_key(term, desired_did));
00807 Assert(!cursor->after_end());
00808
00809 const char * keypos = cursor->current_key.data();
00810 const char * keyend = keypos + cursor->current_key.size();
00811
00812 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00813
00814 is_at_end = true;
00815 is_last_chunk = true;
00816 return;
00817 }
00818 is_at_end = false;
00819
00820 cursor->read_tag();
00821 pos = cursor->current_tag.data();
00822 end = pos + cursor->current_tag.size();
00823
00824 if (keypos == keyend) {
00825
00826 #ifdef XAPIAN_ASSERTIONS
00827 Xapian::doccount old_number_of_entries = number_of_entries;
00828 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00829 Assert(old_number_of_entries == number_of_entries);
00830 #else
00831 did = read_start_of_first_chunk(&pos, end, NULL, NULL);
00832 #endif
00833 } else {
00834
00835 if (!F_unpack_uint_preserving_sort(&keypos, keyend, &did)) {
00836 report_read_error(keypos);
00837 }
00838 }
00839
00840 first_did_in_chunk = did;
00841 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00842 &is_last_chunk);
00843 read_wdf_and_length(&pos, end, &wdf, &doclength);
00844
00845
00846
00847 if (desired_did > last_did_in_chunk) next_chunk();
00848 }
00849
00850 bool
00851 FlintPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
00852 {
00853 LOGCALL(DB, bool, "FlintPostList::move_forward_in_chunk_to_at_least", desired_did);
00854 if (desired_did > last_did_in_chunk) {
00855 pos = end;
00856 RETURN(false);
00857 }
00858 while (did < desired_did) {
00859
00860
00861 bool at_end_of_chunk = !next_in_chunk();
00862 if (at_end_of_chunk) RETURN(false);
00863 }
00864 RETURN(true);
00865 }
00866
00867 PostList *
00868 FlintPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
00869 {
00870 LOGCALL(DB, PostList *, "FlintPostList::skip_to", desired_did | w_min);
00871 (void)w_min;
00872
00873
00874 have_started = true;
00875
00876
00877 if (is_at_end || desired_did <= did) RETURN(NULL);
00878
00879
00880 if (!current_chunk_contains(desired_did)) {
00881 move_to_chunk_containing(desired_did);
00882
00883
00884 if (is_at_end) RETURN(NULL);
00885 }
00886
00887
00888 bool have_document = move_forward_in_chunk_to_at_least(desired_did);
00889 (void)have_document;
00890 Assert(have_document);
00891
00892 if (is_at_end) {
00893 LOGLINE(DB, "Skipped to end");
00894 } else {
00895 LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf <<
00896 ", doclength = " << doclength);
00897 }
00898
00899 RETURN(NULL);
00900 }
00901
00902 string
00903 FlintPostList::get_description() const
00904 {
00905 return term + ":" + str(number_of_entries);
00906 }
00907
00908
00909 Xapian::docid
00910 FlintPostListTable::get_chunk(const string &tname,
00911 Xapian::docid did, bool adding,
00912 FlintPostlistChunkReader ** from, FlintPostlistChunkWriter **to)
00913 {
00914
00915 string key = make_key(tname, did);
00916
00917
00918 AutoPtr<FlintCursor> cursor(cursor_get());
00919
00920 cursor->find_entry(key);
00921 Assert(!cursor->after_end());
00922
00923 const char * keypos = cursor->current_key.data();
00924 const char * keyend = keypos + cursor->current_key.size();
00925
00926 if (!check_tname_in_key(&keypos, keyend, tname)) {
00927
00928 if (!adding)
00929 throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
00930
00931 *from = NULL;
00932 *to = new FlintPostlistChunkWriter(string(), true, tname, true);
00933 return Xapian::docid(-1);
00934 }
00935
00936
00937
00938 bool is_first_chunk = (keypos == keyend);
00939
00940 cursor->read_tag();
00941 const char * pos = cursor->current_tag.data();
00942 const char * end = pos + cursor->current_tag.size();
00943 Xapian::docid first_did_in_chunk;
00944 if (is_first_chunk) {
00945 first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
00946 } else {
00947 if (!F_unpack_uint_preserving_sort(&keypos, keyend,
00948 &first_did_in_chunk)) {
00949 report_read_error(keypos);
00950 }
00951 }
00952
00953 bool is_last_chunk;
00954 Xapian::docid last_did_in_chunk;
00955 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
00956 *to = new FlintPostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
00957 is_last_chunk);
00958 if (did > last_did_in_chunk) {
00959
00960
00961
00962 *from = NULL;
00963 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
00964 string(pos, end));
00965 } else {
00966 *from = new FlintPostlistChunkReader(first_did_in_chunk, string(pos, end));
00967 }
00968 if (is_last_chunk) return Xapian::docid(-1);
00969
00970
00971 cursor->next();
00972 if (cursor->after_end()) {
00973 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00974 }
00975 const char *kpos = cursor->current_key.data();
00976 const char *kend = kpos + cursor->current_key.size();
00977 if (!check_tname_in_key(&kpos, kend, tname)) {
00978 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00979 }
00980
00981
00982 Xapian::docid first_did_of_next_chunk;
00983 if (!F_unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
00984 report_read_error(kpos);
00985 }
00986 return first_did_of_next_chunk - 1;
00987 }
00988
00989 void
00990 FlintPostListTable::merge_changes(
00991 const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
00992 const map<Xapian::docid, Xapian::termcount> & doclens,
00993 const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
00994 {
00995 LOGCALL_VOID(DB, "FlintPostListTable::merge_changes", mod_plists | doclens | freq_deltas);
00996 map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
00997 for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
00998 if (i->second.empty()) continue;
00999 string tname = i->first;
01000 {
01001
01002
01003 map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
01004 Assert(deltas != freq_deltas.end());
01005
01006 string current_key = make_key(tname);
01007 string tag;
01008 (void)get_exact_entry(current_key, tag);
01009
01010
01011 const char *pos = tag.data();
01012 const char *end = pos + tag.size();
01013 Xapian::doccount termfreq;
01014 Xapian::termcount collfreq;
01015 Xapian::docid firstdid, lastdid;
01016 bool islast;
01017 if (pos == end) {
01018 termfreq = 0;
01019 collfreq = 0;
01020 firstdid = 0;
01021 lastdid = 0;
01022 islast = true;
01023 } else {
01024 firstdid = read_start_of_first_chunk(&pos, end,
01025 &termfreq, &collfreq);
01026
01027 lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
01028 }
01029
01030 termfreq += deltas->second.first;
01031 if (termfreq == 0) {
01032
01033
01034 if (islast) {
01035
01036 del(current_key);
01037 continue;
01038 }
01039 AutoPtr<FlintCursor> cursor(cursor_get());
01040 bool found = cursor->find_entry(current_key);
01041 Assert(found);
01042 if (!found) continue;
01043 while (cursor->del()) {
01044 const char *kpos = cursor->current_key.data();
01045 const char *kend = kpos + cursor->current_key.size();
01046 if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
01047 }
01048 continue;
01049 }
01050 collfreq += deltas->second.second;
01051
01052
01053 string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
01054 newhdr += make_start_of_chunk(islast, firstdid, lastdid);
01055 if (pos == end) {
01056 add(current_key, newhdr);
01057 } else {
01058 Assert((size_t)(pos - tag.data()) <= tag.size());
01059 tag.replace(0, pos - tag.data(), newhdr);
01060 add(current_key, tag);
01061 }
01062 }
01063 map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
01064 j = i->second.begin();
01065 Assert(j != i->second.end());
01066
01067 Xapian::docid max_did;
01068 FlintPostlistChunkReader *from;
01069 FlintPostlistChunkWriter *to;
01070 max_did = get_chunk(tname, j->first, j->second.first == 'A',
01071 &from, &to);
01072 for ( ; j != i->second.end(); ++j) {
01073 Xapian::docid did = j->first;
01074
01075 next_chunk:
01076 LOGLINE(DB, "Updating tname=" << tname << ", did=" << did);
01077 if (from) while (!from->is_at_end()) {
01078 Xapian::docid copy_did = from->get_docid();
01079 if (copy_did >= did) {
01080 if (copy_did == did) {
01081 Assert(j->second.first != 'A');
01082 from->next();
01083 }
01084 break;
01085 }
01086 to->append(this, copy_did,
01087 from->get_wdf(), from->get_doclength());
01088 from->next();
01089 }
01090 if ((!from || from->is_at_end()) && did > max_did) {
01091 delete from;
01092 to->flush(this);
01093 delete to;
01094 max_did = get_chunk(tname, did, false, &from, &to);
01095 goto next_chunk;
01096 }
01097
01098 if (j->second.first != 'D') {
01099 map<Xapian::docid, Xapian::termcount>::const_iterator k = doclens.find(did);
01100 Assert(k != doclens.end());
01101 Xapian::termcount new_doclen = k->second;
01102 Xapian::termcount new_wdf = j->second.second;
01103
01104 to->append(this, did, new_wdf, new_doclen);
01105 }
01106 }
01107
01108 if (from) {
01109 while (!from->is_at_end()) {
01110 to->append(this, from->get_docid(),
01111 from->get_wdf(), from->get_doclength());
01112 from->next();
01113 }
01114 delete from;
01115 }
01116 to->flush(this);
01117 delete to;
01118 }
01119 }