00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024 #include "flint_postlist.h"
00025
00026 #include "flint_cursor.h"
00027 #include "flint_database.h"
00028 #include "flint_utils.h"
00029 #include "noreturn.h"
00030 #include "omdebug.h"
00031 #include "utils.h"
00032
00033 Xapian::doccount
00034 FlintPostListTable::get_termfreq(const string & term) const
00035 {
00036 string key = make_key(term);
00037 string tag;
00038 if (!get_exact_entry(key, tag)) return 0;
00039
00040 Xapian::doccount termfreq;
00041 const char * p = tag.data();
00042 FlintPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
00043 return termfreq;
00044 }
00045
00046 Xapian::termcount
00047 FlintPostListTable::get_collection_freq(const string & term) const
00048 {
00049 string key = make_key(term);
00050 string tag;
00051 if (!get_exact_entry(key, tag)) return 0;
00052
00053 Xapian::termcount collfreq;
00054 const char * p = tag.data();
00055 FlintPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
00056 return collfreq;
00057 }
00058
00059
00060
00061
00062
00063
00064 const unsigned int CHUNKSIZE = 2000;
00065
00072 class PostlistChunkWriter {
00073 public:
00074 PostlistChunkWriter(const string &orig_key_,
00075 bool is_first_chunk_,
00076 const string &tname_,
00077 bool is_last_chunk_);
00078
00080 void append(FlintTable * table, Xapian::docid did,
00081 Xapian::termcount wdf, flint_doclen_t doclen);
00082
00084 void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
00085 const string & s) {
00086 Assert(!started);
00087 first_did = first_did_;
00088 current_did = current_did_;
00089 if (!s.empty()) {
00090 chunk.append(s);
00091 started = true;
00092 }
00093 }
00094
00099 void flush(FlintTable *table);
00100
00101 private:
00102 string orig_key;
00103 string tname;
00104 bool is_first_chunk;
00105 bool is_last_chunk;
00106 bool started;
00107
00108 Xapian::docid first_did;
00109 Xapian::docid current_did;
00110
00111 string chunk;
00112 };
00113
00114
00115
00117 XAPIAN_NORETURN(static void report_read_error(const char * position));
00118 static void report_read_error(const char * position)
00119 {
00120 if (position == 0) {
00121
00122 throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
00123 }
00124
00125 throw Xapian::RangeError("Value in posting list too large.");
00126 }
00127
00128 static inline bool get_tname_from_key(const char **src, const char *end,
00129 string &tname)
00130 {
00131 return unpack_string_preserving_sort(src, end, tname);
00132 }
00133
00134 static inline bool
00135 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
00136 {
00137 string tname_in_key;
00138
00139
00140 if (!get_tname_from_key(keypos, keyend, tname_in_key)) {
00141 report_read_error(*keypos);
00142 }
00143
00144
00145 return tname_in_key == tname;
00146 }
00147
00148 static inline bool
00149 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
00150 {
00151 if (*keypos == keyend) return false;
00152
00153 return check_tname_in_key_lite(keypos, keyend, tname);
00154 }
00155
00157 static Xapian::docid
00158 read_start_of_first_chunk(const char ** posptr,
00159 const char * end,
00160 Xapian::doccount * number_of_entries_ptr,
00161 Xapian::termcount * collection_freq_ptr)
00162 {
00163 DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk",
00164 (const void *)posptr << ", " <<
00165 (const void *)end << ", " <<
00166 (void *)number_of_entries_ptr << ", " <<
00167 (void *)collection_freq_ptr);
00168
00169 FlintPostList::read_number_of_entries(posptr, end,
00170 number_of_entries_ptr, collection_freq_ptr);
00171 if (number_of_entries_ptr)
00172 DEBUGLINE(DB, "number_of_entries = " << *number_of_entries_ptr);
00173 if (collection_freq_ptr)
00174 DEBUGLINE(DB, "collection_freq = " << *collection_freq_ptr);
00175
00176 Xapian::docid did;
00177
00178 if (!unpack_uint(posptr, end, &did))
00179 report_read_error(*posptr);
00180 ++did;
00181 DEBUGLINE(DB, "doc_id = " << did);
00182 RETURN(did);
00183 }
00184
00185 static inline void read_did_increase(const char ** posptr,
00186 const char * end,
00187 Xapian::docid * did_ptr)
00188 {
00189 Xapian::docid did_increase;
00190 if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
00191 *did_ptr += did_increase + 1;
00192 }
00193
00195 static inline void read_wdf_and_length(const char ** posptr,
00196 const char * end,
00197 Xapian::termcount * wdf_ptr,
00198 flint_doclen_t * doclength_ptr)
00199 {
00200 if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
00201 if (!unpack_uint(posptr, end, doclength_ptr)) report_read_error(*posptr);
00202 }
00203
00205 static Xapian::docid
00206 read_start_of_chunk(const char ** posptr,
00207 const char * end,
00208 Xapian::docid first_did_in_chunk,
00209 bool * is_last_chunk_ptr)
00210 {
00211 DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk",
00212 reinterpret_cast<const void*>(posptr) << ", " <<
00213 reinterpret_cast<const void*>(end) << ", " <<
00214 first_did_in_chunk << ", " <<
00215 reinterpret_cast<const void*>(is_last_chunk_ptr));
00216
00217
00218 if (!unpack_bool(posptr, end, is_last_chunk_ptr))
00219 report_read_error(*posptr);
00220 if (is_last_chunk_ptr)
00221 DEBUGLINE(DB, "is_last_chunk = " << *is_last_chunk_ptr);
00222
00223
00224 Xapian::docid increase_to_last;
00225 if (!unpack_uint(posptr, end, &increase_to_last))
00226 report_read_error(*posptr);
00227 ++increase_to_last;
00228 Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
00229 DEBUGLINE(DB, "last_did_in_chunk = " << last_did_in_chunk);
00230 RETURN(last_did_in_chunk);
00231 }
00232
00233 static string make_wdf_and_length(Xapian::termcount wdf, flint_doclen_t doclength)
00234 {
00235 return pack_uint(wdf) + pack_uint(doclength);
00236 }
00237
00238 static void write_start_of_chunk(string & chunk,
00239 unsigned int start_of_chunk_header,
00240 unsigned int end_of_chunk_header,
00241 bool is_last_chunk,
00242 Xapian::docid first_did_in_chunk,
00243 Xapian::docid last_did_in_chunk)
00244 {
00245 Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
00246 Assert(last_did_in_chunk >= first_did_in_chunk);
00247 Xapian::docid increase_to_last = last_did_in_chunk - first_did_in_chunk;
00248
00249 chunk.replace(start_of_chunk_header,
00250 end_of_chunk_header - start_of_chunk_header,
00251 pack_bool(is_last_chunk) + pack_uint(increase_to_last - 1));
00252
00253
00254
00255 }
00256
00261 class PostlistChunkReader {
00262 public:
00268 PostlistChunkReader(Xapian::docid first_did, const string & data_)
00269 : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
00270 {
00271 if (!at_end) read_wdf_and_length(&pos, end, &wdf, &doclength);
00272 }
00273
00274 Xapian::docid get_docid() const {
00275 return did;
00276 }
00277 Xapian::termcount get_wdf() const {
00278 return wdf;
00279 }
00280 flint_doclen_t get_doclength() const {
00281 DEBUGCALL(DB, flint_doclen_t, "PostlistChunkReader::get_doclength", "");
00282 RETURN(doclength);
00283 }
00284
00285 bool is_at_end() const {
00286 return at_end;
00287 }
00288
00291 void next();
00292
00293 private:
00294 string data;
00295
00296 const char *pos;
00297 const char *end;
00298
00299 bool at_end;
00300
00301 Xapian::docid did;
00302 Xapian::termcount wdf;
00303 flint_doclen_t doclength;
00304 };
00305
00306 void
00307 PostlistChunkReader::next()
00308 {
00309 if (pos == end) {
00310 at_end = true;
00311 } else {
00312 read_did_increase(&pos, end, &did);
00313 read_wdf_and_length(&pos, end, &wdf, &doclength);
00314 }
00315 }
00316
00317 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
00318 bool is_first_chunk_,
00319 const string &tname_,
00320 bool is_last_chunk_)
00321 : orig_key(orig_key_),
00322 tname(tname_), is_first_chunk(is_first_chunk_),
00323 is_last_chunk(is_last_chunk_),
00324 started(false)
00325 {
00326 DEBUGCALL(DB, void, "PostlistChunkWriter::PostlistChunkWriter",
00327 orig_key_ << ", " << is_first_chunk_ << ", " << tname_ << ", " <<
00328 is_last_chunk_);
00329 }
00330
00331 void
00332 PostlistChunkWriter::append(FlintTable * table, Xapian::docid did,
00333 Xapian::termcount wdf, flint_doclen_t doclen)
00334 {
00335 if (!started) {
00336 started = true;
00337 first_did = did;
00338 } else {
00339 Assert(did > current_did);
00340
00341 if (chunk.size() >= CHUNKSIZE) {
00342 bool save_is_last_chunk = is_last_chunk;
00343 is_last_chunk = false;
00344 flush(table);
00345 is_last_chunk = save_is_last_chunk;
00346 is_first_chunk = false;
00347 first_did = did;
00348 chunk = "";
00349 orig_key = FlintPostListTable::make_key(tname, first_did);
00350 } else {
00351 chunk.append(pack_uint(did - current_did - 1));
00352 }
00353 }
00354 current_did = did;
00355 chunk.append(make_wdf_and_length(wdf, doclen));
00356 }
00357
00360 static inline string
00361 make_start_of_first_chunk(Xapian::doccount entries,
00362 Xapian::termcount collectionfreq,
00363 Xapian::docid new_did)
00364 {
00365 return pack_uint(entries) + pack_uint(collectionfreq) + pack_uint(new_did - 1);
00366 }
00367
00370 static inline string
00371 make_start_of_chunk(bool new_is_last_chunk,
00372 Xapian::docid new_first_did,
00373 Xapian::docid new_final_did)
00374 {
00375 Assert(new_final_did >= new_first_did);
00376 return pack_bool(new_is_last_chunk) +
00377 pack_uint(new_final_did - new_first_did - 1);
00378 }
00379
00380 void
00381 PostlistChunkWriter::flush(FlintTable *table)
00382 {
00383 DEBUGCALL(DB, void, "PostlistChunkWriter::flush", table);
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393 if (!started) {
00394
00395
00396
00397
00398
00399
00400
00401
00402 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
00403 Assert(!orig_key.empty());
00404 if (is_first_chunk) {
00405 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
00406 if (is_last_chunk) {
00407
00408
00409
00410 table->del(orig_key);
00411 return;
00412 }
00413
00414
00415
00416
00417
00418
00419 AutoPtr<FlintCursor> cursor(table->cursor_get());
00420
00421 if (!cursor->find_entry(orig_key)) {
00422 throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
00423 }
00424
00425
00426
00427 Xapian::doccount num_ent;
00428 Xapian::termcount coll_freq;
00429 {
00430 cursor->read_tag();
00431 const char *tagpos = cursor->current_tag.data();
00432 const char *tagend = tagpos + cursor->current_tag.size();
00433
00434 (void)read_start_of_first_chunk(&tagpos, tagend,
00435 &num_ent, &coll_freq);
00436 }
00437
00438
00439 cursor->next();
00440 if (cursor->after_end()) {
00441 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00442 }
00443 const char *kpos = cursor->current_key.data();
00444 const char *kend = kpos + cursor->current_key.size();
00445 if (!check_tname_in_key(&kpos, kend, tname)) {
00446 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00447 }
00448
00449
00450 Xapian::docid new_first_did;
00451 if (!unpack_uint_preserving_sort(&kpos, kend,
00452 &new_first_did)) {
00453 report_read_error(kpos);
00454 }
00455
00456 cursor->read_tag();
00457 const char *tagpos = cursor->current_tag.data();
00458 const char *tagend = tagpos + cursor->current_tag.size();
00459
00460
00461 bool new_is_last_chunk;
00462 Xapian::docid new_last_did_in_chunk =
00463 read_start_of_chunk(&tagpos, tagend, new_first_did,
00464 &new_is_last_chunk);
00465
00466 string chunk_data(tagpos, tagend);
00467
00468
00469 table->del(cursor->current_key);
00470
00471
00472 string tag;
00473 tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
00474 tag += make_start_of_chunk(new_is_last_chunk,
00475 new_first_did,
00476 new_last_did_in_chunk);
00477 tag += chunk_data;
00478 table->add(orig_key, tag);
00479 return;
00480 }
00481
00482 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
00483
00484
00485
00486
00487
00488 table->del(orig_key);
00489
00490 if (is_last_chunk) {
00491 DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
00492
00493 AutoPtr<FlintCursor> cursor(table->cursor_get());
00494
00495
00496
00497 if (cursor->find_entry(orig_key)) {
00498 throw Xapian::DatabaseCorruptError("Flint key not deleted as we expected");
00499 }
00500
00501 const char * keypos = cursor->current_key.data();
00502 const char * keyend = keypos + cursor->current_key.size();
00503 if (!check_tname_in_key(&keypos, keyend, tname)) {
00504 throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
00505 }
00506
00507 bool is_prev_first_chunk = (keypos == keyend);
00508
00509
00510 cursor->read_tag();
00511 string tag = cursor->current_tag;
00512
00513 const char *tagpos = tag.data();
00514 const char *tagend = tagpos + tag.size();
00515
00516
00517 Xapian::docid first_did_in_chunk;
00518 if (is_prev_first_chunk) {
00519 first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
00520 0, 0);
00521 } else {
00522 if (!unpack_uint_preserving_sort(&keypos, keyend,
00523 &first_did_in_chunk))
00524 report_read_error(keypos);
00525 }
00526 bool wrong_is_last_chunk;
00527 string::size_type start_of_chunk_header = tagpos - tag.data();
00528 Xapian::docid last_did_in_chunk =
00529 read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
00530 &wrong_is_last_chunk);
00531 string::size_type end_of_chunk_header = tagpos - tag.data();
00532
00533
00534 write_start_of_chunk(tag,
00535 start_of_chunk_header,
00536 end_of_chunk_header,
00537 true,
00538 first_did_in_chunk,
00539 last_did_in_chunk);
00540 table->add(cursor->current_key, tag);
00541 }
00542 } else {
00543 DEBUGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
00544
00545
00546
00547
00548
00549
00550 string tag;
00551
00552
00553
00554
00555 if (is_first_chunk) {
00556
00557
00558
00559 DEBUGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
00560 string key = FlintPostListTable::make_key(tname);
00561 bool ok = table->get_exact_entry(key, tag);
00562 (void)ok;
00563 Assert(ok);
00564 Assert(!tag.empty());
00565
00566 Xapian::doccount num_ent;
00567 Xapian::termcount coll_freq;
00568 {
00569 const char * tagpos = tag.data();
00570 const char * tagend = tagpos + tag.size();
00571 (void)read_start_of_first_chunk(&tagpos, tagend,
00572 &num_ent, &coll_freq);
00573 }
00574
00575 tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
00576
00577 tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
00578 tag += chunk;
00579 table->add(key, tag);
00580 return;
00581 }
00582
00583 DEBUGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595 const char *keypos = orig_key.data();
00596 const char *keyend = keypos + orig_key.size();
00597 if (!check_tname_in_key(&keypos, keyend, tname)) {
00598 throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
00599 }
00600 Xapian::docid initial_did;
00601 if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
00602 report_read_error(keypos);
00603 }
00604 string new_key;
00605 if (initial_did != first_did) {
00606
00607
00608
00609
00610 new_key = FlintPostListTable::make_key(tname, first_did);
00611 table->del(orig_key);
00612 } else {
00613 new_key = orig_key;
00614 }
00615
00616
00617 tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
00618
00619 tag += chunk;
00620 table->add(new_key, tag);
00621 }
00622 }
00623
00628 void FlintPostList::read_number_of_entries(const char ** posptr,
00629 const char * end,
00630 Xapian::doccount * number_of_entries_ptr,
00631 Xapian::termcount * collection_freq_ptr)
00632 {
00633 if (!unpack_uint(posptr, end, number_of_entries_ptr))
00634 report_read_error(*posptr);
00635 if (!unpack_uint(posptr, end, collection_freq_ptr))
00636 report_read_error(*posptr);
00637 }
00638
00658 FlintPostList::FlintPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> this_db_,
00659 const string & term_)
00660 : this_db(this_db_),
00661 term(term_),
00662 have_started(false),
00663 cursor(this_db->postlist_table.cursor_get()),
00664 is_at_end(false)
00665 {
00666 DEBUGCALL(DB, void, "FlintPostList::FlintPostList",
00667 this_db_.get() << ", " << term_);
00668 string key = FlintPostListTable::make_key(term);
00669 int found = cursor->find_entry(key);
00670 if (!found) {
00671 number_of_entries = 0;
00672 is_at_end = true;
00673 pos = 0;
00674 end = 0;
00675 first_did_in_chunk = 0;
00676 last_did_in_chunk = 0;
00677 return;
00678 }
00679 cursor->read_tag();
00680 pos = cursor->current_tag.data();
00681 end = pos + cursor->current_tag.size();
00682
00683 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00684 first_did_in_chunk = did;
00685 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00686 &is_last_chunk);
00687 read_wdf_and_length(&pos, end, &wdf, &doclength);
00688 }
00689
00690 FlintPostList::~FlintPostList()
00691 {
00692 DEBUGCALL(DB, void, "FlintPostList::~FlintPostList", "");
00693 }
00694
00695 bool
00696 FlintPostList::next_in_chunk()
00697 {
00698 DEBUGCALL(DB, bool, "FlintPostList::next_in_chunk", "");
00699 if (pos == end) RETURN(false);
00700
00701 read_did_increase(&pos, end, &did);
00702 read_wdf_and_length(&pos, end, &wdf, &doclength);
00703
00704
00705 Assert(did <= last_did_in_chunk);
00706 Assert(did < last_did_in_chunk || pos == end);
00707 Assert(pos != end || did == last_did_in_chunk);
00708
00709 RETURN(true);
00710 }
00711
00712 void
00713 FlintPostList::next_chunk()
00714 {
00715 DEBUGCALL(DB, void, "FlintPostList::next_chunk", "");
00716 if (is_last_chunk) {
00717 is_at_end = true;
00718 return;
00719 }
00720
00721 cursor->next();
00722 if (cursor->after_end()) {
00723 is_at_end = true;
00724 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00725 term + "'");
00726 }
00727 const char * keypos = cursor->current_key.data();
00728 const char * keyend = keypos + cursor->current_key.size();
00729
00730 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00731 is_at_end = true;
00732 throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
00733 term + "'");
00734 }
00735
00736 Xapian::docid newdid;
00737 if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
00738 report_read_error(keypos);
00739 }
00740 if (newdid <= did) {
00741 throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
00742 om_tostring(newdid) +
00743 ") is not greater than final document ID in previous chunk (" +
00744 om_tostring(did) + ")");
00745 }
00746 did = newdid;
00747
00748 cursor->read_tag();
00749 pos = cursor->current_tag.data();
00750 end = pos + cursor->current_tag.size();
00751
00752 first_did_in_chunk = did;
00753 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00754 &is_last_chunk);
00755 read_wdf_and_length(&pos, end, &wdf, &doclength);
00756 }
00757
00758 PositionList *
00759 FlintPostList::read_position_list()
00760 {
00761 DEBUGCALL(DB, PositionList *, "FlintPostList::read_position_list", "");
00762 positionlist.read_data(&this_db->position_table, did, term);
00763 RETURN(&positionlist);
00764 }
00765
00766 PositionList *
00767 FlintPostList::open_position_list() const
00768 {
00769 DEBUGCALL(DB, PositionList *, "FlintPostList::open_position_list", "");
00770 RETURN(new FlintPositionList(&this_db->position_table, did, term));
00771 }
00772
00773 PostList *
00774 FlintPostList::next(Xapian::weight w_min)
00775 {
00776 DEBUGCALL(DB, PostList *, "FlintPostList::next", w_min);
00777 (void)w_min;
00778
00779 if (!have_started) {
00780 have_started = true;
00781 } else {
00782 if (!next_in_chunk()) next_chunk();
00783 }
00784
00785 DEBUGLINE(DB, string("Moved to ") <<
00786 (is_at_end ? string("end.") : string("docid, wdf, doclength = ") +
00787 om_tostring(did) + ", " + om_tostring(wdf) + ", " +
00788 om_tostring(doclength)));
00789
00790 RETURN(NULL);
00791 }
00792
00793 bool
00794 FlintPostList::current_chunk_contains(Xapian::docid desired_did)
00795 {
00796 DEBUGCALL(DB, bool, "FlintPostList::current_chunk_contains", desired_did);
00797 if (desired_did >= first_did_in_chunk &&
00798 desired_did <= last_did_in_chunk) {
00799 RETURN(true);
00800 }
00801 RETURN(false);
00802 }
00803
00804 void
00805 FlintPostList::move_to_chunk_containing(Xapian::docid desired_did)
00806 {
00807 DEBUGCALL(DB, void,
00808 "FlintPostList::move_to_chunk_containing", desired_did);
00809 (void)cursor->find_entry(FlintPostListTable::make_key(term, desired_did));
00810 Assert(!cursor->after_end());
00811
00812 const char * keypos = cursor->current_key.data();
00813 const char * keyend = keypos + cursor->current_key.size();
00814
00815 if (!check_tname_in_key_lite(&keypos, keyend, term)) {
00816
00817 is_at_end = true;
00818 is_last_chunk = true;
00819 return;
00820 }
00821 is_at_end = false;
00822
00823 cursor->read_tag();
00824 pos = cursor->current_tag.data();
00825 end = pos + cursor->current_tag.size();
00826
00827 if (keypos == keyend) {
00828
00829 #ifdef XAPIAN_DEBUG
00830 Xapian::doccount old_number_of_entries = number_of_entries;
00831 did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
00832 Assert(old_number_of_entries == number_of_entries);
00833 #else
00834 did = read_start_of_first_chunk(&pos, end, NULL, NULL);
00835 #endif
00836 } else {
00837
00838 if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
00839 report_read_error(keypos);
00840 }
00841 }
00842
00843 first_did_in_chunk = did;
00844 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
00845 &is_last_chunk);
00846 read_wdf_and_length(&pos, end, &wdf, &doclength);
00847
00848
00849
00850 if (desired_did > last_did_in_chunk) next_chunk();
00851 }
00852
00853 bool
00854 FlintPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
00855 {
00856 DEBUGCALL(DB, bool,
00857 "FlintPostList::move_forward_in_chunk_to_at_least", desired_did);
00858 if (desired_did > last_did_in_chunk) {
00859 pos = end;
00860 RETURN(false);
00861 }
00862 while (did < desired_did) {
00863
00864
00865 bool at_end_of_chunk = !next_in_chunk();
00866 if (at_end_of_chunk) RETURN(false);
00867 }
00868 RETURN(true);
00869 }
00870
00871 PostList *
00872 FlintPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
00873 {
00874 DEBUGCALL(DB, PostList *,
00875 "FlintPostList::skip_to", desired_did << ", " << w_min);
00876 (void)w_min;
00877
00878
00879 have_started = true;
00880
00881
00882 if (is_at_end || desired_did <= did) RETURN(NULL);
00883
00884
00885 if (!current_chunk_contains(desired_did)) {
00886 move_to_chunk_containing(desired_did);
00887
00888
00889 if (is_at_end) RETURN(NULL);
00890 }
00891
00892
00893 bool have_document = move_forward_in_chunk_to_at_least(desired_did);
00894 #ifdef XAPIAN_DEBUG
00895 Assert(have_document);
00896 #else
00897 (void)have_document;
00898 #endif
00899
00900 DEBUGLINE(DB, string("Skipped to ") <<
00901 (is_at_end ? string("end.") : string("docid, wdf, doclength = ") +
00902 om_tostring(did) + ", " + om_tostring(wdf) + ", " +
00903 om_tostring(doclength) + "."));
00904
00905 RETURN(NULL);
00906 }
00907
00908 string
00909 FlintPostList::get_description() const
00910 {
00911 return term + ":" + om_tostring(number_of_entries);
00912 }
00913
00914
00915 Xapian::docid
00916 FlintPostListTable::get_chunk(const string &tname,
00917 Xapian::docid did, bool adding,
00918 PostlistChunkReader ** from, PostlistChunkWriter **to)
00919 {
00920
00921 string key = make_key(tname, did);
00922
00923
00924 AutoPtr<FlintCursor> cursor(cursor_get());
00925
00926 cursor->find_entry(key);
00927 Assert(!cursor->after_end());
00928
00929 const char * keypos = cursor->current_key.data();
00930 const char * keyend = keypos + cursor->current_key.size();
00931
00932 if (!check_tname_in_key(&keypos, keyend, tname)) {
00933
00934 if (!adding)
00935 throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
00936
00937 *from = NULL;
00938 *to = new PostlistChunkWriter("", true, tname, true);
00939 return Xapian::docid(-1);
00940 }
00941
00942
00943
00944 bool is_first_chunk = (keypos == keyend);
00945
00946 cursor->read_tag();
00947 const char * pos = cursor->current_tag.data();
00948 const char * end = pos + cursor->current_tag.size();
00949 Xapian::docid first_did_in_chunk;
00950 if (is_first_chunk) {
00951 first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
00952 } else {
00953 if (!unpack_uint_preserving_sort(&keypos, keyend,
00954 &first_did_in_chunk)) {
00955 report_read_error(keypos);
00956 }
00957 }
00958
00959 bool is_last_chunk;
00960 Xapian::docid last_did_in_chunk;
00961 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
00962 *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
00963 is_last_chunk);
00964 if (did > last_did_in_chunk) {
00965
00966
00967
00968 *from = NULL;
00969 (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
00970 string(pos, end));
00971 } else {
00972 *from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
00973 }
00974 if (is_last_chunk) return Xapian::docid(-1);
00975
00976
00977 cursor->next();
00978 if (cursor->after_end()) {
00979 throw Xapian::DatabaseCorruptError("Expected another key but found none");
00980 }
00981 const char *kpos = cursor->current_key.data();
00982 const char *kend = kpos + cursor->current_key.size();
00983 if (!check_tname_in_key(&kpos, kend, tname)) {
00984 throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
00985 }
00986
00987
00988 Xapian::docid first_did_of_next_chunk;
00989 if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
00990 report_read_error(kpos);
00991 }
00992 return first_did_of_next_chunk - 1;
00993 }
00994
00995 void
00996 FlintPostListTable::merge_changes(
00997 const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
00998 const map<Xapian::docid, Xapian::termcount> & doclens,
00999 const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
01000 {
01001 DEBUGCALL(DB, void, "FlintPostListTable::merge_changes", "mod_plists, doclens, freq_deltas");
01002 map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
01003 for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
01004 if (i->second.empty()) continue;
01005 string tname = i->first;
01006 {
01007
01008
01009 map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
01010 Assert(deltas != freq_deltas.end());
01011
01012 string current_key = make_key(tname);
01013 string tag;
01014 (void)get_exact_entry(current_key, tag);
01015
01016
01017 const char *pos = tag.data();
01018 const char *end = pos + tag.size();
01019 Xapian::doccount termfreq;
01020 Xapian::termcount collfreq;
01021 Xapian::docid firstdid, lastdid;
01022 bool islast;
01023 if (pos == end) {
01024 termfreq = 0;
01025 collfreq = 0;
01026 firstdid = 0;
01027 lastdid = 0;
01028 islast = true;
01029 } else {
01030 firstdid = read_start_of_first_chunk(&pos, end,
01031 &termfreq, &collfreq);
01032
01033 lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
01034 }
01035
01036 termfreq += deltas->second.first;
01037 if (termfreq == 0) {
01038
01039
01040 if (islast) {
01041
01042 del(current_key);
01043 continue;
01044 }
01045 AutoPtr<FlintCursor> cursor(cursor_get());
01046 bool found = cursor->find_entry(current_key);
01047 Assert(found);
01048 if (!found) continue;
01049 while (cursor->del()) {
01050 const char *kpos = cursor->current_key.data();
01051 const char *kend = kpos + cursor->current_key.size();
01052 if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
01053 }
01054 continue;
01055 }
01056 collfreq += deltas->second.second;
01057
01058
01059 string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
01060 newhdr += make_start_of_chunk(islast, firstdid, lastdid);
01061 if (pos == end) {
01062 add(current_key, newhdr);
01063 } else {
01064 Assert((size_t)(pos - tag.data()) <= tag.size());
01065 tag.replace(0, pos - tag.data(), newhdr);
01066 add(current_key, tag);
01067 }
01068 }
01069 map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
01070 j = i->second.begin();
01071 Assert(j != i->second.end());
01072
01073 Xapian::docid max_did;
01074 PostlistChunkReader *from;
01075 PostlistChunkWriter *to;
01076 max_did = get_chunk(tname, j->first, j->second.first == 'A',
01077 &from, &to);
01078 for ( ; j != i->second.end(); ++j) {
01079 Xapian::docid did = j->first;
01080
01081 next_chunk:
01082 DEBUGLINE(DB, "Updating tname=" << tname << ", did=" << did);
01083 if (from) while (!from->is_at_end()) {
01084 Xapian::docid copy_did = from->get_docid();
01085 if (copy_did >= did) {
01086 if (copy_did == did) {
01087 Assert(j->second.first != 'A');
01088 from->next();
01089 }
01090 break;
01091 }
01092 to->append(this, copy_did,
01093 from->get_wdf(), from->get_doclength());
01094 from->next();
01095 }
01096 if ((!from || from->is_at_end()) && did > max_did) {
01097 delete from;
01098 to->flush(this);
01099 delete to;
01100 max_did = get_chunk(tname, did, false, &from, &to);
01101 goto next_chunk;
01102 }
01103
01104 if (j->second.first != 'D') {
01105 map<Xapian::docid, Xapian::termcount>::const_iterator k = doclens.find(did);
01106 Assert(k != doclens.end());
01107 Xapian::termcount new_doclen = k->second;
01108 Xapian::termcount new_wdf = j->second.second;
01109
01110 to->append(this, did, new_wdf, new_doclen);
01111 }
01112 }
01113
01114 if (from) {
01115 while (!from->is_at_end()) {
01116 to->append(this, from->get_docid(),
01117 from->get_wdf(), from->get_doclength());
01118 from->next();
01119 }
01120 delete from;
01121 }
01122 to->flush(this);
01123 delete to;
01124 }
01125 }