xapian-core  1.4.21
chert_postlist.cc
Go to the documentation of this file.
1 /* chert_postlist.cc: Postlists in a chert database
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2014,2015 Olly Betts
5  * Copyright 2007,2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "chert_postlist.h"
26 
27 #include "chert_cursor.h"
28 #include "chert_database.h"
29 #include "debuglog.h"
30 #include "noreturn.h"
31 #include "pack.h"
32 #include "str.h"
33 
35 
36 void
37 ChertPostListTable::get_freqs(const string & term,
38  Xapian::doccount * termfreq_ptr,
39  Xapian::termcount * collfreq_ptr) const
40 {
41  string key = make_key(term);
42  string tag;
43  if (!get_exact_entry(key, tag)) {
44  if (termfreq_ptr)
45  *termfreq_ptr = 0;
46  if (collfreq_ptr)
47  *collfreq_ptr = 0;
48  } else {
49  const char * p = tag.data();
50  ChertPostList::read_number_of_entries(&p, p + tag.size(),
51  termfreq_ptr, collfreq_ptr);
52  }
53 }
54 
58  if (!doclen_pl.get()) {
59  // Don't keep a reference back to the database, since this
60  // would make a reference loop.
61  doclen_pl.reset(new ChertPostList(db, string(), false));
62  }
63  if (!doclen_pl->jump_to(did))
64  throw Xapian::DocNotFoundError("Document " + str(did) + " not found");
65  return doclen_pl->get_wdf();
66 }
67 
68 bool
71 {
72  if (!doclen_pl.get()) {
73  // Don't keep a reference back to the database, since this
74  // would make a reference loop.
75  doclen_pl.reset(new ChertPostList(db, string(), false));
76  }
77  return (doclen_pl->jump_to(did));
78 }
79 
// Target size in bytes for a posting list chunk.  A chunk may end up
// slightly larger than this, but only by a few bytes.
// FIXME: tune this value to maximise how well blocks are used, or
// search performance, or indexing speed - whichever matters most.
85 const unsigned int CHUNKSIZE = 2000;
86 
94  public:
95  PostlistChunkWriter(const string &orig_key_,
96  bool is_first_chunk_,
97  const string &tname_,
98  bool is_last_chunk_);
99 
101  void append(ChertTable * table, Xapian::docid did,
102  Xapian::termcount wdf);
103 
105  void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
106  const string & s) {
107  Assert(!started);
108  first_did = first_did_;
109  current_did = current_did_;
110  if (!s.empty()) {
111  chunk.append(s);
112  started = true;
113  }
114  }
115 
120  void flush(ChertTable *table);
121 
122  private:
123  string orig_key;
124  string tname;
127  bool started;
128 
131 
132  string chunk;
133 };
134 
136 
137 // Static functions
138 
140 XAPIAN_NORETURN(static void report_read_error(const char * position));
141 static void report_read_error(const char * position)
142 {
143  if (position == 0) {
144  // data ran out
145  LOGLINE(DB, "ChertPostList data ran out");
146  throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
147  }
148  // overflow
149  LOGLINE(DB, "ChertPostList value too large");
150  throw Xapian::RangeError("Value in posting list too large.");
151 }
152 
153 static inline bool get_tname_from_key(const char **src, const char *end,
154  string &tname)
155 {
156  return unpack_string_preserving_sort(src, end, tname);
157 }
158 
159 static inline bool
160 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
161 {
162  string tname_in_key;
163 
164  if (keyend - *keypos >= 2 && (*keypos)[0] == '\0' && (*keypos)[1] == '\xe0') {
165  *keypos += 2;
166  } else {
167  // Read the termname.
168  if (!get_tname_from_key(keypos, keyend, tname_in_key))
169  report_read_error(*keypos);
170  }
171 
172  // This should only fail if the postlist doesn't exist at all.
173  return tname_in_key == tname;
174 }
175 
176 static inline bool
177 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
178 {
179  if (*keypos == keyend) return false;
180 
181  return check_tname_in_key_lite(keypos, keyend, tname);
182 }
183 
185 static Xapian::docid
186 read_start_of_first_chunk(const char ** posptr,
187  const char * end,
188  Xapian::doccount * number_of_entries_ptr,
189  Xapian::termcount * collection_freq_ptr)
190 {
191  LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void *)posptr | (const void *)end | (void *)number_of_entries_ptr | (void *)collection_freq_ptr);
192 
194  number_of_entries_ptr, collection_freq_ptr);
195  if (number_of_entries_ptr)
196  LOGVALUE(DB, *number_of_entries_ptr);
197  if (collection_freq_ptr)
198  LOGVALUE(DB, *collection_freq_ptr);
199 
200  Xapian::docid did;
201  // Read the docid of the first entry in the posting list.
202  if (!unpack_uint(posptr, end, &did))
203  report_read_error(*posptr);
204  ++did;
205  LOGVALUE(DB, did);
206  RETURN(did);
207 }
208 
209 static inline void
210 read_did_increase(const char ** posptr, const char * end,
211  Xapian::docid * did_ptr)
212 {
213  Xapian::docid did_increase;
214  if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
215  *did_ptr += did_increase + 1;
216 }
217 
219 static inline void
220 read_wdf(const char ** posptr, const char * end, Xapian::termcount * wdf_ptr)
221 {
222  if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
223 }
224 
226 static Xapian::docid
227 read_start_of_chunk(const char ** posptr,
228  const char * end,
229  Xapian::docid first_did_in_chunk,
230  bool * is_last_chunk_ptr)
231 {
232  LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
233  Assert(is_last_chunk_ptr);
234 
235  // Read whether this is the last chunk
236  if (!unpack_bool(posptr, end, is_last_chunk_ptr))
237  report_read_error(*posptr);
238  LOGVALUE(DB, *is_last_chunk_ptr);
239 
240  // Read what the final document ID in this chunk is.
241  Xapian::docid increase_to_last;
242  if (!unpack_uint(posptr, end, &increase_to_last))
243  report_read_error(*posptr);
244  Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
245  LOGVALUE(DB, last_did_in_chunk);
246  RETURN(last_did_in_chunk);
247 }
248 
254  string data;
255 
256  const char *pos;
257  const char *end;
258 
259  bool at_end;
260 
263 
264  public:
270  PostlistChunkReader(Xapian::docid first_did, const string & data_)
271  : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
272  {
273  if (!at_end) read_wdf(&pos, end, &wdf);
274  }
275 
277  return did;
278  }
280  return wdf;
281  }
282 
283  bool is_at_end() const {
284  return at_end;
285  }
286 
289  void next();
290 };
291 
293 
294 void
295 PostlistChunkReader::next()
296 {
297  if (pos == end) {
298  at_end = true;
299  } else {
300  read_did_increase(&pos, end, &did);
301  read_wdf(&pos, end, &wdf);
302  }
303 }
304 
305 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
306  bool is_first_chunk_,
307  const string &tname_,
308  bool is_last_chunk_)
309  : orig_key(orig_key_),
310  tname(tname_), is_first_chunk(is_first_chunk_),
311  is_last_chunk(is_last_chunk_),
312  started(false)
313 {
314  LOGCALL_CTOR(DB, "PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
315 }
316 
317 void
319  Xapian::termcount wdf)
320 {
321  if (!started) {
322  started = true;
323  first_did = did;
324  } else {
325  Assert(did > current_did);
326  // Start a new chunk if this one has grown to the threshold.
327  if (chunk.size() >= CHUNKSIZE) {
328  bool save_is_last_chunk = is_last_chunk;
329  is_last_chunk = false;
330  flush(table);
331  is_last_chunk = save_is_last_chunk;
332  is_first_chunk = false;
333  first_did = did;
334  chunk.resize(0);
336  } else {
337  pack_uint(chunk, did - current_did - 1);
338  }
339  }
340  current_did = did;
341  pack_uint(chunk, wdf);
342 }
343 
346 static inline string
348  Xapian::termcount collectionfreq,
349  Xapian::docid new_did)
350 {
351  string chunk;
352  pack_uint(chunk, entries);
353  pack_uint(chunk, collectionfreq);
354  pack_uint(chunk, new_did - 1);
355  return chunk;
356 }
357 
360 static inline string
361 make_start_of_chunk(bool new_is_last_chunk,
362  Xapian::docid new_first_did,
363  Xapian::docid new_final_did)
364 {
365  Assert(new_final_did >= new_first_did);
366  string chunk;
367  pack_bool(chunk, new_is_last_chunk);
368  pack_uint(chunk, new_final_did - new_first_did);
369  return chunk;
370 }
371 
372 static void
374  unsigned int start_of_chunk_header,
375  unsigned int end_of_chunk_header,
376  bool is_last_chunk,
377  Xapian::docid first_did_in_chunk,
378  Xapian::docid last_did_in_chunk)
379 {
380  Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
381 
382  chunk.replace(start_of_chunk_header,
383  end_of_chunk_header - start_of_chunk_header,
384  make_start_of_chunk(is_last_chunk, first_did_in_chunk,
385  last_did_in_chunk));
386 }
387 
388 void
390 {
391  LOGCALL_VOID(DB, "PostlistChunkWriter::flush", table);
392 
393  /* This is one of the more messy parts involved with updating posting
394  * list chunks.
395  *
396  * Depending on circumstances, we may have to delete an entire chunk
397  * or file it under a different key, as well as possibly modifying both
398  * the previous and next chunk of the postlist.
399  */
400 
401  if (!started) {
402  /* This chunk is now empty so disappears entirely.
403  *
404  * If this was the last chunk, then the previous chunk
405  * must have its "is_last_chunk" flag updated.
406  *
407  * If this was the first chunk, then the next chunk must
408  * be transformed into the first chunk. Messy!
409  */
410  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
411  Assert(!orig_key.empty());
412  if (is_first_chunk) {
413  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
414  if (is_last_chunk) {
415  /* This is the first and the last chunk, ie the only
416  * chunk, so just delete the tag.
417  */
418  table->del(orig_key);
419  return;
420  }
421 
422  /* This is the messiest case. The first chunk is to
423  * be removed, and there is at least one chunk after
424  * it. Need to rewrite the next chunk as the first
425  * chunk.
426  */
427  AutoPtr<ChertCursor> cursor(table->cursor_get());
428 
429  if (!cursor->find_entry(orig_key)) {
430  throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
431  }
432 
433  // FIXME: Currently the doclen list has a special first chunk too,
434  // which reduces special casing here. The downside is a slightly
435  // larger than necessary first chunk and needless fiddling if the
436  // first chunk is deleted. But really we should look at
437  // redesigning the whole postlist format with an eye to making it
438  // easier to update!
439 
440  // Extract existing counts from the first chunk so we can reinsert
441  // them into the block we're renaming.
442  Xapian::doccount num_ent;
443  Xapian::termcount coll_freq;
444  {
445  cursor->read_tag();
446  const char *tagpos = cursor->current_tag.data();
447  const char *tagend = tagpos + cursor->current_tag.size();
448 
449  (void)read_start_of_first_chunk(&tagpos, tagend,
450  &num_ent, &coll_freq);
451  }
452 
453  // Seek to the next chunk.
454  cursor->next();
455  if (cursor->after_end()) {
456  throw Xapian::DatabaseCorruptError("Expected another key but found none");
457  }
458  const char *kpos = cursor->current_key.data();
459  const char *kend = kpos + cursor->current_key.size();
460  if (!check_tname_in_key(&kpos, kend, tname)) {
461  throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
462  }
463 
464  // Read the new first docid
465  Xapian::docid new_first_did;
466  if (!C_unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
467  report_read_error(kpos);
468  }
469 
470  cursor->read_tag();
471  const char *tagpos = cursor->current_tag.data();
472  const char *tagend = tagpos + cursor->current_tag.size();
473 
474  // Read the chunk header
475  bool new_is_last_chunk;
476  Xapian::docid new_last_did_in_chunk =
477  read_start_of_chunk(&tagpos, tagend, new_first_did,
478  &new_is_last_chunk);
479 
480  string chunk_data(tagpos, tagend);
481 
482  // First remove the renamed tag
483  table->del(cursor->current_key);
484 
485  // And now write it as the first chunk
486  string tag;
487  tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
488  tag += make_start_of_chunk(new_is_last_chunk,
489  new_first_did,
490  new_last_did_in_chunk);
491  tag += chunk_data;
492  table->add(orig_key, tag);
493  return;
494  }
495 
496  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
497  /* This isn't the first chunk. Check whether we're the last chunk. */
498 
499  // Delete this chunk
500  table->del(orig_key);
501 
502  if (is_last_chunk) {
503  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
504  // Update the previous chunk's is_last_chunk flag.
505  AutoPtr<ChertCursor> cursor(table->cursor_get());
506 
507  /* Should not find the key we just deleted, but should
508  * find the previous chunk. */
509  if (cursor->find_entry(orig_key)) {
510  throw Xapian::DatabaseCorruptError("Chert key not deleted as we expected");
511  }
512  // Make sure this is a chunk with the right term attached.
513  const char * keypos = cursor->current_key.data();
514  const char * keyend = keypos + cursor->current_key.size();
515  if (!check_tname_in_key(&keypos, keyend, tname)) {
516  throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
517  }
518 
519  bool is_prev_first_chunk = (keypos == keyend);
520 
521  // Now update the last_chunk
522  cursor->read_tag();
523  string tag = cursor->current_tag;
524 
525  const char *tagpos = tag.data();
526  const char *tagend = tagpos + tag.size();
527 
528  // Skip first chunk header
529  Xapian::docid first_did_in_chunk;
530  if (is_prev_first_chunk) {
531  first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
532  0, 0);
533  } else {
534  if (!C_unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk))
535  report_read_error(keypos);
536  }
537  bool wrong_is_last_chunk;
538  string::size_type start_of_chunk_header = tagpos - tag.data();
539  Xapian::docid last_did_in_chunk =
540  read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
541  &wrong_is_last_chunk);
542  string::size_type end_of_chunk_header = tagpos - tag.data();
543 
544  // write new is_last flag
546  start_of_chunk_header,
547  end_of_chunk_header,
548  true, // is_last_chunk
549  first_did_in_chunk,
550  last_did_in_chunk);
551  table->add(cursor->current_key, tag);
552  }
553  } else {
554  LOGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
555  /* The chunk still has some items in it. Two major subcases:
556  * a) This is the first chunk.
557  * b) This isn't the first chunk.
558  *
559  * The subcases just affect the chunk header.
560  */
561  string tag;
562 
563  /* First write the header, which depends on whether this is the
564  * first chunk.
565  */
566  if (is_first_chunk) {
567  /* The first chunk. This is the relatively easy case,
568  * and we just have to write this one back to disk.
569  */
570  LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
571  string key = ChertPostListTable::make_key(tname);
572  bool ok = table->get_exact_entry(key, tag);
573  (void)ok;
574  Assert(ok);
575  Assert(!tag.empty());
576 
577  Xapian::doccount num_ent;
578  Xapian::termcount coll_freq;
579  {
580  const char * tagpos = tag.data();
581  const char * tagend = tagpos + tag.size();
582  (void)read_start_of_first_chunk(&tagpos, tagend,
583  &num_ent, &coll_freq);
584  }
585 
586  tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
587 
589  tag += chunk;
590  table->add(key, tag);
591  return;
592  }
593 
594  LOGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
595  /* Not the first chunk.
596  *
597  * This has the easy sub-sub-case:
598  * The first entry in the chunk hasn't changed
599  * ...and the hard sub-sub-case:
600  * The first entry in the chunk has changed. This is
601  * harder because the key for the chunk changes, so
602  * we've got to do a switch.
603  */
604 
605  // First find out the initial docid
606  const char *keypos = orig_key.data();
607  const char *keyend = keypos + orig_key.size();
608  if (!check_tname_in_key(&keypos, keyend, tname)) {
609  throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
610  }
611  Xapian::docid initial_did;
612  if (!C_unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
613  report_read_error(keypos);
614  }
615  string new_key;
616  if (initial_did != first_did) {
617  /* The fiddlier case:
618  * Create a new tag with the correct key, and replace
619  * the old one.
620  */
622  table->del(orig_key);
623  } else {
624  new_key = orig_key;
625  }
626 
627  // ...and write the start of this chunk.
629 
630  tag += chunk;
631  table->add(new_key, tag);
632  }
633 }
634 
639 void ChertPostList::read_number_of_entries(const char ** posptr,
640  const char * end,
641  Xapian::doccount * number_of_entries_ptr,
642  Xapian::termcount * collection_freq_ptr)
643 {
644  if (!unpack_uint(posptr, end, number_of_entries_ptr))
645  report_read_error(*posptr);
646  if (!unpack_uint(posptr, end, collection_freq_ptr))
647  report_read_error(*posptr);
648 }
649 
670  const string & term_,
671  bool keep_reference)
672  : LeafPostList(term_),
673  this_db(keep_reference ? this_db_ : NULL),
674  have_started(false),
675  is_at_end(false),
676  cursor(this_db_->postlist_table.cursor_get())
677 {
678  LOGCALL_CTOR(DB, "ChertPostList", this_db_.get() | term_ | keep_reference);
679  string key = ChertPostListTable::make_key(term);
680  int found = cursor->find_entry(key);
681  if (!found) {
682  LOGLINE(DB, "postlist for term not found");
683  number_of_entries = 0;
684  is_at_end = true;
685  pos = 0;
686  end = 0;
687  first_did_in_chunk = 0;
688  last_did_in_chunk = 0;
689  wdf_upper_bound = 0;
690  return;
691  }
692  cursor->read_tag();
693  pos = cursor->current_tag.data();
694  end = pos + cursor->current_tag.size();
695 
696  Xapian::termcount collfreq;
700  &is_last_chunk);
701  read_wdf(&pos, end, &wdf);
702  // This works even if there's only one entry (when wdf == collfreq)
703  // or when collfreq is 0 (=> wdf is 0 too).
704  wdf_upper_bound = max(collfreq - wdf, wdf);
705  LOGLINE(DB, "Initial docid " << did);
706 }
707 
709 {
710  LOGCALL_DTOR(DB, "ChertPostList");
711 }
712 
715 {
716  LOGCALL(DB, Xapian::termcount, "ChertPostList::get_doclength", NO_ARGS);
718  Assert(!is_at_end);
719  Assert(this_db.get());
721 }
722 
725 {
726  LOGCALL(DB, Xapian::termcount, "ChertPostList::get_unique_terms", NO_ARGS);
728  Assert(!is_at_end);
729  Assert(this_db.get());
731 }
732 
733 bool
735 {
736  LOGCALL(DB, bool, "ChertPostList::next_in_chunk", NO_ARGS);
737  if (pos == end) RETURN(false);
738 
740  read_wdf(&pos, end, &wdf);
741 
742  // Either not at last doc in chunk, or pos == end, but not both.
745  Assert(pos != end || did == last_did_in_chunk);
746 
747  RETURN(true);
748 }
749 
750 void
752 {
753  LOGCALL_VOID(DB, "ChertPostList::next_chunk", NO_ARGS);
754  if (is_last_chunk) {
755  is_at_end = true;
756  return;
757  }
758 
759  cursor->next();
760  if (cursor->after_end()) {
761  is_at_end = true;
762  throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
763  term + "'");
764  }
765  const char * keypos = cursor->current_key.data();
766  const char * keyend = keypos + cursor->current_key.size();
767  // Check we're still in same postlist
768  if (!check_tname_in_key_lite(&keypos, keyend, term)) {
769  is_at_end = true;
770  throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
771  term + "'");
772  }
773 
774  Xapian::docid newdid;
775  if (!C_unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
776  report_read_error(keypos);
777  }
778  if (newdid <= did) {
779  throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
780  str(newdid) +
781  ") is not greater than final document ID in previous chunk (" +
782  str(did) + ")");
783  }
784  did = newdid;
785 
786  cursor->read_tag();
787  pos = cursor->current_tag.data();
788  end = pos + cursor->current_tag.size();
789 
792  &is_last_chunk);
793  read_wdf(&pos, end, &wdf);
794 }
795 
796 PositionList *
798 {
799  LOGCALL(DB, PositionList *, "ChertPostList::read_position_list", NO_ARGS);
800  Assert(this_db.get());
803 }
804 
805 PositionList *
807 {
808  LOGCALL(DB, PositionList *, "ChertPostList::open_position_list", NO_ARGS);
809  Assert(this_db.get());
811 }
812 
813 PostList *
814 ChertPostList::next(double w_min)
815 {
816  LOGCALL(DB, PostList *, "ChertPostList::next", w_min);
817  (void)w_min; // no warning
818 
819  if (!have_started) {
820  have_started = true;
821  } else {
822  if (!next_in_chunk()) next_chunk();
823  }
824 
825  if (is_at_end) {
826  LOGLINE(DB, "Moved to end");
827  } else {
828  LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf);
829  }
830 
831  RETURN(NULL);
832 }
833 
834 bool
836 {
837  LOGCALL(DB, bool, "ChertPostList::current_chunk_contains", desired_did);
838  if (desired_did >= first_did_in_chunk &&
839  desired_did <= last_did_in_chunk) {
840  RETURN(true);
841  }
842  RETURN(false);
843 }
844 
845 void
847 {
848  LOGCALL_VOID(DB, "ChertPostList::move_to_chunk_containing", desired_did);
849  (void)cursor->find_entry(ChertPostListTable::make_key(term, desired_did));
850  Assert(!cursor->after_end());
851 
852  const char * keypos = cursor->current_key.data();
853  const char * keyend = keypos + cursor->current_key.size();
854  // Check we're still in same postlist
855  if (!check_tname_in_key_lite(&keypos, keyend, term)) {
856  // This should only happen if the postlist doesn't exist at all.
857  is_at_end = true;
858  is_last_chunk = true;
859  return;
860  }
861  is_at_end = false;
862 
863  cursor->read_tag();
864  pos = cursor->current_tag.data();
865  end = pos + cursor->current_tag.size();
866 
867  if (keypos == keyend) {
868  // In first chunk
869 #ifdef XAPIAN_ASSERTIONS
870  Xapian::doccount old_number_of_entries = number_of_entries;
872  Assert(old_number_of_entries == number_of_entries);
873 #else
874  did = read_start_of_first_chunk(&pos, end, NULL, NULL);
875 #endif
876  } else {
877  // In normal chunk
878  if (!C_unpack_uint_preserving_sort(&keypos, keyend, &did)) {
879  report_read_error(keypos);
880  }
881  }
882 
885  &is_last_chunk);
886  read_wdf(&pos, end, &wdf);
887 
888  // Possible, since desired_did might be after end of this chunk and before
889  // the next.
890  if (desired_did > last_did_in_chunk) next_chunk();
891 }
892 
893 bool
895 {
896  LOGCALL(DB, bool, "ChertPostList::move_forward_in_chunk_to_at_least", desired_did);
897  if (did >= desired_did)
898  RETURN(true);
899 
900  if (desired_did <= last_did_in_chunk) {
901  while (pos != end) {
903  if (did >= desired_did) {
904  read_wdf(&pos, end, &wdf);
905  RETURN(true);
906  }
907  // It's faster to just skip over the wdf than to decode it.
908  read_wdf(&pos, end, NULL);
909  }
910 
911  // If we hit the end of the chunk then last_did_in_chunk must be wrong.
912  Assert(false);
913  }
914 
915  pos = end;
916  RETURN(false);
917 }
918 
919 PostList *
920 ChertPostList::skip_to(Xapian::docid desired_did, double w_min)
921 {
922  LOGCALL(DB, PostList *, "ChertPostList::skip_to", desired_did | w_min);
923  (void)w_min; // no warning
924  // We've started now - if we hadn't already, we're already positioned
925  // at start so there's no need to actually do anything.
926  have_started = true;
927 
928  // Don't skip back, and don't need to do anything if already there.
929  if (is_at_end || desired_did <= did) RETURN(NULL);
930 
931  // Move to correct chunk
932  if (!current_chunk_contains(desired_did)) {
933  move_to_chunk_containing(desired_did);
934  // Might be at_end now, so we need to check before trying to move
935  // forward in chunk.
936  if (is_at_end) RETURN(NULL);
937  }
938 
939  // Move to correct position in chunk
940  bool have_document = move_forward_in_chunk_to_at_least(desired_did);
941  (void)have_document;
942  Assert(have_document);
943 
944  if (is_at_end) {
945  LOGLINE(DB, "Skipped to end");
946  } else {
947  LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf);
948  }
949 
950  RETURN(NULL);
951 }
952 
953 // Used for doclens.
954 bool
956 {
957  LOGCALL(DB, bool, "ChertPostList::jump_to", desired_did);
958  // We've started now - if we hadn't already, we're already positioned
959  // at start so there's no need to actually do anything.
960  have_started = true;
961 
962  // If the list is empty, give up right away.
963  if (pos == 0) RETURN(false);
964 
965  // Move to correct chunk, or reload the current chunk to go backwards in it
966  // (FIXME: perhaps handle the latter case more elegantly, though it won't
967  // happen during sequential access which is most common).
968  if (is_at_end || !current_chunk_contains(desired_did) || desired_did < did) {
969  // Clear is_at_end flag since we can rewind.
970  is_at_end = false;
971 
972  move_to_chunk_containing(desired_did);
973  // Might be at_end now, so we need to check before trying to move
974  // forward in chunk.
975  if (is_at_end) RETURN(false);
976  }
977 
978  // Move to correct position in chunk.
979  if (!move_forward_in_chunk_to_at_least(desired_did)) RETURN(false);
980  RETURN(desired_did == did);
981 }
982 
983 string
985 {
986  return term + ":" + str(number_of_entries);
987 }
988 
989 // Returns the last did to allow in this chunk.
991 ChertPostListTable::get_chunk(const string &tname,
992  Xapian::docid did, bool adding,
994 {
995  LOGCALL(DB, Xapian::docid, "ChertPostListTable::get_chunk", tname | did | adding | from | to);
996  // Get chunk containing entry
997  string key = make_key(tname, did);
998 
999  // Find the right chunk
1000  AutoPtr<ChertCursor> cursor(cursor_get());
1001 
1002  (void)cursor->find_entry(key);
1003  Assert(!cursor->after_end());
1004 
1005  const char * keypos = cursor->current_key.data();
1006  const char * keyend = keypos + cursor->current_key.size();
1007 
1008  if (!check_tname_in_key(&keypos, keyend, tname)) {
1009  // Postlist for this termname doesn't exist.
1010  if (!adding)
1011  throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
1012 
1013  *from = NULL;
1014  *to = new PostlistChunkWriter(string(), true, tname, true);
1015  RETURN(Xapian::docid(-1));
1016  }
1017 
1018  // See if we're appending - if so we can shortcut by just copying
1019  // the data part of the chunk wholesale.
1020  bool is_first_chunk = (keypos == keyend);
1021  LOGVALUE(DB, is_first_chunk);
1022 
1023  cursor->read_tag();
1024  const char * pos = cursor->current_tag.data();
1025  const char * end = pos + cursor->current_tag.size();
1027  if (is_first_chunk) {
1028  first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
1029  } else {
1030  if (!C_unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk)) {
1031  report_read_error(keypos);
1032  }
1033  }
1034 
1035  bool is_last_chunk;
1037  last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
1038  *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
1039  is_last_chunk);
1040  if (did > last_did_in_chunk) {
1041  // This is the shortcut. Not very pretty, but I'll leave refactoring
1042  // until I've a clearer picture of everything which needs to be done.
1043  // (FIXME)
1044  *from = NULL;
1045  (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1046  string(pos, end));
1047  } else {
1048  *from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
1049  }
1050  if (is_last_chunk) RETURN(Xapian::docid(-1));
1051 
1052  // Find first did of next tag.
1053  cursor->next();
1054  if (cursor->after_end()) {
1055  throw Xapian::DatabaseCorruptError("Expected another key but found none");
1056  }
1057  const char *kpos = cursor->current_key.data();
1058  const char *kend = kpos + cursor->current_key.size();
1059  if (!check_tname_in_key(&kpos, kend, tname)) {
1060  throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
1061  }
1062 
1063  // Read the new first docid
1064  Xapian::docid first_did_of_next_chunk;
1065  if (!C_unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
1066  report_read_error(kpos);
1067  }
1068  RETURN(first_did_of_next_chunk - 1);
1069 }
1070 
1071 void
1073  const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
1074  const map<Xapian::docid, Xapian::termcount> & doclens,
1075  const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
1076 {
1077  LOGCALL_VOID(DB, "ChertPostListTable::merge_changes", mod_plists | doclens | freq_deltas);
1078 
1079  // The cursor in the doclen_pl will no longer be valid, so reset it.
1080  doclen_pl.reset(0);
1081 
1082  LOGVALUE(DB, doclens.size());
1083  if (!doclens.empty()) {
1084  // Ensure there's a first chunk.
1085  string current_key = make_key(string());
1086  if (!key_exists(current_key)) {
1087  LOGLINE(DB, "Adding dummy first chunk");
1088  string newtag = make_start_of_first_chunk(0, 0, 0);
1089  newtag += make_start_of_chunk(true, 0, 0);
1090  add(current_key, newtag);
1091  }
1092 
1093  map<Xapian::docid, Xapian::termcount>::const_iterator j;
1094  j = doclens.begin();
1095  Assert(j != doclens.end()); // This case is caught above.
1096 
1097  Xapian::docid max_did;
1098  PostlistChunkReader *from;
1099  PostlistChunkWriter *to;
1100  max_did = get_chunk(string(), j->first, true, &from, &to);
1101  LOGVALUE(DB, max_did);
1102  for ( ; j != doclens.end(); ++j) {
1103  Xapian::docid did = j->first;
1104 
1105 next_doclen_chunk:
1106  LOGLINE(DB, "Updating doclens, did=" << did);
1107  if (from) while (!from->is_at_end()) {
1108  Xapian::docid copy_did = from->get_docid();
1109  if (copy_did >= did) {
1110  if (copy_did == did) from->next();
1111  break;
1112  }
1113  to->append(this, copy_did, from->get_wdf());
1114  from->next();
1115  }
1116  if ((!from || from->is_at_end()) && did > max_did) {
1117  delete from;
1118  to->flush(this);
1119  delete to;
1120  max_did = get_chunk(string(), did, false, &from, &to);
1121  goto next_doclen_chunk;
1122  }
1123 
1124  Xapian::termcount new_doclen = j->second;
1125  if (new_doclen != static_cast<Xapian::termcount>(-1)) {
1126  to->append(this, did, new_doclen);
1127  }
1128  }
1129 
1130  if (from) {
1131  while (!from->is_at_end()) {
1132  to->append(this, from->get_docid(), from->get_wdf());
1133  from->next();
1134  }
1135  delete from;
1136  }
1137  to->flush(this);
1138  delete to;
1139  }
1140 
1141  map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
1142  for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
1143  if (i->second.empty()) continue;
1144  string tname = i->first;
1145  {
1146  // Rewrite the first chunk of this posting list with the updated
1147  // termfreq and collfreq.
1148  map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
1149  Assert(deltas != freq_deltas.end());
1150 
1151  string current_key = make_key(tname);
1152  string tag;
1153  (void)get_exact_entry(current_key, tag);
1154 
1155  // Read start of first chunk to get termfreq and collfreq.
1156  const char *pos = tag.data();
1157  const char *end = pos + tag.size();
1158  Xapian::doccount termfreq;
1159  Xapian::termcount collfreq;
1160  Xapian::docid firstdid, lastdid;
1161  bool islast;
1162  if (pos == end) {
1163  termfreq = 0;
1164  collfreq = 0;
1165  firstdid = 0;
1166  lastdid = 0;
1167  islast = true;
1168  } else {
1169  firstdid = read_start_of_first_chunk(&pos, end,
1170  &termfreq, &collfreq);
1171  // Handle the generic start of chunk header.
1172  lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
1173  }
1174 
1175  termfreq += deltas->second.first;
1176  if (termfreq == 0) {
1177  // All postings deleted! So we can shortcut by zapping the
1178  // posting list.
1179  if (islast) {
1180  // Only one entry for this posting list.
1181  del(current_key);
1182  continue;
1183  }
1184  MutableChertCursor cursor(this);
1185  bool found = cursor.find_entry(current_key);
1186  Assert(found);
1187  if (!found) continue; // Reduce damage!
1188  while (cursor.del()) {
1189  const char *kpos = cursor.current_key.data();
1190  const char *kend = kpos + cursor.current_key.size();
1191  if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
1192  }
1193  continue;
1194  }
1195  collfreq += deltas->second.second;
1196 
1197  // Rewrite start of first chunk to update termfreq and collfreq.
1198  string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
1199  newhdr += make_start_of_chunk(islast, firstdid, lastdid);
1200  if (pos == end) {
1201  add(current_key, newhdr);
1202  } else {
1203  Assert((size_t)(pos - tag.data()) <= tag.size());
1204  tag.replace(0, pos - tag.data(), newhdr);
1205  add(current_key, tag);
1206  }
1207  }
1208  map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
1209  j = i->second.begin();
1210  Assert(j != i->second.end()); // This case is caught above.
1211 
1212  Xapian::docid max_did;
1213  PostlistChunkReader *from;
1214  PostlistChunkWriter *to;
1215  max_did = get_chunk(tname, j->first, j->second.first == 'A',
1216  &from, &to);
1217  for ( ; j != i->second.end(); ++j) {
1218  Xapian::docid did = j->first;
1219 
1220 next_chunk:
1221  LOGLINE(DB, "Updating tname=" << tname << ", did=" << did);
1222  if (from) while (!from->is_at_end()) {
1223  Xapian::docid copy_did = from->get_docid();
1224  if (copy_did >= did) {
1225  if (copy_did == did) {
1226  Assert(j->second.first != 'A');
1227  from->next();
1228  }
1229  break;
1230  }
1231  to->append(this, copy_did, from->get_wdf());
1232  from->next();
1233  }
1234  if ((!from || from->is_at_end()) && did > max_did) {
1235  delete from;
1236  to->flush(this);
1237  delete to;
1238  max_did = get_chunk(tname, did, false, &from, &to);
1239  goto next_chunk;
1240  }
1241 
1242  if (j->second.first != 'D') {
1243  Xapian::termcount new_wdf = j->second.second;
1244  to->append(this, did, new_wdf);
1245  }
1246  }
1247 
1248  if (from) {
1249  while (!from->is_at_end()) {
1250  to->append(this, from->get_docid(), from->get_wdf());
1251  from->next();
1252  }
1253  delete from;
1254  }
1255  to->flush(this);
1256  delete to;
1257  }
1258 }
1259 
1260 void
1262  Xapian::docid & last) const
1263 {
1264  LOGCALL(DB, Xapian::docid, "ChertPostList::get_used_docid_range", "&first, &used");
1265  AutoPtr<ChertCursor> cur(cursor_get());
1266  if (!cur->find_entry(pack_chert_postlist_key(string()))) {
1267  // Empty database.
1268  first = last = 0;
1269  return;
1270  }
1271 
1272  cur->read_tag();
1273  const char * p = cur->current_tag.data();
1274  const char * e = p + cur->current_tag.size();
1275 
1276  first = read_start_of_first_chunk(&p, e, NULL, NULL);
1277 
1278  (void)cur->find_entry(pack_glass_postlist_key(string(), CHERT_MAX_DOCID));
1279  Assert(!cur->after_end());
1280 
1281  const char * keypos = cur->current_key.data();
1282  const char * keyend = keypos + cur->current_key.size();
1283  // Check we're still in same postlist
1284  if (!check_tname_in_key_lite(&keypos, keyend, string())) {
1285  // Shouldn't happen - we already handled the empty database case above.
1286  Assert(false);
1287  first = last = 0;
1288  return;
1289  }
1290 
1291  cur->read_tag();
1292  p = cur->current_tag.data();
1293  e = p + cur->current_tag.size();
1294 
1295  Xapian::docid start_of_last_chunk;
1296  if (keypos == keyend) {
1297  start_of_last_chunk = first;
1298  first = read_start_of_first_chunk(&p, e, NULL, NULL);
1299  } else {
1300  // In normal chunk
1301  if (!C_unpack_uint_preserving_sort(&keypos, keyend,
1302  &start_of_last_chunk)) {
1303  report_read_error(keypos);
1304  }
1305  }
1306 
1307  bool dummy;
1308  last = read_start_of_chunk(&p, e, start_of_last_chunk, &dummy);
1309 }
1310 
1313 {
1314  return wdf_upper_bound;
1315 }
void pack_bool(std::string &s, bool value)
Append an encoded bool to a string.
Definition: pack.h:57
static void read_wdf(const char **posptr, const char *end, Xapian::termcount *wdf_ptr)
Read the wdf for an entry.
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:480
#define RETURN(A)
Definition: debuglog.h:482
#define Assert(COND)
Definition: omassert.h:122
Xapian::docid get_chunk(const string &tname, Xapian::docid did, bool adding, Chert::PostlistChunkReader **from, Chert::PostlistChunkWriter **to)
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
Define the XAPIAN_NORETURN macro.
~ChertPostList()
Destructor.
bool is_last_chunk
True if this is the last chunk.
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk.
Abstract base class for postlists.
Definition: postlist.h:37
A position list in a chert database.
Xapian::doccount number_of_entries
The number of entries in the posting list.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
void flush(ChertTable *table)
Flush the chunk to the buffered table.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
const char * end
Pointer to byte after end of current chunk.
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const ChertDatabase > db) const
Returns the length of document did.
Xapian::termcount get_doclength() const
Returns the length of current document.
PostlistChunkReader(Xapian::docid first_did, const string &data_)
Initialise the postlist chunk reader.
static void write_start_of_chunk(string &chunk, unsigned int start_of_chunk_header, unsigned int end_of_chunk_header, bool is_last_chunk, Xapian::docid first_did_in_chunk, Xapian::docid last_did_in_chunk)
static string make_start_of_chunk(bool new_is_last_chunk, Xapian::docid new_first_did, Xapian::docid new_final_did)
Make the data to go at the start of a standard chunk.
Class managing a Btree table in a Chert database.
Definition: chert_table.h:347
static void report_read_error(const char *position)
Report an error when reading the posting list.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:479
std::string get_description() const
Get a description of the document.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
Convert types to std::string.
Xapian::docid get_docid() const
ChertPositionListTable position_table
Table storing position lists.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:38
void get_freqs(const std::string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
std::string term
The term name for this postlist (empty for an alldocs postlist).
Definition: leafpostlist.h:51
#define false
Definition: header.h:9
Xapian::termcount wdf
The wdf of the current document.
Xapian::termcount get_wdf_upper_bound() const
void next_chunk()
Move to the next chunk.
PositionList * open_position_list() const
Get the list of positions of the term in the current document.
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens.
static Xapian::docid read_start_of_chunk(const char **posptr, const char *end, Xapian::docid first_did_in_chunk, bool *is_last_chunk_ptr)
Read the start of a chunk.
ChertPostList(const ChertPostList &)
Copying is not allowed.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Virtual methods of Database::Internal.
bool del(const std::string &key)
Delete an entry from the table.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
const char * dummy[]
Definition: version_h.cc:7
RangeError indicates an attempt to access outside the bounds of a container.
Definition: error.h:971
bool have_started
Whether we've started reading the list yet.
void append(ChertTable *table, Xapian::docid did, Xapian::termcount wdf)
Append an entry to this chunk.
bool C_unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
Definition: pack.h:185
#define LOGVALUE(a, b)
Definition: debuglog.h:484
Interface to Btree cursors.
AutoPtr< ChertCursor > cursor
Cursor pointing to current chunk of postlist.
bool next(Cursor *C_, int j) const
Definition: chert_table.h:821
bool del()
Delete the current key/tag pair, leaving the cursor on the next entry.
bool read_data(const ChertTable *table, Xapian::docid did, const string &tname)
Fill list with data, and move the position to the start.
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
bool is_at_end
Whether we've run off the end of the list yet.
bool empty() const
Return true if there are no entries in the table.
Definition: chert_table.h:628
#define CHERT_MAX_DOCID
The largest docid value supported by chert.
Definition: chert_types.h:60
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
static string make_start_of_first_chunk(Xapian::doccount entries, Xapian::termcount collectionfreq, Xapian::docid new_did)
Make the data to go at the start of the very first chunk.
void merge_changes(const map< string, map< Xapian::docid, pair< char, Xapian::termcount > > > &mod_plists, const map< Xapian::docid, Xapian::termcount > &doclens, const map< string, pair< Xapian::termcount_diff, Xapian::termcount_diff > > &freq_deltas)
Merge added, removed, and changed entries.
PositionList * read_position_list()
Get the list of positions of the term in the current document.
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
Definition: pack.h:562
ChertCursor * cursor_get() const
Get a cursor for reading from the table.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const ChertDatabase > db) const
Check if document did exists.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:478
AutoPtr< ChertPostList > doclen_pl
PostList for looking up document lengths.
C++ class definition for chert database.
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
static bool check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
std::string pack_chert_postlist_key(const std::string &term)
Definition: pack.h:585
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
Definition: chert_table.cc:978
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string.
Definition: pack.h:69
string current_key
Current key pointed to by cursor.
Definition: chert_cursor.h:149
void raw_append(Xapian::docid first_did_, Xapian::docid current_did_, const string &s)
Append a block of raw entries to this chunk.
Xapian::Internal::intrusive_ptr< const ChertDatabase > this_db
The database we are searching.
static void read_did_increase(const char **posptr, const char *end, Xapian::docid *did_ptr)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID.
Xapian::termcount get_doclength(Xapian::docid did) const
Virtual methods of Database::Internal.
Postlists in chert databases.
std::string pack_glass_postlist_key(const std::string &term)
Definition: pack.h:613
const unsigned int CHUNKSIZE
Xapian::docid did
Document id we're currently at.
Pack types into strings and unpack them again.
static bool get_tname_from_key(const char **src, const char *end, string &tname)
static void read_number_of_entries(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the number of entries and the collection frequency.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
static Xapian::docid read_start_of_first_chunk(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the start of the first chunk in the posting list.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
ChertPositionList positionlist
The position list object for this posting list.
A postlist in a chert database.
Xapian::termcount get_wdf() const
static bool check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
#define LOGLINE(a, b)
Definition: debuglog.h:483
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
bool next_in_chunk()
Move to the next item in the chunk, if possible.
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk...
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to next document with docid >= docid.
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
string make_key(Xapian::docid did)
Definition: chert_record.cc:35
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
const char * pos
Position of iteration through current chunk.
void next()
Advance to the next entry.