xapian-core  2.0.0
glass_postlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002-2024 Olly Betts
6  * Copyright 2007,2008,2009 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, see
20  * <https://www.gnu.org/licenses/>.
21  */
22 
23 #include <config.h>
24 
25 #include "glass_postlist.h"
26 
27 #include "glass_cursor.h"
28 #include "glass_database.h"
29 #include "debuglog.h"
30 #include "pack.h"
31 #include "str.h"
33 
35 using namespace std;
36 
37 // Static functions
38 
40 [[noreturn]]
41 static void report_read_error(const char * position)
42 {
43  if (position == 0) {
44  // data ran out
45  LOGLINE(DB, "GlassPostList data ran out");
46  throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when "
47  "reading posting list");
48  }
49  // overflow
50  LOGLINE(DB, "GlassPostList value too large");
51  throw Xapian::RangeError("Value in posting list too large");
52 }
53 
54 static inline bool
55 get_tname_from_key(const char **src, const char *end, string &tname)
56 {
57  return unpack_string_preserving_sort(src, end, tname);
58 }
59 
60 static inline bool
61 check_tname_in_key_lite(const char** keypos,
62  const char* keyend,
63  string_view tname)
64 {
65  string tname_in_key;
66 
67  if (keyend - *keypos >= 2 && (*keypos)[0] == '\0' && (*keypos)[1] == '\xe0') {
68  *keypos += 2;
69  } else {
70  // Read the termname.
71  if (!get_tname_from_key(keypos, keyend, tname_in_key))
72  report_read_error(*keypos);
73  }
74 
75  // This should only fail if the postlist doesn't exist at all.
76  return tname_in_key == tname;
77 }
78 
79 static inline bool
80 check_tname_in_key(const char** keypos, const char* keyend, string_view tname)
81 {
82  if (*keypos == keyend) return false;
83 
84  return check_tname_in_key_lite(keypos, keyend, tname);
85 }
86 
88 static Xapian::docid
89 read_start_of_first_chunk(const char** posptr,
90  const char* end,
91  Xapian::doccount* termfreq_ptr,
92  Xapian::termcount* collection_freq_ptr)
93 {
94  LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void*)posptr | (const void*)end | (void*)termfreq_ptr | (void*)collection_freq_ptr);
95 
96  GlassPostList::read_freqs(posptr, end, termfreq_ptr, collection_freq_ptr);
97  if (termfreq_ptr)
98  LOGVALUE(DB, *termfreq_ptr);
99  if (collection_freq_ptr)
100  LOGVALUE(DB, *collection_freq_ptr);
101 
102  Xapian::docid did;
103  // Read the docid of the first entry in the posting list.
104  if (!unpack_uint(posptr, end, &did))
105  report_read_error(*posptr);
106  ++did;
107  LOGVALUE(DB, did);
108  RETURN(did);
109 }
110 
111 static inline void
112 read_did_increase(const char ** posptr, const char * end,
113  Xapian::docid * did_ptr)
114 {
115  Xapian::docid did_increase;
116  if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
117  *did_ptr += did_increase + 1;
118 }
119 
121 static inline void
122 read_wdf(const char ** posptr, const char * end, Xapian::termcount * wdf_ptr)
123 {
124  if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
125 }
126 
128 static Xapian::docid
129 read_start_of_chunk(const char ** posptr,
130  const char * end,
131  Xapian::docid first_did_in_chunk,
132  bool * is_last_chunk_ptr)
133 {
134  LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
135  Assert(is_last_chunk_ptr);
136 
137  // Read whether this is the last chunk
138  if (!unpack_bool(posptr, end, is_last_chunk_ptr))
139  report_read_error(*posptr);
140  LOGVALUE(DB, *is_last_chunk_ptr);
141 
142  // Read what the final document ID in this chunk is.
143  Xapian::docid increase_to_last;
144  if (!unpack_uint(posptr, end, &increase_to_last))
145  report_read_error(*posptr);
146  Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
147  LOGVALUE(DB, last_did_in_chunk);
148  RETURN(last_did_in_chunk);
149 }
150 
151 void
153  Xapian::doccount* termfreq_ptr,
154  Xapian::termcount* collfreq_ptr,
155  Xapian::termcount* wdfub_ptr) const
156 {
157  string key = make_key(term);
158  string tag;
159  if (!get_exact_entry(key, tag)) {
160  if (termfreq_ptr)
161  *termfreq_ptr = 0;
162  if (collfreq_ptr)
163  *collfreq_ptr = 0;
164  if (wdfub_ptr)
165  *wdfub_ptr = 0;
166  } else {
167  const char * p = tag.data();
168  const char * e = p + tag.size();
169  Xapian::doccount tf;
171  GlassPostList::read_freqs(&p, e, &tf, &cf);
172  if (termfreq_ptr)
173  *termfreq_ptr = tf;
174  if (collfreq_ptr)
175  *collfreq_ptr = cf;
176  if (wdfub_ptr) {
177  if (cf == 0 || tf == 1) {
178  *wdfub_ptr = cf;
179  } else {
180  Xapian::docid did;
181  if (!unpack_uint(&p, e, &did))
183  bool is_last;
184  (void)read_start_of_chunk(&p, e, did + 1, &is_last);
185  (void)is_last;
186  Xapian::termcount first_wdf;
187  if (!unpack_uint(&p, e, &first_wdf))
189  *wdfub_ptr = max(cf - first_wdf, first_wdf);
190  }
191  }
192  }
193 }
194 
198  if (!doclen_pl) {
199  // Don't keep a reference back to the database, since this
200  // would make a reference loop.
201  doclen_pl.reset(new GlassPostList(db, {}, false));
202  }
203  if (!doclen_pl->jump_to(did))
204  throw Xapian::DocNotFoundError("Document " + str(did) + " not found");
205  return doclen_pl->get_wdf();
206 }
207 
208 bool
211 {
212  if (!doclen_pl) {
213  // Don't keep a reference back to the database, since this
214  // would make a reference loop.
215  doclen_pl.reset(new GlassPostList(db, {}, false));
216  }
217  return (doclen_pl->jump_to(did));
218 }
219 
220 // How big should chunks in the posting list be? (They
221 // will grow slightly bigger than this, but not more than a
222 // few bytes extra) - FIXME: tune this value to try to
223 // maximise how well blocks are used. Or performance.
224 // Or indexing speed. Or something...
225 const unsigned int CHUNKSIZE = 2000;
226 
234  public:
235  PostlistChunkWriter(string_view orig_key_,
236  bool is_first_chunk_,
237  string_view tname_,
238  bool is_last_chunk_);
239 
241  void append(GlassTable * table, Xapian::docid did,
242  Xapian::termcount wdf);
243 
245  void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
246  const string & s) {
247  Assert(!started);
248  first_did = first_did_;
249  current_did = current_did_;
250  if (!s.empty()) {
251  chunk.append(s);
252  started = true;
253  }
254  }
255 
260  void flush(GlassTable *table);
261 
262  private:
263  string orig_key;
264  string tname;
267  bool started;
268 
271 
272  string chunk;
273 };
274 
276 
282  string data;
283 
284  const char *pos;
285  const char *end;
286 
287  bool at_end;
288 
291 
292  public:
298  PostlistChunkReader(Xapian::docid first_did, const string & data_)
299  : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
300  {
301  if (!at_end) read_wdf(&pos, end, &wdf);
302  }
303 
305  return did;
306  }
308  return wdf;
309  }
310 
311  bool is_at_end() const {
312  return at_end;
313  }
314 
317  void next();
318 };
319 
321 
322 void
323 PostlistChunkReader::next()
324 {
325  if (pos == end) {
326  at_end = true;
327  } else {
328  read_did_increase(&pos, end, &did);
329  read_wdf(&pos, end, &wdf);
330  }
331 }
332 
333 PostlistChunkWriter::PostlistChunkWriter(string_view orig_key_,
334  bool is_first_chunk_,
335  string_view tname_,
336  bool is_last_chunk_)
337  : orig_key(orig_key_),
338  tname(tname_), is_first_chunk(is_first_chunk_),
339  is_last_chunk(is_last_chunk_),
340  started(false)
341 {
342  LOGCALL_CTOR(DB, "PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
343 }
344 
345 void
347  Xapian::termcount wdf)
348 {
349  if (!started) {
350  started = true;
351  first_did = did;
352  } else {
353  Assert(did > current_did);
354  // Start a new chunk if this one has grown to the threshold.
355  if (chunk.size() >= CHUNKSIZE) {
356  bool save_is_last_chunk = is_last_chunk;
357  is_last_chunk = false;
358  flush(table);
359  is_last_chunk = save_is_last_chunk;
360  is_first_chunk = false;
361  first_did = did;
362  chunk.resize(0);
364  } else {
365  pack_uint(chunk, did - current_did - 1);
366  }
367  }
368  current_did = did;
369  pack_uint(chunk, wdf);
370 }
371 
374 static inline string
376  Xapian::termcount collectionfreq,
377  Xapian::docid new_did)
378 {
379  string chunk;
380  pack_uint(chunk, entries);
381  pack_uint(chunk, collectionfreq);
382  pack_uint(chunk, new_did - 1);
383  return chunk;
384 }
385 
388 static inline string
389 make_start_of_chunk(bool new_is_last_chunk,
390  Xapian::docid new_first_did,
391  Xapian::docid new_final_did)
392 {
393  Assert(new_final_did >= new_first_did);
394  string chunk;
395  pack_bool(chunk, new_is_last_chunk);
396  pack_uint(chunk, new_final_did - new_first_did);
397  return chunk;
398 }
399 
400 static void
401 write_start_of_chunk(string & chunk,
402  unsigned int start_of_chunk_header,
403  unsigned int end_of_chunk_header,
404  bool is_last_chunk,
405  Xapian::docid first_did_in_chunk,
406  Xapian::docid last_did_in_chunk)
407 {
408  Assert(size_t(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
409 
410  chunk.replace(start_of_chunk_header,
411  end_of_chunk_header - start_of_chunk_header,
412  make_start_of_chunk(is_last_chunk, first_did_in_chunk,
413  last_did_in_chunk));
414 }
415 
416 void
418 {
419  LOGCALL_VOID(DB, "PostlistChunkWriter::flush", table);
420 
421  /* This is one of the more messy parts involved with updating posting
422  * list chunks.
423  *
424  * Depending on circumstances, we may have to delete an entire chunk
425  * or file it under a different key, as well as possibly modifying both
426  * the previous and next chunk of the postlist.
427  */
428 
429  if (!started) {
430  /* This chunk is now empty so disappears entirely.
431  *
432  * If this was the last chunk, then the previous chunk
433  * must have its "is_last_chunk" flag updated.
434  *
435  * If this was the first chunk, then the next chunk must
436  * be transformed into the first chunk. Messy!
437  */
438  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
439  Assert(!orig_key.empty());
440  if (is_first_chunk) {
441  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
442  if (is_last_chunk) {
443  /* This is the first and the last chunk, ie the only
444  * chunk, so just delete the tag.
445  */
446  table->del(orig_key);
447  return;
448  }
449 
450  /* This is the messiest case. The first chunk is to
451  * be removed, and there is at least one chunk after
452  * it. Need to rewrite the next chunk as the first
453  * chunk.
454  */
455  unique_ptr<GlassCursor> cursor(table->cursor_get());
456 
457  if (!cursor->find_entry(orig_key)) {
458  throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
459  }
460 
461  // FIXME: Currently the doclen list has a special first chunk too,
462  // which reduces special casing here. The downside is a slightly
463  // larger than necessary first chunk and needless fiddling if the
464  // first chunk is deleted. But really we should look at
465  // redesigning the whole postlist format with an eye to making it
466  // easier to update!
467 
468  // Extract existing counts from the first chunk so we can reinsert
469  // them into the block we're renaming.
470  Xapian::doccount num_ent;
471  Xapian::termcount coll_freq;
472  {
473  cursor->read_tag();
474  const char *tagpos = cursor->current_tag.data();
475  const char *tagend = tagpos + cursor->current_tag.size();
476 
477  (void)read_start_of_first_chunk(&tagpos, tagend,
478  &num_ent, &coll_freq);
479  }
480 
481  // Seek to the next chunk.
482  if (!cursor->next()) {
483  throw Xapian::DatabaseCorruptError("Expected another key but found none");
484  }
485  const char *kpos = cursor->current_key.data();
486  const char *kend = kpos + cursor->current_key.size();
487  if (!check_tname_in_key(&kpos, kend, tname)) {
488  throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
489  }
490 
491  // Read the new first docid
492  Xapian::docid new_first_did;
493  if (!unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
494  report_read_error(kpos);
495  }
496 
497  cursor->read_tag();
498  const char *tagpos = cursor->current_tag.data();
499  const char *tagend = tagpos + cursor->current_tag.size();
500 
501  // Read the chunk header
502  bool new_is_last_chunk;
503  Xapian::docid new_last_did_in_chunk =
504  read_start_of_chunk(&tagpos, tagend, new_first_did,
505  &new_is_last_chunk);
506 
507  string chunk_data(tagpos, tagend);
508 
509  // First remove the renamed tag
510  table->del(cursor->current_key);
511 
512  // And now write it as the first chunk
513  string tag;
514  tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
515  tag += make_start_of_chunk(new_is_last_chunk,
516  new_first_did,
517  new_last_did_in_chunk);
518  tag += chunk_data;
519  table->add(orig_key, tag);
520  return;
521  }
522 
523  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
524  /* This isn't the first chunk. Check whether we're the last chunk. */
525 
526  // Delete this chunk
527  table->del(orig_key);
528 
529  if (is_last_chunk) {
530  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
531  // Update the previous chunk's is_last_chunk flag.
532  unique_ptr<GlassCursor> cursor(table->cursor_get());
533 
534  /* Should not find the key we just deleted, but should
535  * find the previous chunk. */
536  if (cursor->find_entry(orig_key)) {
537  throw Xapian::DatabaseCorruptError("Glass key not deleted as we expected");
538  }
539  // Make sure this is a chunk with the right term attached.
540  const char * keypos = cursor->current_key.data();
541  const char * keyend = keypos + cursor->current_key.size();
542  if (!check_tname_in_key(&keypos, keyend, tname)) {
543  throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
544  }
545 
546  bool is_prev_first_chunk = (keypos == keyend);
547 
548  // Now update the last_chunk
549  cursor->read_tag();
550  string tag = cursor->current_tag;
551 
552  const char *tagpos = tag.data();
553  const char *tagend = tagpos + tag.size();
554 
555  // Skip first chunk header
556  Xapian::docid first_did_in_chunk;
557  if (is_prev_first_chunk) {
558  first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
559  0, 0);
560  } else {
561  if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk))
562  report_read_error(keypos);
563  }
564  bool wrong_is_last_chunk;
565  string::size_type start_of_chunk_header = tagpos - tag.data();
566  Xapian::docid last_did_in_chunk =
567  read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
568  &wrong_is_last_chunk);
569  string::size_type end_of_chunk_header = tagpos - tag.data();
570 
571  // write new is_last flag
573  start_of_chunk_header,
574  end_of_chunk_header,
575  true, // is_last_chunk
576  first_did_in_chunk,
577  last_did_in_chunk);
578  table->add(cursor->current_key, tag);
579  }
580  } else {
581  LOGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
582  /* The chunk still has some items in it. Two major subcases:
583  * a) This is the first chunk.
584  * b) This isn't the first chunk.
585  *
586  * The subcases just affect the chunk header.
587  */
588  string tag;
589 
590  /* First write the header, which depends on whether this is the
591  * first chunk.
592  */
593  if (is_first_chunk) {
594  /* The first chunk. This is the relatively easy case,
595  * and we just have to write this one back to disk.
596  */
597  LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
598  string key = GlassPostListTable::make_key(tname);
599  bool ok = table->get_exact_entry(key, tag);
600  (void)ok;
601  Assert(ok);
602  Assert(!tag.empty());
603 
604  Xapian::doccount num_ent;
605  Xapian::termcount coll_freq;
606  {
607  const char * tagpos = tag.data();
608  const char * tagend = tagpos + tag.size();
609  (void)read_start_of_first_chunk(&tagpos, tagend,
610  &num_ent, &coll_freq);
611  }
612 
613  tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
614 
616  tag += chunk;
617  table->add(key, tag);
618  return;
619  }
620 
621  LOGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
622  /* Not the first chunk.
623  *
624  * This has the easy sub-sub-case:
625  * The first entry in the chunk hasn't changed
626  * ...and the hard sub-sub-case:
627  * The first entry in the chunk has changed. This is
628  * harder because the key for the chunk changes, so
629  * we've got to do a switch.
630  */
631 
632  // First find out the initial docid
633  const char *keypos = orig_key.data();
634  const char *keyend = keypos + orig_key.size();
635  if (!check_tname_in_key(&keypos, keyend, tname)) {
636  throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
637  }
638  Xapian::docid initial_did;
639  if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
640  report_read_error(keypos);
641  }
642  string new_key;
643  if (initial_did != first_did) {
644  /* The fiddlier case:
645  * Create a new tag with the correct key, and replace
646  * the old one.
647  */
649  table->del(orig_key);
650  } else {
651  new_key = orig_key;
652  }
653 
654  // ...and write the start of this chunk.
656 
657  tag += chunk;
658  table->add(new_key, tag);
659  }
660 }
661 
666 void GlassPostList::read_freqs(const char** posptr,
667  const char* end,
668  Xapian::doccount* termfreq_ptr,
669  Xapian::termcount* collection_freq_ptr)
670 {
671  if (!unpack_uint(posptr, end, termfreq_ptr))
672  report_read_error(*posptr);
673  if (!unpack_uint(posptr, end, collection_freq_ptr))
674  report_read_error(*posptr);
675 }
676 
697  string_view term_,
698  bool keep_reference)
699  : LeafPostList(term_),
700  this_db(keep_reference ? this_db_ : NULL),
701  have_started(false),
702  is_at_end(false),
703  cursor(this_db_->postlist_table.cursor_get())
704 {
705  LOGCALL_CTOR(DB, "GlassPostList", this_db_.get() | term_ | keep_reference);
706  init();
707 }
708 
710  string_view term_,
711  GlassCursor * cursor_)
712  : LeafPostList(term_),
713  this_db(this_db_),
714  have_started(false),
715  is_at_end(false),
716  cursor(cursor_)
717 {
718  LOGCALL_CTOR(DB, "GlassPostList", this_db_.get() | term_ | cursor_);
719  init();
720 }
721 
722 void
724 {
725  string key = GlassPostListTable::make_key(term);
726  int found = cursor->find_entry(key);
727  if (!found) {
728  LOGLINE(DB, "postlist for term not found");
729  termfreq = 0;
730  collfreq = 0;
731  is_at_end = true;
732  pos = 0;
733  end = 0;
734  first_did_in_chunk = 0;
735  last_did_in_chunk = 0;
736  wdf_upper_bound = 0;
737  return;
738  }
739  cursor->read_tag();
740  pos = cursor->current_tag.data();
741  end = pos + cursor->current_tag.size();
742 
746  &is_last_chunk);
747  read_wdf(&pos, end, &wdf);
748  // This works even if there's only one entry (when wdf == collfreq)
749  // or when collfreq is 0 (=> wdf is 0 too). However, if this is
750  // a doclen list (term.empty()) then collfreq is 0 and "wdf" is the
751  // length of the first document. We don't use wdf_upper_bound in
752  // this case so just set it to 0 and avoid unsigned overflow.
753  wdf_upper_bound = term.empty() ? 0 : max(collfreq - wdf, wdf);
754  LOGLINE(DB, "Initial docid " << did);
755 }
756 
758 {
759  LOGCALL_DTOR(DB, "GlassPostList");
760  delete positionlist;
761 }
762 
763 bool
764 GlassPostList::open_nearby_postlist(std::string_view term_,
765  bool need_read_pos,
766  LeafPostList*& pl) const
767 {
768  LOGCALL(DB, bool, "GlassPostList::open_nearby_postlist", term_ | need_read_pos | Literal("LeafPostList*&"));
769  (void)need_read_pos;
770  if (term_.empty())
771  RETURN(false);
773  RETURN(false);
774  pl = new GlassPostList(this_db, term_, cursor->clone());
775  if (pl && pl->get_termfreq() == 0) {
776  delete pl;
777  pl = nullptr;
778  }
779  RETURN(true);
780 }
781 
782 bool
784 {
785  LOGCALL(DB, bool, "GlassPostList::next_in_chunk", NO_ARGS);
786  if (pos == end) RETURN(false);
787 
789  read_wdf(&pos, end, &wdf);
790 
791  // Either not at last doc in chunk, or pos == end, but not both.
794  Assert(pos != end || did == last_did_in_chunk);
795 
796  RETURN(true);
797 }
798 
799 void
801 {
802  LOGCALL_VOID(DB, "GlassPostList::next_chunk", NO_ARGS);
803  if (is_last_chunk) {
804  is_at_end = true;
805  return;
806  }
807 
808  if (!cursor->next()) {
809  is_at_end = true;
810  throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
811  term + "'");
812  }
813  const char * keypos = cursor->current_key.data();
814  const char * keyend = keypos + cursor->current_key.size();
815  // Check we're still in same postlist
816  if (!check_tname_in_key_lite(&keypos, keyend, term)) {
817  is_at_end = true;
818  throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
819  term + "'");
820  }
821 
822  Xapian::docid newdid;
823  if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
824  report_read_error(keypos);
825  }
826  if (newdid <= did) {
827  throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
828  str(newdid) +
829  ") is not greater than final document ID in previous chunk (" +
830  str(did) + ")");
831  }
832  did = newdid;
833 
834  cursor->read_tag();
835  pos = cursor->current_tag.data();
836  end = pos + cursor->current_tag.size();
837 
840  &is_last_chunk);
841  read_wdf(&pos, end, &wdf);
842 }
843 
844 PositionList *
846 {
847  LOGCALL(DB, PositionList *, "GlassPostList::read_position_list", NO_ARGS);
848  Assert(this_db);
849  if (rare(positionlist == NULL)) {
850  // Lazily create positionlist to avoid the size cost for the common
851  // case where we don't want positional data.
853  }
856 }
857 
858 PositionList *
860 {
861  LOGCALL(DB, PositionList *, "GlassPostList::open_position_list", NO_ARGS);
862  Assert(this_db);
864 }
865 
866 PostList *
867 GlassPostList::next(double w_min)
868 {
869  LOGCALL(DB, PostList *, "GlassPostList::next", w_min);
870  (void)w_min; // no warning
871 
872  if (!have_started) {
873  have_started = true;
874  } else {
875  if (!next_in_chunk()) next_chunk();
876  }
877 
878  if (is_at_end) {
879  LOGLINE(DB, "Moved to end");
880  } else {
881  LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf);
882  }
883 
884  RETURN(NULL);
885 }
886 
887 bool
889 {
890  LOGCALL(DB, bool, "GlassPostList::current_chunk_contains", desired_did);
891  if (desired_did >= first_did_in_chunk &&
892  desired_did <= last_did_in_chunk) {
893  RETURN(true);
894  }
895  RETURN(false);
896 }
897 
898 void
900 {
901  LOGCALL_VOID(DB, "GlassPostList::move_to_chunk_containing", desired_did);
902  (void)cursor->find_entry(GlassPostListTable::make_key(term, desired_did));
903  Assert(!cursor->after_end());
904 
905  const char * keypos = cursor->current_key.data();
906  const char * keyend = keypos + cursor->current_key.size();
907  // Check we're still in same postlist
908  if (!check_tname_in_key_lite(&keypos, keyend, term)) {
909  // This should only happen if the postlist doesn't exist at all.
910  is_at_end = true;
911  is_last_chunk = true;
912  return;
913  }
914  is_at_end = false;
915 
916  cursor->read_tag();
917  pos = cursor->current_tag.data();
918  end = pos + cursor->current_tag.size();
919 
920  if (keypos == keyend) {
921  // In first chunk
922 #ifdef XAPIAN_ASSERTIONS
923  Xapian::doccount old_termfreq = termfreq;
925  Assert(old_termfreq == termfreq);
926 #else
927  did = read_start_of_first_chunk(&pos, end, NULL, NULL);
928 #endif
929  } else {
930  // In normal chunk
931  if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
932  report_read_error(keypos);
933  }
934  }
935 
938  &is_last_chunk);
939  read_wdf(&pos, end, &wdf);
940 
941  // Possible, since desired_did might be after end of this chunk and before
942  // the next.
943  if (desired_did > last_did_in_chunk) next_chunk();
944 }
945 
946 bool
948 {
949  LOGCALL(DB, bool, "GlassPostList::move_forward_in_chunk_to_at_least", desired_did);
950  if (did >= desired_did)
951  RETURN(true);
952 
953  if (desired_did <= last_did_in_chunk) {
954  while (pos != end) {
956  if (did >= desired_did) {
957  read_wdf(&pos, end, &wdf);
958  RETURN(true);
959  }
960  // It's faster to just skip over the wdf than to decode it.
961  read_wdf(&pos, end, NULL);
962  }
963 
964  // If we hit the end of the chunk then last_did_in_chunk must be wrong.
965  Assert(false);
966  }
967 
968  pos = end;
969  RETURN(false);
970 }
971 
972 PostList *
973 GlassPostList::skip_to(Xapian::docid desired_did, double w_min)
974 {
975  LOGCALL(DB, PostList *, "GlassPostList::skip_to", desired_did | w_min);
976  (void)w_min; // no warning
977  // We've started now - if we hadn't already, we're already positioned
978  // at start so there's no need to actually do anything.
979  have_started = true;
980 
981  // Don't skip back, and don't need to do anything if already there.
982  if (is_at_end || desired_did <= did) RETURN(NULL);
983 
984  // Move to correct chunk
985  if (!current_chunk_contains(desired_did)) {
986  move_to_chunk_containing(desired_did);
987  // Might be at_end now, so we need to check before trying to move
988  // forward in chunk.
989  if (is_at_end) RETURN(NULL);
990  }
991 
992  // Move to correct position in chunk
993  bool have_document = move_forward_in_chunk_to_at_least(desired_did);
994  (void)have_document;
995  Assert(have_document);
996 
997  if (is_at_end) {
998  LOGLINE(DB, "Skipped to end");
999  } else {
1000  LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf);
1001  }
1002 
1003  RETURN(NULL);
1004 }
1005 
1006 // Used for doclens.
1007 bool
1009 {
1010  LOGCALL(DB, bool, "GlassPostList::jump_to", desired_did);
1011  // We've started now - if we hadn't already, we're already positioned
1012  // at start so there's no need to actually do anything.
1013  have_started = true;
1014 
1015  // If the list is empty, give up right away.
1016  if (pos == 0) RETURN(false);
1017 
1018  // Move to correct chunk, or reload the current chunk to go backwards in it
1019  // (FIXME: perhaps handle the latter case more elegantly, though it won't
1020  // happen during sequential access which is most common).
1021  if (is_at_end || !current_chunk_contains(desired_did) || desired_did < did) {
1022  // Clear is_at_end flag since we can rewind.
1023  is_at_end = false;
1024 
1025  move_to_chunk_containing(desired_did);
1026  // Might be at_end now, so we need to check before trying to move
1027  // forward in chunk.
1028  if (is_at_end) RETURN(false);
1029  }
1030 
1031  // Move to correct position in chunk.
1032  if (!move_forward_in_chunk_to_at_least(desired_did)) RETURN(false);
1033  RETURN(desired_did == did);
1034 }
1035 
1036 void
1038 {
1039  if (pos == NULL) {
1040  last = 0;
1041  } else {
1042  first = first_did_in_chunk;
1043  if (is_last_chunk) {
1044  last = last_did_in_chunk;
1045  }
1046  }
1047 }
1048 
1049 string
1051 {
1052  string desc;
1053  description_append(desc, term);
1054  desc += ":";
1055  desc += str(termfreq);
1056  return desc;
1057 }
1058 
1059 // Returns the last did to allow in this chunk.
1062  Xapian::docid did, bool adding,
1063  PostlistChunkReader ** from,
1064  PostlistChunkWriter **to)
1065 {
1066  LOGCALL(DB, Xapian::docid, "GlassPostListTable::get_chunk", tname | did | adding | from | to);
1067  // Get chunk containing entry
1068  string key = make_key(tname, did);
1069 
1070  // Find the right chunk
1071  unique_ptr<GlassCursor> cursor(cursor_get());
1072 
1073  (void)cursor->find_entry(key);
1074  Assert(!cursor->after_end());
1075 
1076  const char * keypos = cursor->current_key.data();
1077  const char * keyend = keypos + cursor->current_key.size();
1078 
1079  if (!check_tname_in_key(&keypos, keyend, tname)) {
1080  // Postlist for this termname doesn't exist.
1081  //
1082  // NB "adding" will only be true if we are adding, but it may sometimes
1083  // be false in some cases where we are actually adding.
1084  if (!adding)
1085  throw Xapian::DatabaseCorruptError("Attempted to delete or modify "
1086  "an entry in a non-existent "
1087  "posting list "
1088  "for "s.append(tname));
1089 
1090  *from = NULL;
1091  *to = new PostlistChunkWriter({}, true, tname, true);
1092  RETURN(Xapian::docid(-1));
1093  }
1094 
1095  // See if we're appending - if so we can shortcut by just copying
1096  // the data part of the chunk wholesale.
1097  bool is_first_chunk = (keypos == keyend);
1098  LOGVALUE(DB, is_first_chunk);
1099 
1100  cursor->read_tag();
1101  const char * pos = cursor->current_tag.data();
1102  const char * end = pos + cursor->current_tag.size();
1103  Xapian::docid first_did_in_chunk;
1104  if (is_first_chunk) {
1105  first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
1106  } else {
1107  if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk)) {
1108  report_read_error(keypos);
1109  }
1110  }
1111 
1112  bool is_last_chunk;
1113  Xapian::docid last_did_in_chunk;
1114  last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
1115  *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
1116  is_last_chunk);
1117  if (did > last_did_in_chunk) {
1118  // This is the shortcut. Not very pretty, but I'll leave refactoring
1119  // until I've a clearer picture of everything which needs to be done.
1120  // (FIXME)
1121  *from = NULL;
1122  (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1123  string(pos, end));
1124  } else {
1125  *from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
1126  }
1127  if (is_last_chunk) RETURN(Xapian::docid(-1));
1128 
1129  // Find first did of next tag.
1130  if (!cursor->next()) {
1131  throw Xapian::DatabaseCorruptError("Expected another key but found none");
1132  }
1133  const char *kpos = cursor->current_key.data();
1134  const char *kend = kpos + cursor->current_key.size();
1135  if (!check_tname_in_key(&kpos, kend, tname)) {
1136  throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
1137  }
1138 
1139  // Read the new first docid
1140  Xapian::docid first_did_of_next_chunk;
1141  if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
1142  report_read_error(kpos);
1143  }
1144  RETURN(first_did_of_next_chunk - 1);
1145 }
1146 
1147 void
1148 GlassPostListTable::merge_doclen_changes(const map<Xapian::docid, Xapian::termcount> & doclens)
1149 {
1150  LOGCALL_VOID(DB, "GlassPostListTable::merge_doclen_changes", doclens);
1151 
1152  // The cursor in the doclen_pl will no longer be valid, so reset it.
1153  doclen_pl.reset(0);
1154 
1155  LOGVALUE(DB, doclens.size());
1156  if (doclens.empty()) return;
1157 
1158  // Ensure there's a first chunk.
1159  string current_key = make_key({});
1160  if (!key_exists(current_key)) {
1161  LOGLINE(DB, "Adding dummy first chunk");
1162  // Zero values except the "last chunk" flag is set.
1163  add(current_key, "\0\0\0\x31\0"s);
1164  }
1165 
1166  map<Xapian::docid, Xapian::termcount>::const_iterator j;
1167  j = doclens.begin();
1168  Assert(j != doclens.end()); // This case is caught above.
1169 
1170  Xapian::docid max_did;
1171  PostlistChunkReader *from;
1172  PostlistChunkWriter *to;
1173  max_did = get_chunk({}, j->first, true, &from, &to);
1174  LOGVALUE(DB, max_did);
1175  for ( ; j != doclens.end(); ++j) {
1176  Xapian::docid did = j->first;
1177 
1178 next_doclen_chunk:
1179  LOGLINE(DB, "Updating doclens, did=" << did);
1180  if (from) while (!from->is_at_end()) {
1181  Xapian::docid copy_did = from->get_docid();
1182  if (copy_did >= did) {
1183  if (copy_did == did) from->next();
1184  break;
1185  }
1186  to->append(this, copy_did, from->get_wdf());
1187  from->next();
1188  }
1189  if ((!from || from->is_at_end()) && did > max_did) {
1190  delete from;
1191  to->flush(this);
1192  delete to;
1193  max_did = get_chunk({}, did, false, &from, &to);
1194  goto next_doclen_chunk;
1195  }
1196 
1197  Xapian::termcount new_doclen = j->second;
1198  if (new_doclen != static_cast<Xapian::termcount>(-1)) {
1199  to->append(this, did, new_doclen);
1200  }
1201  }
1202 
1203  if (from) {
1204  while (!from->is_at_end()) {
1205  to->append(this, from->get_docid(), from->get_wdf());
1206  from->next();
1207  }
1208  delete from;
1209  }
1210  to->flush(this);
1211  delete to;
1212 }
1213 
1214 void
1216  const Inverter::PostingChanges& changes)
1217 {
1218  {
1219  // Rewrite the first chunk of this posting list with the updated
1220  // termfreq and collfreq.
1221  string current_key = make_key(term);
1222  string tag;
1223  (void)get_exact_entry(current_key, tag);
1224 
1225  // Read start of first chunk to get termfreq and collfreq.
1226  const char *pos = tag.data();
1227  const char *end = pos + tag.size();
1228  Xapian::doccount termfreq;
1229  Xapian::termcount collfreq;
1230  Xapian::docid firstdid, lastdid;
1231  bool islast;
1232  if (pos == end) {
1233  termfreq = 0;
1234  collfreq = 0;
1235  // Dummy values which will get replaced later.
1236  firstdid = lastdid = 1;
1237  islast = true;
1238  } else {
1239  firstdid = read_start_of_first_chunk(&pos, end,
1240  &termfreq, &collfreq);
1241  // Handle the generic start of chunk header.
1242  lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
1243  }
1244 
1245  UNSIGNED_OVERFLOW_OK(termfreq += changes.get_tfdelta());
1246  if (termfreq == 0) {
1247  // All postings deleted! So we can shortcut by zapping the
1248  // posting list.
1249  if (islast) {
1250  // Only one entry for this posting list.
1251  del(current_key);
1252  return;
1253  }
1254  MutableGlassCursor cursor(this);
1255  bool found = cursor.find_entry(current_key);
1256  Assert(found);
1257  if (!found) return; // Reduce damage!
1258  while (cursor.del()) {
1259  const char *kpos = cursor.current_key.data();
1260  const char *kend = kpos + cursor.current_key.size();
1261  if (!check_tname_in_key_lite(&kpos, kend, term)) break;
1262  }
1263  return;
1264  }
1265  UNSIGNED_OVERFLOW_OK(collfreq += changes.get_cfdelta());
1266 
1267  // Rewrite start of first chunk to update termfreq and collfreq.
1268  string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
1269  newhdr += make_start_of_chunk(islast, firstdid, lastdid);
1270  if (pos == end) {
1271  add(current_key, newhdr);
1272  } else {
1273  Assert(size_t(pos - tag.data()) <= tag.size());
1274  tag.replace(0, pos - tag.data(), newhdr);
1275  add(current_key, tag);
1276  }
1277  }
1278  map<Xapian::docid, Xapian::termcount>::const_iterator j;
1279  j = changes.pl_changes.begin();
1280  Assert(j != changes.pl_changes.end()); // This case is caught above.
1281 
1282  Xapian::docid max_did;
1283  PostlistChunkReader *from;
1284  PostlistChunkWriter *to;
1285  max_did = get_chunk(term, j->first, false, &from, &to);
1286  for ( ; j != changes.pl_changes.end(); ++j) {
1287  Xapian::docid did = j->first;
1288 
1289 next_chunk:
1290  LOGLINE(DB, "Updating term=" << term << ", did=" << did);
1291  if (from) while (!from->is_at_end()) {
1292  Xapian::docid copy_did = from->get_docid();
1293  if (copy_did >= did) {
1294  if (copy_did == did) {
1295  from->next();
1296  }
1297  break;
1298  }
1299  to->append(this, copy_did, from->get_wdf());
1300  from->next();
1301  }
1302  if ((!from || from->is_at_end()) && did > max_did) {
1303  delete from;
1304  to->flush(this);
1305  delete to;
1306  max_did = get_chunk(term, did, false, &from, &to);
1307  goto next_chunk;
1308  }
1309 
1310  Xapian::termcount new_wdf = j->second;
1311  if (new_wdf != Xapian::termcount(-1)) {
1312  to->append(this, did, new_wdf);
1313  }
1314  }
1315 
1316  if (from) {
1317  while (!from->is_at_end()) {
1318  to->append(this, from->get_docid(), from->get_wdf());
1319  from->next();
1320  }
1321  delete from;
1322  }
1323  to->flush(this);
1324  delete to;
1325 }
1326 
1327 void
1329  Xapian::docid & last) const
1330 {
1331  LOGCALL(DB, Xapian::docid, "GlassPostListTable::get_used_docid_range", "&first, &used");
1332  unique_ptr<GlassCursor> cur(cursor_get());
1333  if (!cur->find_entry(pack_glass_postlist_key({}))) {
1334  // Empty database.
1335  first = last = 0;
1336  return;
1337  }
1338 
1339  cur->read_tag();
1340  const char * p = cur->current_tag.data();
1341  const char * e = p + cur->current_tag.size();
1342 
1343  first = read_start_of_first_chunk(&p, e, NULL, NULL);
1344 
1345  (void)cur->find_entry(pack_glass_postlist_key({}, GLASS_MAX_DOCID));
1346  Assert(!cur->after_end());
1347 
1348  const char * keypos = cur->current_key.data();
1349  const char * keyend = keypos + cur->current_key.size();
1350  // Check we're still in same postlist
1351  if (!check_tname_in_key_lite(&keypos, keyend, {})) {
1352  // Shouldn't happen - we already handled the empty database case above.
1353  Assert(false);
1354  first = last = 0;
1355  return;
1356  }
1357 
1358  cur->read_tag();
1359  p = cur->current_tag.data();
1360  e = p + cur->current_tag.size();
1361 
1362  Xapian::docid start_of_last_chunk;
1363  if (keypos == keyend) {
1364  start_of_last_chunk = first;
1365  first = read_start_of_first_chunk(&p, e, NULL, NULL);
1366  } else {
1367  // In normal chunk
1368  if (!unpack_uint_preserving_sort(&keypos, keyend,
1369  &start_of_last_chunk)) {
1370  report_read_error(keypos);
1371  }
1372  }
1373 
1374  bool dummy;
1375  last = read_start_of_chunk(&p, e, start_of_last_chunk, &dummy);
1376 }
1377 
1380 {
1381  Assert(!term.empty());
1382  return wdf_upper_bound;
1383 }
1384 
1385 #ifdef DISABLE_GPL_LIBXAPIAN
1386 # error GPL source we cannot relicense included in libxapian
1387 #endif
A cursor pointing to a position in a Btree table, for reading several entries in order,...
Definition: glass_cursor.h:148
string current_key
Current key pointed to by cursor.
Definition: glass_cursor.h:239
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
PositionList * open_position_list(Xapian::docid did, std::string_view term) const
virtual void read_position_list(GlassRePositionList *pos_list, Xapian::docid did, std::string_view term) const
GlassPositionListTable position_table
Table storing position lists.
GlassPostListTable postlist_table
Table storing posting lists.
Xapian::docid get_chunk(std::string_view tname, Xapian::docid did, bool adding, Glass::PostlistChunkReader **from, Glass::PostlistChunkWriter **to)
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Check if document did exists.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
static std::string make_key(std::string_view term, Xapian::docid did)
Compose a key from a termname and docid.
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Returns the length of document did.
void merge_changes(std::string_view term, const Inverter::PostingChanges &changes)
Merge changes for a term.
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr, Xapian::termcount *wdfub_ptr=NULL) const
Returns frequencies for a term.
void merge_doclen_changes(const std::map< Xapian::docid, Xapian::termcount > &doclens)
Merge document length changes.
std::unique_ptr< GlassPostList > doclen_pl
PostList for looking up document lengths.
A postlist in a glass database.
bool have_started
Whether we've started reading the list yet.
Xapian::docid did
Document id we're currently at.
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk.
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID.
PositionList * open_position_list() const
Get the list of positions of the term in the current document.
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to next document with docid >= docid.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
bool is_at_end
Whether we've run off the end of the list yet.
bool next_in_chunk()
Move to the next item in the chunk, if possible.
static void read_freqs(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the term frequency and collection frequency.
std::string get_description() const
Get a description of the document.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
Xapian::Internal::intrusive_ptr< const GlassDatabase > this_db
The database we are searching.
Xapian::termcount get_wdf_upper_bound() const
bool open_nearby_postlist(std::string_view term_, bool need_read_pos, LeafPostList *&pl) const
Open another postlist from the same database.
GlassPostList(const GlassPostList &)
Copying is not allowed.
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens.
~GlassPostList()
Destructor.
GlassRePositionList * positionlist
The position list object for this posting list.
PositionList * read_position_list()
Get the list of positions of the term in the current document.
const char * end
Pointer to byte after end of current chunk.
const char * pos
Position of iteration through current chunk.
bool is_last_chunk
True if this is the last chunk.
void next_chunk()
Move to the next chunk.
std::unique_ptr< GlassCursor > cursor
Cursor pointing to current chunk of postlist.
Xapian::termcount wdf
The wdf of the current document.
A reusable position list in a glass database.
Class managing a Btree table in a Glass database.
Definition: glass_table.h:432
bool key_exists(std::string_view key) const
Check if a key exists in the Btree.
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
bool get_exact_entry(std::string_view key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
bool del(std::string_view key)
Delete an entry from the table.
bool is_writable() const
Return true if this table is writable.
Definition: glass_table.h:516
void add(std::string_view key, std::string_view tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
void next()
Advance to the next entry.
PostlistChunkReader(Xapian::docid first_did, const string &data_)
Initialise the postlist chunk reader.
Xapian::termcount get_wdf() const
Xapian::docid get_docid() const
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk,...
void flush(GlassTable *table)
Flush the chunk to the buffered table.
void raw_append(Xapian::docid first_did_, Xapian::docid current_did_, const string &s)
Append a block of raw entries to this chunk.
void append(GlassTable *table, Xapian::docid did, Xapian::termcount wdf)
Append an entry to this chunk.
Class for storing the changes in frequencies for a term.
Xapian::termcount get_tfdelta() const
Get the term frequency delta.
Xapian::termcount get_cfdelta() const
Get the collection frequency delta.
std::map< Xapian::docid, Xapian::termcount > pl_changes
Changes to this term's postlist.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
Xapian::termcount collfreq
The collection frequency of the term.
Definition: leafpostlist.h:57
std::string term
The term name for this postlist (empty for an alldocs postlist).
Definition: leafpostlist.h:51
bool del()
Delete the current key/tag pair, leaving the cursor on the next entry.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
Indicates an attempt to access a document not present in the database.
Definition: error.h:662
Abstract base class for postlists.
Definition: postlist.h:40
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
Definition: postlist.h:67
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
Definition: postlist.h:52
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:83
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
RangeError indicates an attempt to access outside the bounds of a container.
Definition: error.h:959
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
#define rare(COND)
Definition: config.h:607
string term
PositionList * p
Xapian::termpos pos
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGLINE(a, b)
Definition: debuglog.h:485
#define LOGVALUE(a, b)
Definition: debuglog.h:486
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:480
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:482
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:479
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:481
Append a string to an object description, escaping invalid UTF-8.
Interface to Btree cursors.
C++ class definition for glass database.
#define GLASS_MAX_DOCID
The largest docid value supported by glass.
Definition: glass_defs.h:50
static string make_start_of_chunk(bool new_is_last_chunk, Xapian::docid new_first_did, Xapian::docid new_final_did)
Make the data to go at the start of a standard chunk.
static void report_read_error(const char *position)
Report an error when reading the posting list.
static string make_start_of_first_chunk(Xapian::doccount entries, Xapian::termcount collectionfreq, Xapian::docid new_did)
Make the data to go at the start of the very first chunk.
static bool get_tname_from_key(const char **src, const char *end, string &tname)
static bool check_tname_in_key(const char **keypos, const char *keyend, string_view tname)
static void read_did_increase(const char **posptr, const char *end, Xapian::docid *did_ptr)
static bool check_tname_in_key_lite(const char **keypos, const char *keyend, string_view tname)
static Xapian::docid read_start_of_chunk(const char **posptr, const char *end, Xapian::docid first_did_in_chunk, bool *is_last_chunk_ptr)
Read the start of a chunk.
static Xapian::docid read_start_of_first_chunk(const char **posptr, const char *end, Xapian::doccount *termfreq_ptr, Xapian::termcount *collection_freq_ptr)
Read the start of the first chunk in the posting list.
static void read_wdf(const char **posptr, const char *end, Xapian::termcount *wdf_ptr)
Read the wdf for an entry.
static void write_start_of_chunk(string &chunk, unsigned int start_of_chunk_header, unsigned int end_of_chunk_header, bool is_last_chunk, Xapian::docid first_did_in_chunk, Xapian::docid last_did_in_chunk)
const unsigned int CHUNKSIZE
Postlists in glass databases.
#define false
Definition: header.h:9
string str(int value)
Convert int to std::string.
Definition: str.cc:91
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define Assert(COND)
Definition: omassert.h:122
Pack types into strings and unpack them again.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
Definition: pack.h:551
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string.
Definition: pack.h:76
void pack_bool(std::string &s, bool value)
Append an encoded bool to a string.
Definition: pack.h:64
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:346
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:315
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
Definition: pack.h:251
std::string pack_glass_postlist_key(std::string_view term)
Definition: pack.h:574
Convert types to std::string.
Definition: pretty.h:48
void description_append(std::string &desc, std::string_view s)
Definition: unittest.cc:105
const char * dummy[]
Definition: version_h.cc:7