xapian-core  1.4.26
glass_postlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015,2019 Olly Betts
6  * Copyright 2007,2008,2009 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "glass_postlist.h"
27 
28 #include "glass_cursor.h"
29 #include "glass_database.h"
30 #include "debuglog.h"
31 #include "noreturn.h"
32 #include "pack.h"
33 #include "str.h"
35 
37 
38 // Static functions
39 
41 XAPIAN_NORETURN(static void report_read_error(const char * position));
42 static void report_read_error(const char * position)
43 {
44  if (position == 0) {
45  // data ran out
46  LOGLINE(DB, "GlassPostList data ran out");
47  throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when "
48  "reading posting list");
49  }
50  // overflow
51  LOGLINE(DB, "GlassPostList value too large");
52  throw Xapian::RangeError("Value in posting list too large");
53 }
54 
55 static inline bool
56 get_tname_from_key(const char **src, const char *end, string &tname)
57 {
58  return unpack_string_preserving_sort(src, end, tname);
59 }
60 
61 static inline bool
62 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
63 {
64  string tname_in_key;
65 
66  if (keyend - *keypos >= 2 && (*keypos)[0] == '\0' && (*keypos)[1] == '\xe0') {
67  *keypos += 2;
68  } else {
69  // Read the termname.
70  if (!get_tname_from_key(keypos, keyend, tname_in_key))
71  report_read_error(*keypos);
72  }
73 
74  // This should only fail if the postlist doesn't exist at all.
75  return tname_in_key == tname;
76 }
77 
78 static inline bool
79 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
80 {
81  if (*keypos == keyend) return false;
82 
83  return check_tname_in_key_lite(keypos, keyend, tname);
84 }
85 
87 static Xapian::docid
88 read_start_of_first_chunk(const char ** posptr,
89  const char * end,
90  Xapian::doccount * number_of_entries_ptr,
91  Xapian::termcount * collection_freq_ptr)
92 {
93  LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void *)posptr | (const void *)end | (void *)number_of_entries_ptr | (void *)collection_freq_ptr);
94 
96  number_of_entries_ptr,
97  collection_freq_ptr);
98  if (number_of_entries_ptr)
99  LOGVALUE(DB, *number_of_entries_ptr);
100  if (collection_freq_ptr)
101  LOGVALUE(DB, *collection_freq_ptr);
102 
103  Xapian::docid did;
104  // Read the docid of the first entry in the posting list.
105  if (!unpack_uint(posptr, end, &did))
106  report_read_error(*posptr);
107  ++did;
108  LOGVALUE(DB, did);
109  RETURN(did);
110 }
111 
112 static inline void
113 read_did_increase(const char ** posptr, const char * end,
114  Xapian::docid * did_ptr)
115 {
116  Xapian::docid did_increase;
117  if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
118  *did_ptr += did_increase + 1;
119 }
120 
122 static inline void
123 read_wdf(const char ** posptr, const char * end, Xapian::termcount * wdf_ptr)
124 {
125  if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
126 }
127 
129 static Xapian::docid
130 read_start_of_chunk(const char ** posptr,
131  const char * end,
132  Xapian::docid first_did_in_chunk,
133  bool * is_last_chunk_ptr)
134 {
135  LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
136  Assert(is_last_chunk_ptr);
137 
138  // Read whether this is the last chunk
139  if (!unpack_bool(posptr, end, is_last_chunk_ptr))
140  report_read_error(*posptr);
141  LOGVALUE(DB, *is_last_chunk_ptr);
142 
143  // Read what the final document ID in this chunk is.
144  Xapian::docid increase_to_last;
145  if (!unpack_uint(posptr, end, &increase_to_last))
146  report_read_error(*posptr);
147  Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
148  LOGVALUE(DB, last_did_in_chunk);
149  RETURN(last_did_in_chunk);
150 }
151 
152 void
153 GlassPostListTable::get_freqs(const string & term,
154  Xapian::doccount * termfreq_ptr,
155  Xapian::termcount * collfreq_ptr,
156  Xapian::termcount * wdfub_ptr) const
157 {
158  string key = make_key(term);
159  string tag;
160  if (!get_exact_entry(key, tag)) {
161  if (termfreq_ptr)
162  *termfreq_ptr = 0;
163  if (collfreq_ptr)
164  *collfreq_ptr = 0;
165  if (wdfub_ptr)
166  *wdfub_ptr = 0;
167  } else {
168  const char * p = tag.data();
169  const char * e = p + tag.size();
170  Xapian::doccount tf;
172  GlassPostList::read_number_of_entries(&p, e, &tf, &cf);
173  if (termfreq_ptr)
174  *termfreq_ptr = tf;
175  if (collfreq_ptr)
176  *collfreq_ptr = cf;
177  if (wdfub_ptr) {
178  if (cf == 0 || tf == 1) {
179  *wdfub_ptr = cf;
180  } else {
181  Xapian::docid did;
182  if (!unpack_uint(&p, e, &did))
184  bool is_last;
185  (void)read_start_of_chunk(&p, e, did + 1, &is_last);
186  (void)is_last;
187  Xapian::termcount first_wdf;
188  if (!unpack_uint(&p, e, &first_wdf))
190  *wdfub_ptr = max(cf - first_wdf, first_wdf);
191  }
192  }
193  }
194 }
195 
199  if (!doclen_pl.get()) {
200  // Don't keep a reference back to the database, since this
201  // would make a reference loop.
202  doclen_pl.reset(new GlassPostList(db, string(), false));
203  }
204  if (!doclen_pl->jump_to(did))
205  throw Xapian::DocNotFoundError("Document " + str(did) + " not found");
206  return doclen_pl->get_wdf();
207 }
208 
209 bool
212 {
213  if (!doclen_pl.get()) {
214  // Don't keep a reference back to the database, since this
215  // would make a reference loop.
216  doclen_pl.reset(new GlassPostList(db, string(), false));
217  }
218  return (doclen_pl->jump_to(did));
219 }
220 
// Target size, in bytes, for each chunk of a posting list.  A chunk may grow
// slightly bigger than this, but by no more than a few bytes.
// FIXME: tune this value - candidates to optimise for are how well blocks
// are used, search performance, and indexing speed.
226 const unsigned int CHUNKSIZE = 2000;
227 
235  public:
236  PostlistChunkWriter(const string &orig_key_,
237  bool is_first_chunk_,
238  const string &tname_,
239  bool is_last_chunk_);
240 
242  void append(GlassTable * table, Xapian::docid did,
243  Xapian::termcount wdf);
244 
246  void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
247  const string & s) {
248  Assert(!started);
249  first_did = first_did_;
250  current_did = current_did_;
251  if (!s.empty()) {
252  chunk.append(s);
253  started = true;
254  }
255  }
256 
261  void flush(GlassTable *table);
262 
263  private:
264  string orig_key;
265  string tname;
268  bool started;
269 
272 
273  string chunk;
274 };
275 
277 
283  string data;
284 
285  const char *pos;
286  const char *end;
287 
288  bool at_end;
289 
292 
293  public:
299  PostlistChunkReader(Xapian::docid first_did, const string & data_)
300  : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
301  {
302  if (!at_end) read_wdf(&pos, end, &wdf);
303  }
304 
306  return did;
307  }
309  return wdf;
310  }
311 
312  bool is_at_end() const {
313  return at_end;
314  }
315 
318  void next();
319 };
320 
322 
323 void
324 PostlistChunkReader::next()
325 {
326  if (pos == end) {
327  at_end = true;
328  } else {
329  read_did_increase(&pos, end, &did);
330  read_wdf(&pos, end, &wdf);
331  }
332 }
333 
334 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
335  bool is_first_chunk_,
336  const string &tname_,
337  bool is_last_chunk_)
338  : orig_key(orig_key_),
339  tname(tname_), is_first_chunk(is_first_chunk_),
340  is_last_chunk(is_last_chunk_),
341  started(false)
342 {
343  LOGCALL_CTOR(DB, "PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
344 }
345 
346 void
348  Xapian::termcount wdf)
349 {
350  if (!started) {
351  started = true;
352  first_did = did;
353  } else {
354  Assert(did > current_did);
355  // Start a new chunk if this one has grown to the threshold.
356  if (chunk.size() >= CHUNKSIZE) {
357  bool save_is_last_chunk = is_last_chunk;
358  is_last_chunk = false;
359  flush(table);
360  is_last_chunk = save_is_last_chunk;
361  is_first_chunk = false;
362  first_did = did;
363  chunk.resize(0);
365  } else {
366  pack_uint(chunk, did - current_did - 1);
367  }
368  }
369  current_did = did;
370  pack_uint(chunk, wdf);
371 }
372 
375 static inline string
377  Xapian::termcount collectionfreq,
378  Xapian::docid new_did)
379 {
380  string chunk;
381  pack_uint(chunk, entries);
382  pack_uint(chunk, collectionfreq);
383  pack_uint(chunk, new_did - 1);
384  return chunk;
385 }
386 
389 static inline string
390 make_start_of_chunk(bool new_is_last_chunk,
391  Xapian::docid new_first_did,
392  Xapian::docid new_final_did)
393 {
394  Assert(new_final_did >= new_first_did);
395  string chunk;
396  pack_bool(chunk, new_is_last_chunk);
397  pack_uint(chunk, new_final_did - new_first_did);
398  return chunk;
399 }
400 
401 static void
403  unsigned int start_of_chunk_header,
404  unsigned int end_of_chunk_header,
405  bool is_last_chunk,
406  Xapian::docid first_did_in_chunk,
407  Xapian::docid last_did_in_chunk)
408 {
409  Assert(size_t(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
410 
411  chunk.replace(start_of_chunk_header,
412  end_of_chunk_header - start_of_chunk_header,
413  make_start_of_chunk(is_last_chunk, first_did_in_chunk,
414  last_did_in_chunk));
415 }
416 
417 void
419 {
420  LOGCALL_VOID(DB, "PostlistChunkWriter::flush", table);
421 
422  /* This is one of the more messy parts involved with updating posting
423  * list chunks.
424  *
425  * Depending on circumstances, we may have to delete an entire chunk
426  * or file it under a different key, as well as possibly modifying both
427  * the previous and next chunk of the postlist.
428  */
429 
430  if (!started) {
431  /* This chunk is now empty so disappears entirely.
432  *
433  * If this was the last chunk, then the previous chunk
434  * must have its "is_last_chunk" flag updated.
435  *
436  * If this was the first chunk, then the next chunk must
437  * be transformed into the first chunk. Messy!
438  */
439  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
440  Assert(!orig_key.empty());
441  if (is_first_chunk) {
442  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
443  if (is_last_chunk) {
444  /* This is the first and the last chunk, ie the only
445  * chunk, so just delete the tag.
446  */
447  table->del(orig_key);
448  return;
449  }
450 
451  /* This is the messiest case. The first chunk is to
452  * be removed, and there is at least one chunk after
453  * it. Need to rewrite the next chunk as the first
454  * chunk.
455  */
456  AutoPtr<GlassCursor> cursor(table->cursor_get());
457 
458  if (!cursor->find_entry(orig_key)) {
459  throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
460  }
461 
462  // FIXME: Currently the doclen list has a special first chunk too,
463  // which reduces special casing here. The downside is a slightly
464  // larger than necessary first chunk and needless fiddling if the
465  // first chunk is deleted. But really we should look at
466  // redesigning the whole postlist format with an eye to making it
467  // easier to update!
468 
469  // Extract existing counts from the first chunk so we can reinsert
470  // them into the block we're renaming.
471  Xapian::doccount num_ent;
472  Xapian::termcount coll_freq;
473  {
474  cursor->read_tag();
475  const char *tagpos = cursor->current_tag.data();
476  const char *tagend = tagpos + cursor->current_tag.size();
477 
478  (void)read_start_of_first_chunk(&tagpos, tagend,
479  &num_ent, &coll_freq);
480  }
481 
482  // Seek to the next chunk.
483  cursor->next();
484  if (cursor->after_end()) {
485  throw Xapian::DatabaseCorruptError("Expected another key but found none");
486  }
487  const char *kpos = cursor->current_key.data();
488  const char *kend = kpos + cursor->current_key.size();
489  if (!check_tname_in_key(&kpos, kend, tname)) {
490  throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
491  }
492 
493  // Read the new first docid
494  Xapian::docid new_first_did;
495  if (!unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
496  report_read_error(kpos);
497  }
498 
499  cursor->read_tag();
500  const char *tagpos = cursor->current_tag.data();
501  const char *tagend = tagpos + cursor->current_tag.size();
502 
503  // Read the chunk header
504  bool new_is_last_chunk;
505  Xapian::docid new_last_did_in_chunk =
506  read_start_of_chunk(&tagpos, tagend, new_first_did,
507  &new_is_last_chunk);
508 
509  string chunk_data(tagpos, tagend);
510 
511  // First remove the renamed tag
512  table->del(cursor->current_key);
513 
514  // And now write it as the first chunk
515  string tag;
516  tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
517  tag += make_start_of_chunk(new_is_last_chunk,
518  new_first_did,
519  new_last_did_in_chunk);
520  tag += chunk_data;
521  table->add(orig_key, tag);
522  return;
523  }
524 
525  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
526  /* This isn't the first chunk. Check whether we're the last chunk. */
527 
528  // Delete this chunk
529  table->del(orig_key);
530 
531  if (is_last_chunk) {
532  LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
533  // Update the previous chunk's is_last_chunk flag.
534  AutoPtr<GlassCursor> cursor(table->cursor_get());
535 
536  /* Should not find the key we just deleted, but should
537  * find the previous chunk. */
538  if (cursor->find_entry(orig_key)) {
539  throw Xapian::DatabaseCorruptError("Glass key not deleted as we expected");
540  }
541  // Make sure this is a chunk with the right term attached.
542  const char * keypos = cursor->current_key.data();
543  const char * keyend = keypos + cursor->current_key.size();
544  if (!check_tname_in_key(&keypos, keyend, tname)) {
545  throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
546  }
547 
548  bool is_prev_first_chunk = (keypos == keyend);
549 
550  // Now update the last_chunk
551  cursor->read_tag();
552  string tag = cursor->current_tag;
553 
554  const char *tagpos = tag.data();
555  const char *tagend = tagpos + tag.size();
556 
557  // Skip first chunk header
558  Xapian::docid first_did_in_chunk;
559  if (is_prev_first_chunk) {
560  first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
561  0, 0);
562  } else {
563  if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk))
564  report_read_error(keypos);
565  }
566  bool wrong_is_last_chunk;
567  string::size_type start_of_chunk_header = tagpos - tag.data();
568  Xapian::docid last_did_in_chunk =
569  read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
570  &wrong_is_last_chunk);
571  string::size_type end_of_chunk_header = tagpos - tag.data();
572 
573  // write new is_last flag
575  start_of_chunk_header,
576  end_of_chunk_header,
577  true, // is_last_chunk
578  first_did_in_chunk,
579  last_did_in_chunk);
580  table->add(cursor->current_key, tag);
581  }
582  } else {
583  LOGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
584  /* The chunk still has some items in it. Two major subcases:
585  * a) This is the first chunk.
586  * b) This isn't the first chunk.
587  *
588  * The subcases just affect the chunk header.
589  */
590  string tag;
591 
592  /* First write the header, which depends on whether this is the
593  * first chunk.
594  */
595  if (is_first_chunk) {
596  /* The first chunk. This is the relatively easy case,
597  * and we just have to write this one back to disk.
598  */
599  LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
600  string key = GlassPostListTable::make_key(tname);
601  bool ok = table->get_exact_entry(key, tag);
602  (void)ok;
603  Assert(ok);
604  Assert(!tag.empty());
605 
606  Xapian::doccount num_ent;
607  Xapian::termcount coll_freq;
608  {
609  const char * tagpos = tag.data();
610  const char * tagend = tagpos + tag.size();
611  (void)read_start_of_first_chunk(&tagpos, tagend,
612  &num_ent, &coll_freq);
613  }
614 
615  tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
616 
618  tag += chunk;
619  table->add(key, tag);
620  return;
621  }
622 
623  LOGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
624  /* Not the first chunk.
625  *
626  * This has the easy sub-sub-case:
627  * The first entry in the chunk hasn't changed
628  * ...and the hard sub-sub-case:
629  * The first entry in the chunk has changed. This is
630  * harder because the key for the chunk changes, so
631  * we've got to do a switch.
632  */
633 
634  // First find out the initial docid
635  const char *keypos = orig_key.data();
636  const char *keyend = keypos + orig_key.size();
637  if (!check_tname_in_key(&keypos, keyend, tname)) {
638  throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
639  }
640  Xapian::docid initial_did;
641  if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
642  report_read_error(keypos);
643  }
644  string new_key;
645  if (initial_did != first_did) {
646  /* The fiddlier case:
647  * Create a new tag with the correct key, and replace
648  * the old one.
649  */
651  table->del(orig_key);
652  } else {
653  new_key = orig_key;
654  }
655 
656  // ...and write the start of this chunk.
658 
659  tag += chunk;
660  table->add(new_key, tag);
661  }
662 }
663 
668 void GlassPostList::read_number_of_entries(const char ** posptr,
669  const char * end,
670  Xapian::doccount * number_of_entries_ptr,
671  Xapian::termcount * collection_freq_ptr)
672 {
673  if (!unpack_uint(posptr, end, number_of_entries_ptr))
674  report_read_error(*posptr);
675  if (!unpack_uint(posptr, end, collection_freq_ptr))
676  report_read_error(*posptr);
677 }
678 
699  const string & term_,
700  bool keep_reference)
701  : LeafPostList(term_),
702  this_db(keep_reference ? this_db_ : NULL),
703  have_started(false),
704  is_at_end(false),
705  cursor(this_db_->postlist_table.cursor_get())
706 {
707  LOGCALL_CTOR(DB, "GlassPostList", this_db_.get() | term_ | keep_reference);
708  init();
709 }
710 
712  const string & term_,
713  GlassCursor * cursor_)
714  : LeafPostList(term_),
715  this_db(this_db_),
717  is_at_end(false),
718  cursor(cursor_)
719 {
720  LOGCALL_CTOR(DB, "GlassPostList", this_db_.get() | term_ | cursor_);
721  init();
722 }
723 
724 void
726 {
727  string key = GlassPostListTable::make_key(term);
728  int found = cursor->find_entry(key);
729  if (!found) {
730  LOGLINE(DB, "postlist for term not found");
731  number_of_entries = 0;
732  is_at_end = true;
733  pos = 0;
734  end = 0;
735  first_did_in_chunk = 0;
736  last_did_in_chunk = 0;
737  wdf_upper_bound = 0;
738  return;
739  }
740  cursor->read_tag();
741  pos = cursor->current_tag.data();
742  end = pos + cursor->current_tag.size();
743 
744  Xapian::termcount collfreq;
748  &is_last_chunk);
749  read_wdf(&pos, end, &wdf);
750  // This works even if there's only one entry (when wdf == collfreq)
751  // or when collfreq is 0 (=> wdf is 0 too).
752  wdf_upper_bound = max(collfreq - wdf, wdf);
753  LOGLINE(DB, "Initial docid " << did);
754 }
755 
757 {
758  LOGCALL_DTOR(DB, "GlassPostList");
759 }
760 
761 LeafPostList *
762 GlassPostList::open_nearby_postlist(const std::string & term_) const
763 {
764  LOGCALL(DB, LeafPostList *, "GlassPostList::open_nearby_postlist", term_);
765  if (term_.empty())
766  RETURN(NULL);
768  RETURN(NULL);
769  RETURN(new GlassPostList(this_db, term_, cursor->clone()));
770 }
771 
774 {
775  LOGCALL(DB, Xapian::termcount, "GlassPostList::get_doclength", NO_ARGS);
777  Assert(this_db.get());
779 }
780 
783 {
784  LOGCALL(DB, Xapian::termcount, "GlassPostList::get_unique_terms", NO_ARGS);
786  Assert(this_db.get());
788 }
789 
790 bool
792 {
793  LOGCALL(DB, bool, "GlassPostList::next_in_chunk", NO_ARGS);
794  if (pos == end) RETURN(false);
795 
797  read_wdf(&pos, end, &wdf);
798 
799  // Either not at last doc in chunk, or pos == end, but not both.
802  Assert(pos != end || did == last_did_in_chunk);
803 
804  RETURN(true);
805 }
806 
807 void
809 {
810  LOGCALL_VOID(DB, "GlassPostList::next_chunk", NO_ARGS);
811  if (is_last_chunk) {
812  is_at_end = true;
813  return;
814  }
815 
816  cursor->next();
817  if (cursor->after_end()) {
818  is_at_end = true;
819  throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
820  term + "'");
821  }
822  const char * keypos = cursor->current_key.data();
823  const char * keyend = keypos + cursor->current_key.size();
824  // Check we're still in same postlist
825  if (!check_tname_in_key_lite(&keypos, keyend, term)) {
826  is_at_end = true;
827  throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
828  term + "'");
829  }
830 
831  Xapian::docid newdid;
832  if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
833  report_read_error(keypos);
834  }
835  if (newdid <= did) {
836  throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
837  str(newdid) +
838  ") is not greater than final document ID in previous chunk (" +
839  str(did) + ")");
840  }
841  did = newdid;
842 
843  cursor->read_tag();
844  pos = cursor->current_tag.data();
845  end = pos + cursor->current_tag.size();
846 
849  &is_last_chunk);
850  read_wdf(&pos, end, &wdf);
851 }
852 
853 PositionList *
855 {
856  LOGCALL(DB, PositionList *, "GlassPostList::read_position_list", NO_ARGS);
857  Assert(this_db.get());
860 }
861 
862 PositionList *
864 {
865  LOGCALL(DB, PositionList *, "GlassPostList::open_position_list", NO_ARGS);
866  Assert(this_db.get());
868 }
869 
870 PostList *
871 GlassPostList::next(double w_min)
872 {
873  LOGCALL(DB, PostList *, "GlassPostList::next", w_min);
874  (void)w_min; // no warning
875 
876  if (!have_started) {
877  have_started = true;
878  } else {
879  if (!next_in_chunk()) next_chunk();
880  }
881 
882  if (is_at_end) {
883  LOGLINE(DB, "Moved to end");
884  } else {
885  LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf);
886  }
887 
888  RETURN(NULL);
889 }
890 
891 bool
893 {
894  LOGCALL(DB, bool, "GlassPostList::current_chunk_contains", desired_did);
895  if (desired_did >= first_did_in_chunk &&
896  desired_did <= last_did_in_chunk) {
897  RETURN(true);
898  }
899  RETURN(false);
900 }
901 
902 void
904 {
905  LOGCALL_VOID(DB, "GlassPostList::move_to_chunk_containing", desired_did);
906  (void)cursor->find_entry(GlassPostListTable::make_key(term, desired_did));
907  Assert(!cursor->after_end());
908 
909  const char * keypos = cursor->current_key.data();
910  const char * keyend = keypos + cursor->current_key.size();
911  // Check we're still in same postlist
912  if (!check_tname_in_key_lite(&keypos, keyend, term)) {
913  // This should only happen if the postlist doesn't exist at all.
914  is_at_end = true;
915  is_last_chunk = true;
916  return;
917  }
918  is_at_end = false;
919 
920  cursor->read_tag();
921  pos = cursor->current_tag.data();
922  end = pos + cursor->current_tag.size();
923 
924  if (keypos == keyend) {
925  // In first chunk
926 #ifdef XAPIAN_ASSERTIONS
927  Xapian::doccount old_number_of_entries = number_of_entries;
929  Assert(old_number_of_entries == number_of_entries);
930 #else
931  did = read_start_of_first_chunk(&pos, end, NULL, NULL);
932 #endif
933  } else {
934  // In normal chunk
935  if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
936  report_read_error(keypos);
937  }
938  }
939 
942  &is_last_chunk);
943  read_wdf(&pos, end, &wdf);
944 
945  // Possible, since desired_did might be after end of this chunk and before
946  // the next.
947  if (desired_did > last_did_in_chunk) next_chunk();
948 }
949 
950 bool
952 {
953  LOGCALL(DB, bool, "GlassPostList::move_forward_in_chunk_to_at_least", desired_did);
954  if (did >= desired_did)
955  RETURN(true);
956 
957  if (desired_did <= last_did_in_chunk) {
958  while (pos != end) {
960  if (did >= desired_did) {
961  read_wdf(&pos, end, &wdf);
962  RETURN(true);
963  }
964  // It's faster to just skip over the wdf than to decode it.
965  read_wdf(&pos, end, NULL);
966  }
967 
968  // If we hit the end of the chunk then last_did_in_chunk must be wrong.
969  Assert(false);
970  }
971 
972  pos = end;
973  RETURN(false);
974 }
975 
976 PostList *
977 GlassPostList::skip_to(Xapian::docid desired_did, double w_min)
978 {
979  LOGCALL(DB, PostList *, "GlassPostList::skip_to", desired_did | w_min);
980  (void)w_min; // no warning
981  // We've started now - if we hadn't already, we're already positioned
982  // at start so there's no need to actually do anything.
983  have_started = true;
984 
985  // Don't skip back, and don't need to do anything if already there.
986  if (is_at_end || desired_did <= did) RETURN(NULL);
987 
988  // Move to correct chunk
989  if (!current_chunk_contains(desired_did)) {
990  move_to_chunk_containing(desired_did);
991  // Might be at_end now, so we need to check before trying to move
992  // forward in chunk.
993  if (is_at_end) RETURN(NULL);
994  }
995 
996  // Move to correct position in chunk
997  bool have_document = move_forward_in_chunk_to_at_least(desired_did);
998  (void)have_document;
999  Assert(have_document);
1000 
1001  if (is_at_end) {
1002  LOGLINE(DB, "Skipped to end");
1003  } else {
1004  LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf);
1005  }
1006 
1007  RETURN(NULL);
1008 }
1009 
1010 // Used for doclens.
1011 bool
1013 {
1014  LOGCALL(DB, bool, "GlassPostList::jump_to", desired_did);
1015  // We've started now - if we hadn't already, we're already positioned
1016  // at start so there's no need to actually do anything.
1017  have_started = true;
1018 
1019  // If the list is empty, give up right away.
1020  if (pos == 0) RETURN(false);
1021 
1022  // Move to correct chunk, or reload the current chunk to go backwards in it
1023  // (FIXME: perhaps handle the latter case more elegantly, though it won't
1024  // happen during sequential access which is most common).
1025  if (is_at_end || !current_chunk_contains(desired_did) || desired_did < did) {
1026  // Clear is_at_end flag since we can rewind.
1027  is_at_end = false;
1028 
1029  move_to_chunk_containing(desired_did);
1030  // Might be at_end now, so we need to check before trying to move
1031  // forward in chunk.
1032  if (is_at_end) RETURN(false);
1033  }
1034 
1035  // Move to correct position in chunk.
1036  if (!move_forward_in_chunk_to_at_least(desired_did)) RETURN(false);
1037  RETURN(desired_did == did);
1038 }
1039 
1040 string
1042 {
1043  string desc;
1044  description_append(desc, term);
1045  desc += ":";
1046  desc += str(number_of_entries);
1047  return desc;
1048 }
1049 
1050 // Returns the last did to allow in this chunk.
1052 GlassPostListTable::get_chunk(const string &tname,
1053  Xapian::docid did, bool adding,
1054  PostlistChunkReader ** from,
1055  PostlistChunkWriter **to)
1056 {
1057  LOGCALL(DB, Xapian::docid, "GlassPostListTable::get_chunk", tname | did | adding | from | to);
1058  // Get chunk containing entry
1059  string key = make_key(tname, did);
1060 
1061  // Find the right chunk
1062  AutoPtr<GlassCursor> cursor(cursor_get());
1063 
1064  (void)cursor->find_entry(key);
1065  Assert(!cursor->after_end());
1066 
1067  const char * keypos = cursor->current_key.data();
1068  const char * keyend = keypos + cursor->current_key.size();
1069 
1070  if (!check_tname_in_key(&keypos, keyend, tname)) {
1071  // Postlist for this termname doesn't exist.
1072  //
1073  // NB "adding" will only be true if we are adding, but it may sometimes
1074  // be false in some cases where we are actually adding.
1075  if (!adding)
1076  throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
1077 
1078  *from = NULL;
1079  *to = new PostlistChunkWriter(string(), true, tname, true);
1080  RETURN(Xapian::docid(-1));
1081  }
1082 
1083  // See if we're appending - if so we can shortcut by just copying
1084  // the data part of the chunk wholesale.
1085  bool is_first_chunk = (keypos == keyend);
1086  LOGVALUE(DB, is_first_chunk);
1087 
1088  cursor->read_tag();
1089  const char * pos = cursor->current_tag.data();
1090  const char * end = pos + cursor->current_tag.size();
1092  if (is_first_chunk) {
1093  first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
1094  } else {
1095  if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk)) {
1096  report_read_error(keypos);
1097  }
1098  }
1099 
1100  bool is_last_chunk;
1102  last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
1103  *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
1104  is_last_chunk);
1105  if (did > last_did_in_chunk) {
1106  // This is the shortcut. Not very pretty, but I'll leave refactoring
1107  // until I've a clearer picture of everything which needs to be done.
1108  // (FIXME)
1109  *from = NULL;
1110  (*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1111  string(pos, end));
1112  } else {
1113  *from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
1114  }
1115  if (is_last_chunk) RETURN(Xapian::docid(-1));
1116 
1117  // Find first did of next tag.
1118  cursor->next();
1119  if (cursor->after_end()) {
1120  throw Xapian::DatabaseCorruptError("Expected another key but found none");
1121  }
1122  const char *kpos = cursor->current_key.data();
1123  const char *kend = kpos + cursor->current_key.size();
1124  if (!check_tname_in_key(&kpos, kend, tname)) {
1125  throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
1126  }
1127 
1128  // Read the new first docid
1129  Xapian::docid first_did_of_next_chunk;
1130  if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
1131  report_read_error(kpos);
1132  }
1133  RETURN(first_did_of_next_chunk - 1);
1134 }
1135 
1136 void
1137 GlassPostListTable::merge_doclen_changes(const map<Xapian::docid, Xapian::termcount> & doclens)
1138 {
1139  LOGCALL_VOID(DB, "GlassPostListTable::merge_doclen_changes", doclens);
1140 
1141  // The cursor in the doclen_pl will no longer be valid, so reset it.
1142  doclen_pl.reset(0);
1143 
1144  LOGVALUE(DB, doclens.size());
1145  if (doclens.empty()) return;
1146 
1147  // Ensure there's a first chunk.
1148  string current_key = make_key(string());
1149  if (!key_exists(current_key)) {
1150  LOGLINE(DB, "Adding dummy first chunk");
1151  string newtag = make_start_of_first_chunk(0, 0, 0);
1152  newtag += make_start_of_chunk(true, 0, 0);
1153  add(current_key, newtag);
1154  }
1155 
1156  map<Xapian::docid, Xapian::termcount>::const_iterator j;
1157  j = doclens.begin();
1158  Assert(j != doclens.end()); // This case is caught above.
1159 
1160  Xapian::docid max_did;
1161  PostlistChunkReader *from;
1162  PostlistChunkWriter *to;
1163  max_did = get_chunk(string(), j->first, true, &from, &to);
1164  LOGVALUE(DB, max_did);
1165  for ( ; j != doclens.end(); ++j) {
1166  Xapian::docid did = j->first;
1167 
1168 next_doclen_chunk:
1169  LOGLINE(DB, "Updating doclens, did=" << did);
1170  if (from) while (!from->is_at_end()) {
1171  Xapian::docid copy_did = from->get_docid();
1172  if (copy_did >= did) {
1173  if (copy_did == did) from->next();
1174  break;
1175  }
1176  to->append(this, copy_did, from->get_wdf());
1177  from->next();
1178  }
1179  if ((!from || from->is_at_end()) && did > max_did) {
1180  delete from;
1181  to->flush(this);
1182  delete to;
1183  max_did = get_chunk(string(), did, false, &from, &to);
1184  goto next_doclen_chunk;
1185  }
1186 
1187  Xapian::termcount new_doclen = j->second;
1188  if (new_doclen != static_cast<Xapian::termcount>(-1)) {
1189  to->append(this, did, new_doclen);
1190  }
1191  }
1192 
1193  if (from) {
1194  while (!from->is_at_end()) {
1195  to->append(this, from->get_docid(), from->get_wdf());
1196  from->next();
1197  }
1198  delete from;
1199  }
1200  to->flush(this);
1201  delete to;
1202 }
1203 
1204 void
1206  const Inverter::PostingChanges & changes)
1207 {
1208  {
1209  // Rewrite the first chunk of this posting list with the updated
1210  // termfreq and collfreq.
1211  string current_key = make_key(term);
1212  string tag;
1213  (void)get_exact_entry(current_key, tag);
1214 
1215  // Read start of first chunk to get termfreq and collfreq.
1216  const char *pos = tag.data();
1217  const char *end = pos + tag.size();
1218  Xapian::doccount termfreq;
1219  Xapian::termcount collfreq;
1220  Xapian::docid firstdid, lastdid;
1221  bool islast;
1222  if (pos == end) {
1223  termfreq = 0;
1224  collfreq = 0;
1225  firstdid = 0;
1226  lastdid = 0;
1227  islast = true;
1228  } else {
1229  firstdid = read_start_of_first_chunk(&pos, end,
1230  &termfreq, &collfreq);
1231  // Handle the generic start of chunk header.
1232  lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
1233  }
1234 
1235  termfreq += changes.get_tfdelta();
1236  if (termfreq == 0) {
1237  // All postings deleted! So we can shortcut by zapping the
1238  // posting list.
1239  if (islast) {
1240  // Only one entry for this posting list.
1241  del(current_key);
1242  return;
1243  }
1244  MutableGlassCursor cursor(this);
1245  bool found = cursor.find_entry(current_key);
1246  Assert(found);
1247  if (!found) return; // Reduce damage!
1248  while (cursor.del()) {
1249  const char *kpos = cursor.current_key.data();
1250  const char *kend = kpos + cursor.current_key.size();
1251  if (!check_tname_in_key_lite(&kpos, kend, term)) break;
1252  }
1253  return;
1254  }
1255  collfreq += changes.get_cfdelta();
1256 
1257  // Rewrite start of first chunk to update termfreq and collfreq.
1258  string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
1259  newhdr += make_start_of_chunk(islast, firstdid, lastdid);
1260  if (pos == end) {
1261  add(current_key, newhdr);
1262  } else {
1263  Assert(size_t(pos - tag.data()) <= tag.size());
1264  tag.replace(0, pos - tag.data(), newhdr);
1265  add(current_key, tag);
1266  }
1267  }
1268  map<Xapian::docid, Xapian::termcount>::const_iterator j;
1269  j = changes.pl_changes.begin();
1270  Assert(j != changes.pl_changes.end()); // This case is caught above.
1271 
1272  Xapian::docid max_did;
1273  PostlistChunkReader *from;
1274  PostlistChunkWriter *to;
1275  max_did = get_chunk(term, j->first, false, &from, &to);
1276  for ( ; j != changes.pl_changes.end(); ++j) {
1277  Xapian::docid did = j->first;
1278 
1279 next_chunk:
1280  LOGLINE(DB, "Updating term=" << term << ", did=" << did);
1281  if (from) while (!from->is_at_end()) {
1282  Xapian::docid copy_did = from->get_docid();
1283  if (copy_did >= did) {
1284  if (copy_did == did) {
1285  from->next();
1286  }
1287  break;
1288  }
1289  to->append(this, copy_did, from->get_wdf());
1290  from->next();
1291  }
1292  if ((!from || from->is_at_end()) && did > max_did) {
1293  delete from;
1294  to->flush(this);
1295  delete to;
1296  max_did = get_chunk(term, did, false, &from, &to);
1297  goto next_chunk;
1298  }
1299 
1300  Xapian::termcount new_wdf = j->second;
1301  if (new_wdf != Xapian::termcount(-1)) {
1302  to->append(this, did, new_wdf);
1303  }
1304  }
1305 
1306  if (from) {
1307  while (!from->is_at_end()) {
1308  to->append(this, from->get_docid(), from->get_wdf());
1309  from->next();
1310  }
1311  delete from;
1312  }
1313  to->flush(this);
1314  delete to;
1315 }
1316 
1317 void
1319  Xapian::docid & last) const
1320 {
1321  LOGCALL(DB, Xapian::docid, "GlassPostListTable::get_used_docid_range", "&first, &used");
1322  AutoPtr<GlassCursor> cur(cursor_get());
1323  if (!cur->find_entry(pack_glass_postlist_key(string()))) {
1324  // Empty database.
1325  first = last = 0;
1326  return;
1327  }
1328 
1329  cur->read_tag();
1330  const char * p = cur->current_tag.data();
1331  const char * e = p + cur->current_tag.size();
1332 
1333  first = read_start_of_first_chunk(&p, e, NULL, NULL);
1334 
1335  (void)cur->find_entry(pack_glass_postlist_key(string(), GLASS_MAX_DOCID));
1336  Assert(!cur->after_end());
1337 
1338  const char * keypos = cur->current_key.data();
1339  const char * keyend = keypos + cur->current_key.size();
1340  // Check we're still in same postlist
1341  if (!check_tname_in_key_lite(&keypos, keyend, string())) {
1342  // Shouldn't happen - we already handled the empty database case above.
1343  Assert(false);
1344  first = last = 0;
1345  return;
1346  }
1347 
1348  cur->read_tag();
1349  p = cur->current_tag.data();
1350  e = p + cur->current_tag.size();
1351 
1352  Xapian::docid start_of_last_chunk;
1353  if (keypos == keyend) {
1354  start_of_last_chunk = first;
1355  first = read_start_of_first_chunk(&p, e, NULL, NULL);
1356  } else {
1357  // In normal chunk
1358  if (!unpack_uint_preserving_sort(&keypos, keyend,
1359  &start_of_last_chunk)) {
1360  report_read_error(keypos);
1361  }
1362  }
1363 
1364  bool dummy;
1365  last = read_start_of_chunk(&p, e, start_of_last_chunk, &dummy);
1366 }
1367 
1370 {
1371  Assert(!term.empty());
1372  return wdf_upper_bound;
1373 }
void pack_bool(std::string &s, bool value)
Append an encoded bool to a string.
Definition: pack.h:57
void append(GlassTable *table, Xapian::docid did, Xapian::termcount wdf)
Append an entry to this chunk.
#define LOGCALL_STATIC(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:491
#define RETURN(A)
Definition: debuglog.h:493
#define Assert(COND)
Definition: omassert.h:122
Xapian::termcount get_wdf() const
Define the XAPIAN_NORETURN macro.
static void read_number_of_entries(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the number of entries and the collection frequency.
Abstract base class for postlists.
Definition: postlist.h:37
Xapian::termcount get_unique_terms(Xapian::docid did) const
Virtual methods of Database::Internal.
static bool check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
void next_chunk()
Move to the next chunk.
void merge_doclen_changes(const map< Xapian::docid, Xapian::termcount > &doclens)
Merge document length changes.
Xapian::docid get_docid() const
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens.
Xapian::termcount_diff get_tfdelta() const
Get the term frequency delta.
#define GLASS_MAX_DOCID
The largest docid value supported by glass.
Definition: glass_defs.h:50
const char * pos
Position of iteration through current chunk.
bool empty() const
Return true if there are no entries in the table.
Definition: glass_table.h:681
GlassPostList(const GlassPostList &)
Copying is not allowed.
void raw_append(Xapian::docid first_did_, Xapian::docid current_did_, const string &s)
Append a block of raw entries to this chunk.
Class managing a Btree table in a Glass database.
Definition: glass_table.h:425
AutoPtr< GlassCursor > cursor
Cursor pointing to current chunk of postlist.
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID.
bool del()
Delete the current key/tag pair, leaving the cursor on the next entry.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
Class for storing the changes in frequencies for a term.
Postlists in glass databases.
PositionList * open_position_list() const
Get the list of positions of the term in the current document.
Convert types to std::string.
void add(const std::string &key, const std::string &tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
static bool get_tname_from_key(const char **src, const char *end, string &tname)
Abstract base class for leaf postlists.
Definition: leafpostlist.h:39
std::string term
The term name for this postlist (empty for an alldocs postlist).
Definition: leafpostlist.h:52
static void report_read_error(const char *position)
Report an error when reading the posting list.
Xapian::doccount number_of_entries
The number of entries in the posting list.
#define false
Definition: header.h:9
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Returns the length of document did.
AutoPtr< GlassPostList > doclen_pl
PostList for looking up document lengths.
bool is_at_end
Whether we've run off the end of the list yet.
static Xapian::docid read_start_of_first_chunk(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the start of the first chunk in the posting list.
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk...
static string make_start_of_chunk(bool new_is_last_chunk, Xapian::docid new_first_did, Xapian::docid new_final_did)
Make the data to go at the start of a standard chunk.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
bool next(Glass::Cursor *C_, int j) const
Definition: glass_table.h:860
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to next document with docid >= docid.
virtual void read_position_list(GlassPositionList *pos_list, Xapian::docid did, const string &term) const
Virtual methods of Database::Internal.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
static void write_start_of_chunk(string &chunk, unsigned int start_of_chunk_header, unsigned int end_of_chunk_header, bool is_last_chunk, Xapian::docid first_did_in_chunk, Xapian::docid last_did_in_chunk)
const char * dummy[]
Definition: version_h.cc:7
bool is_last_chunk
True if this is the last chunk.
RangeError indicates an attempt to access outside the bounds of a container.
Definition: error.h:971
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Check if document did exists.
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid.
void description_append(std::string &desc, const std::string &s)
Definition: unittest.cc:102
Xapian::termcount get_unique_terms() const
Returns the number of unique terms in the current document.
const char * end
Pointer to byte after end of current chunk.
string current_key
Current key pointed to by cursor.
Definition: glass_cursor.h:239
bool next_in_chunk()
Move to the next item in the chunk, if possible.
GlassPostListTable postlist_table
Table storing posting lists.
Xapian::termcount_diff get_cfdelta() const
Get the collection frequency delta.
Xapian::termcount get_wdf_upper_bound() const
static string make_start_of_first_chunk(Xapian::doccount entries, Xapian::termcount collectionfreq, Xapian::docid new_did)
Make the data to go at the start of the very first chunk.
#define LOGVALUE(a, b)
Definition: debuglog.h:495
PositionList * open_position_list(Xapian::docid did, const string &term) const
Virtual methods of Database::Internal.
std::string get_description() const
Get a description of the document.
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
~GlassPostList()
Destructor.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
void flush(GlassTable *table)
Flush the chunk to the buffered table.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
C++ class definition for glass database.
A postlist in a glass database.
Xapian::docid get_chunk(const string &tname, Xapian::docid did, bool adding, Glass::PostlistChunkReader **from, Glass::PostlistChunkWriter **to)
void merge_changes(const string &term, const Inverter::PostingChanges &changes)
Merge changes for a term.
std::map< Xapian::docid, Xapian::termcount > pl_changes
Changes to this term's postlist.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
Definition: pack.h:562
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
Definition: pack.h:318
const unsigned int CHUNKSIZE
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:489
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:147
static bool check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
Append a string to an object description, escaping invalid UTF-8.
static void read_wdf(const char **posptr, const char *end, Xapian::termcount *wdf_ptr)
Read the wdf for an entry.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
bool unpack_bool(const char **p, const char *end, bool *result)
Decode a bool from a string.
Definition: pack.h:69
Xapian::Internal::intrusive_ptr< const GlassDatabase > this_db
The database we are searching.
PostlistChunkReader(Xapian::docid first_did, const string &data_)
Initialise the postlist chunk reader.
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
PositionList * read_position_list()
Get the list of positions of the term in the current document.
Interface to Btree cursors.
bool have_started
Whether we've started reading the list yet.
std::string pack_glass_postlist_key(const std::string &term)
Definition: pack.h:613
Xapian::docid did
Document id we're currently at.
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
static Xapian::docid read_start_of_chunk(const char **posptr, const char *end, Xapian::docid first_did_in_chunk, bool *is_last_chunk_ptr)
Read the start of a chunk.
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
Xapian::termcount get_doclength(Xapian::docid did) const
Virtual methods of Database::Internal.
LeafPostList * open_nearby_postlist(const std::string &term_) const
Open another postlist from the same database.
Xapian::termcount wdf
The wdf of the current document.
void get_freqs(const std::string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr, Xapian::termcount *wdfub_ptr=NULL) const
Returns frequencies for a term.
#define LOGLINE(a, b)
Definition: debuglog.h:494
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Xapian::termcount get_doclength() const
Returns the length of current document.
GlassPositionList positionlist
The position list object for this posting list.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
static void read_did_increase(const char **posptr, const char *end, Xapian::docid *did_ptr)
bool is_writable() const
Return true if this table is writable.
Definition: glass_table.h:509
bool del(const std::string &key)
Delete an entry from the table.
string make_key(Xapian::docid did)
Definition: chert_record.cc:35
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
void next()
Advance to the next entry.