xapian-core  2.0.0
honey_postlisttable.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007-2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "honey_postlisttable.h"
24 
25 #include "honey_alldocspostlist.h"
26 #include "honey_cursor.h"
27 #include "honey_database.h"
28 #include "honey_defs.h"
29 #include "honey_postlist.h"
31 
32 #include <memory>
33 #include <string_view>
34 
35 using namespace Honey;
36 using namespace std;
37 
40  std::string_view term,
41  bool need_read_pos) const
42 {
43  Assert(!term.empty());
44  // Try to position cursor first so we avoid creating HoneyPostList objects
45  // for terms which don't exist.
46  unique_ptr<HoneyCursor> cursor(cursor_get());
47  if (!cursor->find_exact(Honey::make_postingchunk_key(term))) {
48  return nullptr;
49  }
50 
51  if (need_read_pos)
52  return new HoneyPosPostList(db, term, cursor.release());
53  return new HoneyPostList(db, term, cursor.release());
54 }
55 
56 void
58  Xapian::doccount* termfreq_ptr,
59  Xapian::termcount* collfreq_ptr) const
60 {
61  string chunk;
62  if (!get_exact_entry(Honey::make_postingchunk_key(term), chunk)) {
63  if (termfreq_ptr) *termfreq_ptr = 0;
64  if (collfreq_ptr) *collfreq_ptr = 0;
65  return;
66  }
67 
68  const char* p = chunk.data();
69  const char* pend = p + chunk.size();
72  if (!decode_initial_chunk_header_freqs(&p, pend, tf, cf))
73  throw Xapian::DatabaseCorruptError("Postlist initial chunk header");
74  if (termfreq_ptr) *termfreq_ptr = tf;
75  if (collfreq_ptr) *collfreq_ptr = cf;
76 }
77 
78 void
80  Xapian::docid& first,
81  Xapian::docid& last) const
82 {
83  unique_ptr<HoneyCursor> cursor(cursor_get());
84  Assert(cursor);
85 
86  static const char doclen_key_prefix[2] = {
88  };
89  if (cursor->find_entry_ge(string(doclen_key_prefix, 2))) {
90  first = 1;
91  } else {
92  // doccount == 0 should be handled by our caller.
93  Assert(!cursor->after_end());
94  Xapian::docid last_in_first_chunk = docid_from_key(cursor->current_key);
95  if (last_in_first_chunk == 0) {
96  // Note that our caller checks for doccount == 0 and handles that.
97  throw Xapian::DatabaseCorruptError("Bad first doclen chunk key");
98  }
99  cursor->read_tag();
100  unsigned width = cursor->current_tag[0] / 8;
101  first = last_in_first_chunk - (cursor->current_tag.size() - 2) / width;
102  }
103 
104  // We know the last docid is at least first - 1 + doccount, so seek
105  // to there and then scan forwards. If we match exactly, then that
106  // is exactly the last docid (our caller handles this case when
107  // first == 1, but not otherwise).
108  last = first - 1 + doccount;
109  if (cursor->find_entry_ge(make_doclenchunk_key(last)))
110  return;
111 
112  if (cursor->after_end())
113  throw Xapian::DatabaseCorruptError("Missing doclen chunk");
114 
115  do {
116  Xapian::docid new_last = docid_from_key(cursor->current_key);
117  if (new_last == 0) {
118  // We've hit a non-doclen item.
119  return;
120  }
121  last = new_last;
122  } while (cursor->next());
123 
124  // We've reached the end of the table (only possible if there are no terms
125  // at all!)
126 }
127 
130 {
131  string chunk;
132  if (!get_exact_entry(Honey::make_postingchunk_key(term), chunk)) {
133  // Term not present.
134  return 0;
135  }
136 
137  const char* p = chunk.data();
138  const char* pend = p + chunk.size();
139  Xapian::doccount tf;
141  Xapian::docid first;
142  Xapian::docid last;
143  Xapian::docid chunk_last;
144  Xapian::termcount first_wdf;
145  Xapian::termcount wdf_max;
146  if (!decode_initial_chunk_header(&p, pend, tf, cf, first, last, chunk_last,
147  first_wdf, wdf_max))
148  throw Xapian::DatabaseCorruptError("Postlist initial chunk header");
149  return wdf_max;
150 }
Database using honey backend.
PostList in a honey database with positions.
void get_used_docid_range(Xapian::doccount doccount, Xapian::docid &first, Xapian::docid &last) const
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
HoneyPostList * open_post_list(const HoneyDatabase *db, std::string_view term, bool need_read_pos) const
Xapian::termcount get_wdf_upper_bound(std::string_view term) const
PostList in a honey database.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
string term
PositionList * p
A PostList which iterates over all documents in a HoneyDatabase.
HoneyCursor class.
Database using honey backend.
Definitions, types, etc for use inside honey.
PostList in a honey database.
Encoding and decoding functions for honey postlists.
bool decode_initial_chunk_header(const char **p, const char *end, Xapian::doccount &termfreq, Xapian::termcount &collfreq, Xapian::docid &first, Xapian::docid &last, Xapian::docid &chunk_last, Xapian::termcount &first_wdf, Xapian::termcount &wdf_max)
bool decode_initial_chunk_header_freqs(const char **p, const char *end, Xapian::doccount &termfreq, Xapian::termcount &collfreq)
Subclass of HoneyTable which holds postlists.
std::string make_postingchunk_key(std::string_view term)
Generate a key for a posting initial chunk.
std::string make_doclenchunk_key(Xapian::docid last_did)
Generate a key for a doclen chunk.
Xapian::docid docid_from_key(const std::string &key)
@ KEY_DOCLEN_CHUNK
Definition: honey_defs.h:88
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define Assert(COND)
Definition: omassert.h:122