xapian-core  2.0.0
honey_postlist.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2011,2013,2015,2016,2017,2018,2024 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_HONEY_POSTLIST_H
23 #define XAPIAN_INCLUDED_HONEY_POSTLIST_H
24 
25 #include "backends/leafpostlist.h"
26 #include "honey_positionlist.h"
27 #include "pack.h"
28 
29 #include <string>
30 #include <string_view>
31 
32 class HoneyCursor;
33 class HoneyDatabase;
34 
35 namespace Honey {
36 
38 inline std::string
39 make_postingchunk_key(std::string_view term)
40 {
41  std::string key;
43  return key;
44 }
45 
// Builds a key by appending the sort-preserving encoding of `did` to `key`
// and returns it.
// NOTE(review): the declarator line (function name and parameter list) is
// absent here, and `key` is still empty when `did` is packed.  Presumably
// this is the continuation-chunk overload of make_postingchunk_key(), taking
// a term and a docid and packing the term before the docid - confirm against
// the original header before relying on it.
47 inline std::string
49 {
50  std::string key;
52  pack_uint_preserving_sort(key, did);
53  return key;
54 }
55 
56 inline Xapian::docid
57 docid_from_key(const std::string& term, const std::string& key)
58 {
59  if (key.size() < term.size()) {
60  // A key can't be shorter than the term it contains.
61  return false;
62  }
63  const char* p = key.data();
64  const char* end = p + key.size();
65  // Most terms don't contain zero bytes, so we could optimise this.
66  std::string term_in_key;
67  // FIXME: the next key might not be for a postlist chunk...
68  if (!unpack_string_preserving_sort(&p, end, term_in_key))
69  throw Xapian::DatabaseCorruptError("bad postlist key");
70  if (term_in_key != term)
71  return false;
72  Xapian::docid did;
73  if (!unpack_uint_preserving_sort(&p, end, &did))
74  throw Xapian::DatabaseCorruptError("bad postlist key");
75  return did;
76 }
77 
// Null when the reader is at_end(): init() nulls it and at_end() tests it.
// NOTE(review): presumably otherwise the read position in the chunk data.
79  const char* p = nullptr;
// NOTE(review): presumably the end of the chunk data that p walks - confirm.
80  const char* end;
81 
83 
85 
88 
90 
113 
114  public:
117 
119  void init() {
120  p = NULL;
121  termfreq = 0;
122  }
123 
126  p = NULL;
127  termfreq = tf;
128  collfreq_info = cf_info;
129  }
130 
// Declaration only; the definition is not in this header.
// NOTE(review): presumably points the reader at the len bytes of chunk data
// starting at p_, with did as the chunk's starting docid - confirm.
131  void assign(const char* p_, size_t len, Xapian::docid did);
132 
// Declaration only; the definition is not in this header.
// NOTE(review): presumably like the three-argument assign(), but with the
// first docid, the last docid in the chunk and the first wdf supplied by the
// caller rather than decoded from the data - confirm.
133  void assign(const char* p_, size_t len, Xapian::docid did_,
134  Xapian::docid last_did_in_chunk,
135  Xapian::termcount wdf_);
136 
137  bool at_end() const { return p == NULL; }
138 
// Return the stored did (presumably the docid of the current entry).
139  Xapian::docid get_docid() const { return did; }
140 
// Return the stored wdf (presumably the wdf of the current entry).
141  Xapian::termcount get_wdf() const { return wdf; }
142 
/// Advance, returning false if we've run out of data.
144  bool next();
145 
/// Skip ahead, returning false if we've run out of data.
147  bool skip_to(Xapian::docid target);
148 };
149 
150 }
151 
/// PostList in a honey database.
153 class HoneyPostList : public LeafPostList {
156 
/// Don't allow copying.
158  HoneyPostList(const HoneyPostList&) = delete;
159 
162 
164 
167 
170 
176 
/// Needed so that first next() does nothing.
181  bool started = false;
182 
/// Update reader to use the chunk currently pointed to by cursor.
184  bool update_reader();
185 
186  public:
// Constructor.
// NOTE(review): presumably opens the postlist for term_ in database db_,
// taking cursor_ as the cursor on the postlist table - confirm against the
// definition.
188  HoneyPostList(const HoneyDatabase* db_,
189  std::string_view term_,
190  HoneyCursor* cursor_);
191 
192  ~HoneyPostList();
193 
/// Open another postlist from the same database.
194  bool open_nearby_postlist(std::string_view term_,
195  bool need_read_pos,
196  LeafPostList*& pl) const;
197 
/// Return the current docid.
198  Xapian::docid get_docid() const;
199 
/// Return the wdf for the document at the current position.
200  Xapian::termcount get_wdf() const;
201 
/// Return true if the current position is past the last entry in this list.
202  bool at_end() const;
203 
205 
/// Advance the current position to the next document in the postlist.
// NOTE(review): the meaning of w_min is defined by the PostList base class,
// which is not visible here.
206  PostList* next(double w_min);
207 
/// Skip forward to the specified docid.
208  PostList* skip_to(Xapian::docid did, double w_min);
209 
211 
/// Get the bounds on the range of docids this PostList can return.
212  void get_docid_range(Xapian::docid& first, Xapian::docid& last) const;
213 
/// Return a string description of this object.
214  std::string get_description() const;
215 };
216 
229 
230  public:
// Constructor for a PostList in a honey database with positions.
// NOTE(review): takes the same arguments as the HoneyPostList constructor;
// presumably they are forwarded to it - confirm against the definition.
231  HoneyPosPostList(const HoneyDatabase* db_,
232  std::string_view term_,
233  HoneyCursor* cursor_);
234 
236 
/// Return a string description of this object.
237  std::string get_description() const;
238 };
239 
240 #endif // XAPIAN_INCLUDED_HONEY_POSTLIST_H
Database using honey backend.
PostList in a honey database with positions.
HoneyRePositionList position_list
PositionList object to reuse for OP_NEAR and OP_PHRASE.
std::string get_description() const
Return a string description of this object.
HoneyPosPostList(const HoneyDatabase *db_, std::string_view term_, HoneyCursor *cursor_)
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the PostList).
PostList in a honey database.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Xapian::docid last_did
The highest document id in this posting list.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by the PostList).
Xapian::termcount wdf_max
Maximum wdf for this postlist.
bool open_nearby_postlist(std::string_view term_, bool need_read_pos, LeafPostList *&pl) const
Open another postlist from the same database.
HoneyCursor * cursor
Cursor on the postlist table.
Xapian::termcount get_wdf_upper_bound() const
const HoneyDatabase * db
HoneyDatabase to get position table object from.
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
HoneyPostList & operator=(const HoneyPostList &)=delete
Don't allow assignment.
Honey::PostingChunkReader reader
bool update_reader()
Update reader to use the chunk currently pointed to by cursor.
HoneyPostList(const HoneyPostList &)=delete
Don't allow copying.
Xapian::docid get_docid() const
Return the current docid.
bool started
Needed so that first next() does nothing.
std::string get_description() const
Return a string description of this object.
bool at_end() const
Return true if the current position is past the last entry in this list.
A reusable position list in a honey database.
Xapian::doccount termfreq
PostingChunkReader()
Create an uninitialised PostingChunkReader.
void init()
Initialise already at_end().
Xapian::termcount wdf
Xapian::docid get_docid() const
Xapian::docid last_did
The last docid in this chunk.
Xapian::termcount collfreq_info
Value "to do with" collection frequency.
void assign(const char *p_, size_t len, Xapian::docid did)
Xapian::termcount get_wdf() const
bool next()
Advance, returning false if we've run out of data.
bool skip_to(Xapian::docid target)
Skip ahead, returning false if we've run out of data.
void init(Xapian::doccount tf, Xapian::termcount cf_info)
Initialise.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
Abstract base class for postlists.
Definition: postlist.h:40
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
string term
PositionList * p
A position list in a honey database.
Abstract base class for leaf postlists.
std::string make_postingchunk_key(std::string_view term)
Generate a key for a posting initial chunk.
Xapian::docid docid_from_key(const std::string &key)
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Pack types into strings and unpack them again.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
Definition: pack.h:551
void pack_string_preserving_sort(std::string &s, std::string_view value, bool last=false)
Append an encoded std::string to a string, preserving the sort order.
Definition: pack.h:528
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
Definition: pack.h:251
void pack_uint_preserving_sort(std::string &s, U value)
Append an encoded unsigned integer to a string, preserving the sort order.
Definition: pack.h:204