xapian-core  2.0.0
glass_postlist.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002-2024 Olly Betts
7  * Copyright 2007,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, see
21  * <https://www.gnu.org/licenses/>.
22  */
23 
24 #ifndef XAPIAN_INCLUDED_GLASS_POSTLIST_H
25 #define XAPIAN_INCLUDED_GLASS_POSTLIST_H
26 
27 #include <xapian/database.h>
28 
29 #include "backends/leafpostlist.h"
30 #include "glass_defs.h"
31 #include "glass_inverter.h"
32 #include "glass_positionlist.h"
33 #include "omassert.h"
34 
35 #include <memory>
36 #include <map>
37 #include <string>
38 #include <string_view>
39 
// Forward declarations: only pointers/references to these types are needed
// in this header, so their full definitions are not required here.
// GlassCursor is a cursor pointing to a position in a Btree table, used for
// reading several entries in order.
40 class GlassCursor;
41 class GlassDatabase;
42 
43 namespace Glass {
// PostlistChunkReader: essentially an iterator wrapper around a postlist chunk.
44  class PostlistChunkReader;
// PostlistChunkWriter: acts roughly as an output iterator on a postlist chunk.
45  class PostlistChunkWriter;
46  class RootInfo;
47 }
48 
// Let the declarations below name RootInfo without the Glass:: qualifier.
49 using Glass::RootInfo;
50 
/// A postlist in a glass database.
///
/// Derives from LeafPostList (the abstract base class for leaf postlists).
///
// NOTE(review): this listing is incomplete. The lines that carry only a
// number are places where declarations were dropped from the rendering; the
// member index for this class also lists this_db, positionlist,
// have_started, is_last_chunk, first_did_in_chunk, last_did_in_chunk, did,
// wdf, wdf_upper_bound, the (disallowed) copy constructor, get_docid(),
// get_wdf(), get_wdf_upper_bound(), read_position_list(),
// open_position_list() and move_forward_in_chunk_to_at_least(). Consult the
// original header before relying on the member order shown here.
53 class GlassPostList : public LeafPostList {
59 
62 
65 
68 
/// Whether we've run off the end of the list yet.
70  bool is_at_end;
71 
/// Cursor pointing to current chunk of postlist.
73  std::unique_ptr<GlassCursor> cursor;
74 
77 
80 
/// Position of iteration through current chunk.
82  const char * pos;
83 
/// Pointer to byte after end of current chunk.
85  const char * end;
86 
89 
92 
95 
98 
/// Assignment is not allowed.
100  void operator=(const GlassPostList &);
101 
/// Move to the next item in the chunk, if possible.
105  bool next_in_chunk();
106 
/// Move to the next chunk.
112  void next_chunk();
113 
/// Return true if the given document ID lies in the range covered by the
/// current chunk.
121  bool current_chunk_contains(Xapian::docid desired_did);
122 
/// Move to chunk containing the specified document ID.
134  void move_to_chunk_containing(Xapian::docid desired_did);
135 
146 
// NOTE(review): the first line of this declaration (source line 147) is
// missing from the listing; what remains is the tail of a parameter list
// taking a term and a GlassCursor pointer - presumably a non-public
// constructor. TODO confirm against the original header.
148  std::string_view term,
149  GlassCursor * cursor_);
150 
151  void init();
152 
153  public:
// NOTE(review): the first line of this declaration (source line 155) is
// missing from the listing; what remains is the tail of a parameter list
// taking a term and a keep_reference flag - presumably the public
// constructor. TODO confirm against the original header.
156  std::string_view term,
157  bool keep_reference);
158 
/// Destructor.
160  ~GlassPostList();
161 
/// Open another postlist from the same database.
162  bool open_nearby_postlist(std::string_view term_,
163  bool need_read_pos,
164  LeafPostList*& pl) const;
165 
/// Used for looking up doclens.
170  bool jump_to(Xapian::docid desired_did);
171 
174 
179 
183 
187 
/// Advance the current position to the next document in the postlist.
189  PostList * next(double w_min);
190 
/// Skip to the next document with docid >= desired_did.
192  PostList * skip_to(Xapian::docid desired_did, double w_min);
193 
/// Return true if and only if we're off the end of the list.
195  bool at_end() const { return is_at_end; }
196 
198 
/// Get the bounds on the range of docids this PostList can return.
199  void get_docid_range(Xapian::docid& first, Xapian::docid& last) const;
200 
/// Get a description string.
202  std::string get_description() const;
203 
/// Read the term frequency and collection frequency.
///
/// Reads from the buffer addressed through @a posptr, bounded by @a end,
/// and reports the results through the two output pointers.
205  static void read_freqs(const char** posptr,
206  const char* end,
207  Xapian::doccount* number_of_entries_ptr,
208  Xapian::termcount* collection_freq_ptr);
209 };
210 
// NOTE(review): the class header for GlassPostListTable (source line 212) is
// missing from this listing. The constructors below initialise a GlassTable
// base, so the class derives from GlassTable.
/// PostList for looking up document lengths.
///
/// Declared mutable so that const member functions can (re)create it.
213  mutable std::unique_ptr<GlassPostList> doclen_pl;
214 
215  public:
/// Create a new table object.
///
/// This only constructs the object: the base is initialised with the table
/// name "postlist" and the path prefix path_ + "/postlist.", and doclen_pl
/// starts out empty.
///
/// @param path_      Directory path; "/postlist." is appended to it to form
///                   the prefix handed to GlassTable.
/// @param readonly_  Forwarded unchanged to the GlassTable constructor.
228  GlassPostListTable(const std::string& path_, bool readonly_)
229  : GlassTable("postlist", path_ + "/postlist.", readonly_),
230  doclen_pl()
231  { }
232 
/// Create a new table object from a file descriptor and offset.
///
/// The base is initialised with the table name "postlist" and the given
/// descriptor/offset; doclen_pl starts out empty.
///
/// @param fd         File descriptor, forwarded to the GlassTable constructor.
/// @param offset_    Offset, forwarded to the GlassTable constructor.
/// @param readonly_  Forwarded unchanged to the GlassTable constructor.
233  GlassPostListTable(int fd, off_t offset_, bool readonly_)
234  : GlassTable("postlist", fd, offset_, readonly_),
235  doclen_pl()
236  { }
237 
/// Open the btree.
///
/// Discards the cached document-length postlist (doclen_pl) and then
/// delegates to GlassTable::open().
///
/// @param flags_     Forwarded unchanged to GlassTable::open().
/// @param root_info  Forwarded unchanged to GlassTable::open().
/// @param rev        Revision number, forwarded to GlassTable::open().
238  void open(int flags_, const RootInfo & root_info,
239  glass_revision_number_t rev) {
// Drop the cached postlist before (re)opening - presumably because it refers
// to the previously opened state of the table (TODO confirm).
240  doclen_pl.reset();
241  GlassTable::open(flags_, root_info, rev);
242  }
243 
/// Merge changes for a term.
///
/// @param term     The term whose postings are being updated.
/// @param changes  The Inverter::PostingChanges to merge for @a term.
245  void merge_changes(std::string_view term,
246  const Inverter::PostingChanges& changes);
247 
/// Merge document length changes.
///
/// @param doclens  Map from document id to a Xapian::termcount value
///                 (presumably the new document length - TODO confirm how
///                 deletions are represented).
249  void merge_doclen_changes(const std::map<Xapian::docid,
250  Xapian::termcount>& doclens);
251 
/// Obtain reader/writer objects for a postlist chunk of a term.
///
/// @param tname   The term name.
/// @param did     The document id of interest.
/// @param adding  Flag passed by the caller (presumably whether an entry is
///                being added - TODO confirm against the implementation).
/// @param from    Output: receives a Glass::PostlistChunkReader pointer.
/// @param to      Output: receives a Glass::PostlistChunkWriter pointer.
/// @return A document id (its exact meaning is defined by the
///         implementation, which is not visible here).
252  Xapian::docid get_chunk(std::string_view tname,
253  Xapian::docid did, bool adding,
254  Glass::PostlistChunkReader ** from,
255  Glass::PostlistChunkWriter ** to);
256 
/// Compose a key from a termname and docid.
///
/// Thin wrapper around pack_glass_postlist_key(term, did).
258  static std::string make_key(std::string_view term, Xapian::docid did) {
259  return pack_glass_postlist_key(term, did);
260  }
261 
/// Compose a key from a termname.
///
/// Thin wrapper around the single-argument pack_glass_postlist_key(term).
263  static std::string make_key(std::string_view term) {
264  return pack_glass_postlist_key(term);
265  }
266 
/// Check whether a term is present in the table.
///
/// Builds the key for @a term with make_key() and asks key_exists() whether
/// that key exists in the Btree.
267  bool term_exists(std::string_view term) const {
268  return key_exists(make_key(term));
269  }
270 
/// Returns frequencies for a term.
///
/// @param term          The term to look up.
/// @param termfreq_ptr  Output pointer for the term frequency.
/// @param collfreq_ptr  Output pointer for the collection frequency.
/// @param wdfub_ptr     Optional output pointer (defaults to null), named
///                      for the wdf upper bound.
282  void get_freqs(std::string_view term,
283  Xapian::doccount* termfreq_ptr,
284  Xapian::termcount* collfreq_ptr,
285  Xapian::termcount* wdfub_ptr = nullptr) const;
286 
290 
294 
/// Report a range of document ids through the two output references.
///
/// NOTE(review): judging by the name, @a first and @a last receive the
/// lowest and highest docids currently in use - confirm against the
/// implementation, which is not visible here.
295  void get_used_docid_range(Xapian::docid & first,
296  Xapian::docid & last) const;
297 };
298 #ifdef DISABLE_GPL_LIBXAPIAN
299 # error GPL source we cannot relicense included in libxapian
300 #endif
301 
302 #endif /* XAPIAN_INCLUDED_GLASS_POSTLIST_H */
A cursor pointing to a position in a Btree table, for reading several entries in order,...
Definition: glass_cursor.h:148
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree s...
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
GlassPostListTable(int fd, off_t offset_, bool readonly_)
Xapian::docid get_chunk(std::string_view tname, Xapian::docid did, bool adding, Glass::PostlistChunkReader **from, Glass::PostlistChunkWriter **to)
bool document_exists(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Check if document did exists.
GlassPostListTable(const std::string &path_, bool readonly_)
Create a new table object.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
static std::string make_key(std::string_view term, Xapian::docid did)
Compose a key from a termname and docid.
Xapian::termcount get_doclength(Xapian::docid did, Xapian::Internal::intrusive_ptr< const GlassDatabase > db) const
Returns the length of document did.
void merge_changes(std::string_view term, const Inverter::PostingChanges &changes)
Merge changes for a term.
static std::string make_key(std::string_view term)
Compose a key from a termname.
bool term_exists(std::string_view term) const
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr, Xapian::termcount *wdfub_ptr=NULL) const
Returns frequencies for a term.
void merge_doclen_changes(const std::map< Xapian::docid, Xapian::termcount > &doclens)
Merge document length changes.
std::unique_ptr< GlassPostList > doclen_pl
PostList for looking up document lengths.
A postlist in a glass database.
Xapian::docid get_docid() const
Returns the current docid.
bool have_started
Whether we've started reading the list yet.
Xapian::docid did
Document id we're currently at.
bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
Scan forward in the current chunk for the specified document ID.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
bool current_chunk_contains(Xapian::docid desired_did)
Return true if the given document ID lies in the range covered by the current chunk.
void move_to_chunk_containing(Xapian::docid desired_did)
Move to chunk containing the specified document ID.
PositionList * open_position_list() const
Get the list of positions of the term in the current document.
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
PostList * skip_to(Xapian::docid desired_did, double w_min)
Skip to the next document with docid >= desired_did.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
bool is_at_end
Whether we've run off the end of the list yet.
bool next_in_chunk()
Move to the next item in the chunk, if possible.
static void read_freqs(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the term frequency and collection frequency.
std::string get_description() const
Get a description of the postlist.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
Xapian::Internal::intrusive_ptr< const GlassDatabase > this_db
The database we are searching.
Xapian::termcount get_wdf_upper_bound() const
Xapian::termcount get_wdf() const
Returns the Within Document Frequency of the term in the current document.
bool open_nearby_postlist(std::string_view term_, bool need_read_pos, LeafPostList *&pl) const
Open another postlist from the same database.
GlassPostList(const GlassPostList &)
Copying is not allowed.
bool jump_to(Xapian::docid desired_did)
Used for looking up doclens.
~GlassPostList()
Destructor.
GlassRePositionList * positionlist
The position list object for this posting list.
PositionList * read_position_list()
Get the list of positions of the term in the current document.
const char * end
Pointer to byte after end of current chunk.
const char * pos
Position of iteration through current chunk.
bool at_end() const
Return true if and only if we're off the end of the list.
bool is_last_chunk
True if this is the last chunk.
void operator=(const GlassPostList &)
Assignment is not allowed.
void next_chunk()
Move to the next chunk.
std::unique_ptr< GlassCursor > cursor
Cursor pointing to current chunk of postlist.
Xapian::termcount wdf
The wdf of the current document.
A reusable position list in a glass database.
Class managing a Btree table in a Glass database.
Definition: glass_table.h:432
bool key_exists(std::string_view key) const
Check if a key exists in the Btree.
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk,...
Class for storing the changes in frequencies for a term.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
std::string term
The term name for this postlist (empty for an alldocs postlist).
Definition: leafpostlist.h:51
Abstract base class for postlists.
Definition: postlist.h:40
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
An indexed database of documents.
string term
Definitions, types, etc for use inside glass.
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
Inverter class which "inverts the file".
A position list in a glass database.
Abstract base class for leaf postlists.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:108
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
std::string pack_glass_postlist_key(std::string_view term)
Definition: pack.h:574