xapian-core  1.4.27
glass_postlist.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015 Olly Betts
7  * Copyright 2007,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #ifndef OM_HGUARD_GLASS_POSTLIST_H
26 #define OM_HGUARD_GLASS_POSTLIST_H
27 
28 #include <xapian/database.h>
29 
30 #include "glass_defs.h"
31 #include "glass_inverter.h"
32 #include "glass_positionlist.h"
33 #include "api/leafpostlist.h"
34 #include "omassert.h"
35 
36 #include "autoptr.h"
37 #include <map>
38 #include <string>
39 
// NOTE(review): this using-directive sits at header scope, so it applies to
// every file that includes this header.  The unqualified `string` and `map`
// used in the declarations further down rely on it.
40 using namespace std;
41 
// Forward declarations; only pointers/smart pointers to these types appear in
// the declarations visible in this listing.
42 class GlassCursor;
43 class GlassDatabase;
44 
45 namespace Glass {
    // "PostlistChunkReader is essentially an iterator wrapper around a
    // postlist chunk."  (description taken from this page's index)
46  class PostlistChunkReader;
    // "PostlistChunkWriter is a wrapper which acts roughly as an output
    // iterator on a postlist chunk..."  (description taken from this page's
    // index)
47  class PostlistChunkWriter;
48  class RootInfo;
49 }
50 
// Make Glass::RootInfo usable unqualified (it is used that way in the
// signature of open() below).
51 using Glass::RootInfo;
52 
/// A postlist in a glass database.
///
/// NOTE(review): this listing is lossy.  The index at the end of the page
/// names further members of this class (this_db, have_started,
/// is_last_chunk, first_did_in_chunk, last_did_in_chunk, did, wdf,
/// wdf_upper_bound, number_of_entries, positionlist) whose declarations are
/// not present below, and the first line of each constructor declaration is
/// missing as well.
55 class GlassPostList : public LeafPostList {
61 
64 
67 
70 
    /// Whether we've run off the end of the list yet.
72  bool is_at_end;
73 
    /// Cursor pointing to current chunk of postlist.
75  AutoPtr<GlassCursor> cursor;
76 
79 
82 
    /// Position of iteration through current chunk.
84  const char * pos;
85 
    /// Pointer to byte after end of current chunk.
87  const char * end;
88 
91 
94 
97 
100 
    /// Copy constructor, declared in the private section (before the
    /// `public:` label) -- presumably to disallow copying; confirm that no
    /// definition exists.
102  GlassPostList(const GlassPostList &);
103 
    /// Assignment operator, likewise declared in the private section.
105  void operator=(const GlassPostList &);
106 
    // The five private helpers below take part in moving through the
    // postlist's chunks, judging by their names only; their documentation
    // comments are not present in this listing, so consult the
    // implementation for exact semantics.
110  bool next_in_chunk();
111 
117  void next_chunk();
118 
126  bool current_chunk_contains(Xapian::docid desired_did);
127 
139  void move_to_chunk_containing(Xapian::docid desired_did);
140 
150  bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did);
151 
    // NOTE(review): tail of a private constructor declaration taking (among
    // others) a term and a GlassCursor pointer; its first line is not present
    // in this listing.
153  const string & term,
154  GlassCursor * cursor_);
155 
156  void init();
157 
158  public:
    // NOTE(review): tail of a public constructor declaration taking (among
    // others) a term and a keep_reference flag; its first line is not present
    // in this listing.
161  const string & term,
162  bool keep_reference);
163 
165  ~GlassPostList();
166 
167  LeafPostList * open_nearby_postlist(const std::string & term_) const;
168 
173  bool jump_to(Xapian::docid desired_did);
174 
    /// Returns number of docs indexed by this term.
    ///
    /// This is simply the stored number_of_entries.
179  Xapian::doccount get_termfreq() const { return number_of_entries; }
180 
    /// Returns the current docid.
    ///
    /// Asserts that have_started is set before returning did.
182  Xapian::docid get_docid() const { Assert(have_started); return did; }
183 
185  Xapian::termcount get_doclength() const;
186 
188  Xapian::termcount get_unique_terms() const;
189 
    /// Returns the Within Document Frequency of the term in the current
    /// document.
    ///
    /// Asserts that have_started is set before returning wdf.
193  Xapian::termcount get_wdf() const { Assert(have_started); return wdf; }
194 
197  PositionList *read_position_list();
198 
201  PositionList * open_position_list() const;
202 
204  PostList * next(double w_min);
205 
207  PostList * skip_to(Xapian::docid desired_did, double w_min);
208 
    /// Return true if and only if we're off the end of the list.
210  bool at_end() const { return is_at_end; }
211 
212  Xapian::termcount get_wdf_upper_bound() const;
213 
215  std::string get_description() const;
216 
    /// Static helper taking a pointer-to-position, an end pointer and two
    /// result pointers (entry count and collection frequency) -- presumably
    /// it decodes those two values from [*posptr, end); confirm against the
    /// implementation.
218  static void read_number_of_entries(const char ** posptr,
219  const char * end,
220  Xapian::doccount * number_of_entries_ptr,
221  Xapian::termcount * collection_freq_ptr);
222 };
223 
    // NOTE(review): the line opening the enclosing class is not present in
    // this listing; from the constructors below it is GlassPostListTable,
    // whose constructors initialise a GlassTable base.
    /// PostList for looking up document lengths.
    ///
    /// Declared mutable, so const member functions may modify it.
226  mutable AutoPtr<GlassPostList> doclen_pl;
227 
228  public:
    /// Create a new table object.
    ///
    /// Passes the table name "postlist", the path prefix
    /// `path_ + "/postlist."` and the readonly flag to the GlassTable
    /// constructor; doclen_pl starts out empty.
241  GlassPostListTable(const string & path_, bool readonly_)
242  : GlassTable("postlist", path_ + "/postlist.", readonly_),
243  doclen_pl()
244  { }
245 
    /// Create a new table object from a file descriptor and an offset.
    ///
    /// Passes the table name "postlist", fd, offset_ and the readonly flag to
    /// the GlassTable constructor; doclen_pl starts out empty.
246  GlassPostListTable(int fd, off_t offset_, bool readonly_)
247  : GlassTable("postlist", fd, offset_, readonly_),
248  doclen_pl()
249  { }
250 
    /// Open the table: discard whatever doclen_pl currently holds, then
    /// delegate to GlassTable::open() with the same arguments.
    ///
    // NOTE(review): one line of the signature is missing from this listing.
    // The index at the end of the page gives the full signature as
    // `void open(int flags_, const RootInfo &root_info,
    // glass_revision_number_t rev)`.
251  void open(int flags_, const RootInfo & root_info,
253  doclen_pl.reset(0);
254  GlassTable::open(flags_, root_info, rev);
255  }
256 
    /// Declaration only: takes a term and the Inverter::PostingChanges
    /// recorded for it -- presumably merges those changes into the term's
    /// postlist; confirm against the implementation.
258  void merge_changes(const string& term,
259  const Inverter::PostingChanges& changes);
260 
    /// Declaration only: takes a map from document id to a term count --
    /// presumably the new document lengths to merge in; confirm against the
    /// implementation.
262  void merge_doclen_changes(const map<Xapian::docid,
263  Xapian::termcount>& doclens);
264 
    // NOTE(review): this declaration is truncated -- the second line ends
    // with a comma and the remaining parameter line(s) are not present in
    // this listing.  Only the first three parameters (term name, document id,
    // `adding` flag) and the Xapian::docid return type are visible.
265  Xapian::docid get_chunk(const string& tname,
266  Xapian::docid did, bool adding,
269 
    /// Compose a key from a termname and docid.
    ///
    /// Thin wrapper around pack_glass_postlist_key(term, did).
271  static string make_key(const string& term, Xapian::docid did) {
272  return pack_glass_postlist_key(term, did);
273  }
274 
    /// Compose a key from a termname.
    ///
    /// Thin wrapper around pack_glass_postlist_key(term).
276  static string make_key(const string & term) {
277  return pack_glass_postlist_key(term);
278  }
279 
    /// Return whether the table contains the key that make_key(term)
    /// produces for this term.
280  bool term_exists(const string & term) const {
281  return key_exists(make_key(term));
282  }
283 
    /// Declaration only: looks up frequency information for a term.
    ///
    /// The three pointer parameters are presumably output locations (term
    /// frequency, collection frequency, wdf upper bound) -- confirm against
    /// the implementation.  wdfub_ptr may be omitted; it defaults to NULL.
295  void get_freqs(const std::string & term,
296  Xapian::doccount * termfreq_ptr,
297  Xapian::termcount * collfreq_ptr,
298  Xapian::termcount * wdfub_ptr = NULL) const;
299 
    // NOTE(review): this declaration is truncated -- the line ends with a
    // comma and the following parameter line is not present in this listing.
301  Xapian::termcount get_doclength(Xapian::docid did,
303 
    // NOTE(review): this declaration is truncated -- the line ends with a
    // comma and the following parameter line is not present in this listing.
305  bool document_exists(Xapian::docid did,
307 
    /// Declaration only: takes two docid references, `first` and `last` --
    /// presumably set to the lowest and highest document ids in use; confirm
    /// against the implementation.
308  void get_used_docid_range(Xapian::docid & first,
309  Xapian::docid & last) const;
310 };
311 
312 #endif /* OM_HGUARD_GLASS_POSTLIST_H */
#define Assert(COND)
Definition: omassert.h:122
Xapian::termcount get_wdf() const
Returns the Within Document Frequency of the term in the current document.
Abstract base class for postlists.
Definition: postlist.h:37
static string make_key(const string &term)
Compose a key from a termname.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
A position list in a glass database.
const char * pos
Position of iteration through current chunk.
Class managing a Btree table in a Glass database.
Definition: glass_table.h:425
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:68
AutoPtr< GlassCursor > cursor
Cursor pointing to current chunk of postlist.
GlassPostListTable(const string &path_, bool readonly_)
Create a new table object.
Class for storing the changes in frequencies for a term.
STL namespace.
Definitions, types, etc for use inside glass.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:39
A position list in a glass database.
Xapian::doccount number_of_entries
The number of entries in the posting list.
AutoPtr< GlassPostList > doclen_pl
PostList for looking up document lengths.
bool at_end() const
Return true if and only if we're off the end of the list.
bool is_at_end
Whether we've run off the end of the list yet.
Xapian::doccount get_termfreq() const
Returns number of docs indexed by this term.
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk...
Abstract base class for leaf postlists.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool is_last_chunk
True if this is the last chunk.
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid.
const char * end
Pointer to byte after end of current chunk.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
API for working with Xapian databases.
A postlist in a glass database.
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree s...
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:147
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Xapian::Internal::intrusive_ptr< const GlassDatabase > this_db
The database we are searching.
bool term_exists(const string &term) const
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
bool have_started
Whether we've started reading the list yet.
std::string pack_glass_postlist_key(const std::string &term)
Definition: pack.h:613
Xapian::docid did
Document id we're currently at.
Xapian::termcount wdf
The wdf of the current document.
Various assertion macros.
Xapian::docid get_docid() const
Returns the current docid.
GlassPostListTable(int fd, off_t offset_, bool readonly_)
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Inverter class which "inverts the file".
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
GlassPositionList positionlist
The position list object for this posting list.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
string make_key(Xapian::docid did)
Definition: chert_record.cc:35
Wrapper around standard unique_ptr template.