xapian-core  1.4.21
glass_postlist.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015 Olly Betts
7  * Copyright 2007,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #ifndef OM_HGUARD_GLASS_POSTLIST_H
26 #define OM_HGUARD_GLASS_POSTLIST_H
27 
28 #include <xapian/database.h>
29 
30 #include "glass_defs.h"
31 #include "glass_inverter.h"
32 #include "glass_positionlist.h"
33 #include "api/leafpostlist.h"
34 #include "omassert.h"
35 
36 #include "autoptr.h"
37 #include <map>
38 #include <string>
39 
40 using namespace std;
41 
42 class GlassCursor;
43 class GlassDatabase;
44 
45 namespace Glass {
46  class PostlistChunkReader;
47  class PostlistChunkWriter;
48  class RootInfo;
49 }
50 
51 using Glass::RootInfo;
52 
53 class GlassPostList;
54 
57  mutable AutoPtr<GlassPostList> doclen_pl;
58 
59  public:
/// Create a new table object.
///
/// @param path_      Directory path; "/postlist." is appended to it to form
///                   the path argument handed to the GlassTable base.
/// @param readonly_  Forwarded unchanged to the GlassTable constructor.
///
/// doclen_pl starts out empty.
72  GlassPostListTable(const string & path_, bool readonly_)
73  : GlassTable("postlist", path_ + "/postlist.", readonly_),
74  doclen_pl()
75  { }
76 
/// Create a new table object from a file descriptor.
///
/// @param fd         Forwarded unchanged to the GlassTable constructor.
/// @param offset_    Forwarded unchanged to the GlassTable constructor.
/// @param readonly_  Forwarded unchanged to the GlassTable constructor.
///
/// The table name passed to GlassTable is "postlist"; doclen_pl starts out
/// empty.
77  GlassPostListTable(int fd, off_t offset_, bool readonly_)
78  : GlassTable("postlist", fd, offset_, readonly_),
79  doclen_pl()
80  { }
81 
/// Open the btree.
///
/// Empties doclen_pl and then delegates to GlassTable::open().
///
// NOTE(review): this rendered listing jumps from line 82 to line 84, so the
// end of the parameter list (which must declare `rev`, used below) and the
// opening brace are not visible here - confirm against the original header.
82  void open(int flags_, const RootInfo & root_info,
84  doclen_pl.reset(0); // release whatever doclen_pl currently holds
85  GlassTable::open(flags_, root_info, rev);
86  }
87 
89  void merge_changes(const string& term,
90  const Inverter::PostingChanges& changes);
91 
93  void merge_doclen_changes(const map<Xapian::docid,
94  Xapian::termcount>& doclens);
95 
96  Xapian::docid get_chunk(const string& tname,
97  Xapian::docid did, bool adding,
100 
/// Compose a key from a termname and docid.
///
/// Thin wrapper: returns pack_glass_postlist_key(term, did).
102  static string make_key(const string& term, Xapian::docid did) {
103  return pack_glass_postlist_key(term, did);
104  }
105 
/// Compose a key from a termname.
///
/// Thin wrapper: returns pack_glass_postlist_key(term).
107  static string make_key(const string & term) {
108  return pack_glass_postlist_key(term);
109  }
110 
/// Report whether the key composed from @a term alone (via make_key(term))
/// exists, as answered by key_exists().
111  bool term_exists(const string & term) const {
112  return key_exists(make_key(term));
113  }
114 
126  void get_freqs(const std::string & term,
127  Xapian::doccount * termfreq_ptr,
128  Xapian::termcount * collfreq_ptr,
129  Xapian::termcount * wdfub_ptr = NULL) const;
130 
132  Xapian::termcount get_doclength(Xapian::docid did,
134 
136  bool document_exists(Xapian::docid did,
138 
139  void get_used_docid_range(Xapian::docid & first,
140  Xapian::docid & last) const;
141 };
142 
/// A postlist in a glass database.
///
// NOTE(review): this is a rendered listing with lines missing. Members used
// by the inline accessors below (number_of_entries, have_started, did, wdf)
// are not declared in the visible text, and both constructor declarations
// have lost their first line - confirm against the original header.
145 class GlassPostList : public LeafPostList {
151 
154 
157 
160 
    /// Whether we've run off the end of the list yet.
162  bool is_at_end;
163 
    /// Cursor pointing to current chunk of postlist.
165  AutoPtr<GlassCursor> cursor;
166 
169 
172 
    /// Position of iteration through current chunk.
174  const char * pos;
175 
    /// Pointer to byte after end of current chunk.
177  const char * end;
178 
181 
184 
187 
190 
    // Copy constructor, declared in the private section (before `public:`).
    // NOTE(review): presumably left undefined to disallow copying - confirm.
192  GlassPostList(const GlassPostList &);
193 
    // Assignment operator, likewise private.
    // NOTE(review): presumably left undefined to disallow assignment - confirm.
195  void operator=(const GlassPostList &);
196 
200  bool next_in_chunk();
201 
207  void next_chunk();
208 
216  bool current_chunk_contains(Xapian::docid desired_did);
217 
229  void move_to_chunk_containing(Xapian::docid desired_did);
230 
240  bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did);
241 
    // Tail of a private constructor declaration; its first line is missing
    // from this listing.
243  const string & term,
244  GlassCursor * cursor_);
245 
246  void init();
247 
248  public:
    // Tail of a public constructor declaration; its first line is missing
    // from this listing.
251  const string & term,
252  bool keep_reference);
253 
255  ~GlassPostList();
256 
257  LeafPostList * open_nearby_postlist(const std::string & term_) const;
258 
263  bool jump_to(Xapian::docid desired_did);
264 
    /// Returns number of docs indexed by this term (number_of_entries).
269  Xapian::doccount get_termfreq() const { return number_of_entries; }
270 
    /// Returns the current docid. Asserts that have_started is set.
272  Xapian::docid get_docid() const { Assert(have_started); return did; }
273 
275  Xapian::termcount get_doclength() const;
276 
278  Xapian::termcount get_unique_terms() const;
279 
    /// Returns the Within Document Frequency of the term in the current
    /// document. Asserts that have_started is set.
283  Xapian::termcount get_wdf() const { Assert(have_started); return wdf; }
284 
287  PositionList *read_position_list();
288 
291  PositionList * open_position_list() const;
292 
294  PostList * next(double w_min);
295 
297  PostList * skip_to(Xapian::docid desired_did, double w_min);
298 
    /// Return true if and only if we're off the end of the list.
300  bool at_end() const { return is_at_end; }
301 
302  Xapian::termcount get_wdf_upper_bound() const;
303 
305  std::string get_description() const;
306 
308  static void read_number_of_entries(const char ** posptr,
309  const char * end,
310  Xapian::doccount * number_of_entries_ptr,
311  Xapian::termcount * collection_freq_ptr);
312 };
313 
314 #endif /* OM_HGUARD_GLASS_POSTLIST_H */
#define Assert(COND)
Definition: omassert.h:122
Xapian::termcount get_wdf() const
Returns the Within Document Frequency of the term in the current document.
Abstract base class for postlists.
Definition: postlist.h:37
static string make_key(const string &term)
Compose a key from a termname.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
A position list in a glass database.
const char * pos
Position of iteration through current chunk.
Class managing a Btree table in a Glass database.
Definition: glass_table.h:430
uint4 glass_revision_number_t
The revision number of a glass database.
Definition: glass_defs.h:61
AutoPtr< GlassCursor > cursor
Cursor pointing to current chunk of postlist.
GlassPostListTable(const string &path_, bool readonly_)
Create a new table object.
Class for storing the changes in frequencies for a term.
STL namespace.
Definitions, types, etc for use inside glass.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:38
A position list in a glass database.
Xapian::doccount number_of_entries
The number of entries in the posting list.
AutoPtr< GlassPostList > doclen_pl
PostList for looking up document lengths.
bool at_end() const
Return true if and only if we're off the end of the list.
bool is_at_end
Whether we've run off the end of the list yet.
Xapian::doccount get_termfreq() const
Returns number of docs indexed by this term.
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk...
Abstract base class for leaf postlists.
Xapian::docid first_did_in_chunk
The first document id in this chunk.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool is_last_chunk
True if this is the last chunk.
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid.
const char * end
Pointer to byte after end of current chunk.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
API for working with Xapian databases.
A postlist in a glass database.
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree s...
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: glass_cursor.h:147
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Xapian::Internal::intrusive_ptr< const GlassDatabase > this_db
The database we are searching.
bool term_exists(const string &term) const
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
bool have_started
Whether we've started reading the list yet.
std::string pack_glass_postlist_key(const std::string &term)
Definition: pack.h:613
Xapian::docid did
Document id we're currently at.
Xapian::termcount wdf
The wdf of the current document.
Various assertion macros.
Xapian::docid get_docid() const
Returns the current docid.
GlassPostListTable(int fd, off_t offset_, bool readonly_)
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Inverter class which "inverts the file".
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
GlassPositionList positionlist
The position list object for this posting list.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
string make_key(Xapian::docid did)
Definition: chert_record.cc:35
Wrapper around standard unique_ptr template.