xapian-core  1.4.26
chert_postlist.h
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2014 Olly Betts
7  * Copyright 2007,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #ifndef OM_HGUARD_CHERT_POSTLIST_H
26 #define OM_HGUARD_CHERT_POSTLIST_H
27 
28 #include <xapian/database.h>
29 
30 #include "chert_types.h"
31 #include "chert_positionlist.h"
32 #include "api/leafpostlist.h"
33 #include "omassert.h"
34 
35 #include "autoptr.h"
36 #include <map>
37 #include <string>
38 
39 using namespace std;
40 
41 class ChertCursor;
42 class ChertDatabase;
43 
44 namespace Chert {
45  class PostlistChunkReader;
46  class PostlistChunkWriter;
47 }
48 
49 class ChertPostList;
50 
53  mutable AutoPtr<ChertPostList> doclen_pl;
54 
55  public:
68  ChertPostListTable(const string & path_, bool readonly_)
69  : ChertTable("postlist", path_ + "/postlist.", readonly_),
70  doclen_pl()
71  { }
72 
74  doclen_pl.reset(0);
75  return ChertTable::open(revno);
76  }
77 
79  void merge_changes(
80  const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
81  const map<Xapian::docid, Xapian::termcount> & doclens,
82  const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas);
83 
84  Xapian::docid get_chunk(const string &tname,
85  Xapian::docid did, bool adding,
88 
90  static string make_key(const string & term, Xapian::docid did) {
91  return pack_chert_postlist_key(term, did);
92  }
93 
95  static string make_key(const string & term) {
96  return pack_chert_postlist_key(term);
97  }
98 
99  bool term_exists(const string & term) const {
100  return key_exists(make_key(term));
101  }
102 
111  void get_freqs(const std::string & term,
112  Xapian::doccount * termfreq_ptr,
113  Xapian::termcount * collfreq_ptr) const;
114 
116  Xapian::termcount get_doclength(Xapian::docid did,
118 
119  Xapian::termcount get_unique_terms(Xapian::docid did,
121 
123  bool document_exists(Xapian::docid did,
125 
126  void get_used_docid_range(Xapian::docid & first,
127  Xapian::docid & last) const;
128 };
129 
132 class ChertPostList : public LeafPostList {
133  protected: // ChertModifiedPostList needs to access these.
139 
142 
145 
146  private:
149 
151  bool is_at_end;
152 
154  AutoPtr<ChertCursor> cursor;
155 
158 
161 
163  const char * pos;
164 
166  const char * end;
167 
170 
173 
176 
177  protected: // ChertModifiedPostList needs to access this too.
180 
181  private:
183  ChertPostList(const ChertPostList &);
184 
186  void operator=(const ChertPostList &);
187 
191  bool next_in_chunk();
192 
198  void next_chunk();
199 
207  bool current_chunk_contains(Xapian::docid desired_did);
208 
220  void move_to_chunk_containing(Xapian::docid desired_did);
221 
231  bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did);
232 
233  public:
236  const string & term,
237  bool keep_reference);
238 
240  ~ChertPostList();
241 
246  bool jump_to(Xapian::docid desired_did);
247 
252  Xapian::doccount get_termfreq() const { return number_of_entries; }
253 
255  Xapian::docid get_docid() const { Assert(have_started); return did; }
256 
258  Xapian::termcount get_doclength() const;
259 
260  Xapian::termcount get_unique_terms() const;
261 
265  Xapian::termcount get_wdf() const { Assert(have_started); return wdf; }
266 
269  PositionList *read_position_list();
270 
273  PositionList * open_position_list() const;
274 
276  PostList * next(double w_min);
277 
279  PostList * skip_to(Xapian::docid desired_did, double w_min);
280 
282  bool at_end() const { return is_at_end; }
283 
284  Xapian::termcount get_wdf_upper_bound() const;
285 
287  std::string get_description() const;
288 
290  static void read_number_of_entries(const char ** posptr,
291  const char * end,
292  Xapian::doccount * number_of_entries_ptr,
293  Xapian::termcount * collection_freq_ptr);
294 };
295 
296 #endif /* OM_HGUARD_CHERT_POSTLIST_H */
bool term_exists(const string &term) const
#define Assert(COND)
Definition: omassert.h:122
PostlistChunkReader is essentially an iterator wrapper around a postlist chunk.
bool is_last_chunk
True if this is the last chunk.
Abstract base class for postlists.
Definition: postlist.h:37
A position list in a chert database.
Xapian::doccount number_of_entries
The number of entries in the posting list.
Xapian::termcount wdf_upper_bound
Upper bound on wdf for this postlist.
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
Definition: chert_cursor.h:66
Xapian::docid first_did_in_chunk
The first document id in this chunk.
const char * end
Pointer to byte after end of current chunk.
bool open(chert_revision_number_t revno)
Class managing a Btree table in a Chert database.
Definition: chert_table.h:347
STL namespace.
void open()
Open the btree at the latest revision.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:39
Xapian::docid get_docid() const
Returns the current docid.
Xapian::termcount wdf
The wdf of the current document.
Abstract base class for leaf postlists.
Types used by chert backend and the Btree manager.
unsigned int chert_revision_number_t
A type used to store a revision number for a table.
Definition: chert_types.h:40
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool have_started
Whether we've started reading the list yet.
AutoPtr< ChertCursor > cursor
Cursor pointing to current chunk of postlist.
API for working with Xapian databases.
bool is_at_end
Whether we've run off the end of the list yet.
static string make_key(const string &term, Xapian::docid did)
Compose a key from a termname and docid.
ChertPostListTable(const string &path_, bool readonly_)
Create a new table object.
Xapian::docid last_did_in_chunk
The last document id in this chunk.
bool at_end() const
Return true if and only if we're off the end of the list.
AutoPtr< ChertPostList > doclen_pl
PostList for looking up document lengths.
A position list in a chert database.
std::string pack_chert_postlist_key(const std::string &term)
Definition: pack.h:585
Xapian::Internal::intrusive_ptr< const ChertDatabase > this_db
The database we are searching.
Xapian::doccount get_termfreq() const
Returns number of docs indexed by this term.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
static string make_key(const string &term)
Compose a key from a termname.
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree storage scheme.
Xapian::docid did
Document id we're currently at.
ChertPositionList positionlist
The position list object for this posting list.
Various assertion macros.
A postlist in a chert database.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
PostlistChunkWriter is a wrapper which acts roughly as an output iterator on a postlist chunk, taking care of the messy details.
Xapian::termcount get_wdf() const
Returns the Within Document Frequency of the term in the current document.
string make_key(Xapian::docid did)
Definition: chert_record.cc:35
Wrapper around standard unique_ptr template.
const char * pos
Position of iteration through current chunk.