xapian-core  1.4.25
chert_alltermslist.cc
Go to the documentation of this file.
1 /* chert_alltermslist.cc: A termlist containing all terms in a chert database.
2  *
3  * Copyright (C) 2005,2007,2008,2009,2010 Olly Betts
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
18  * USA
19  */
20 
21 #include <config.h>
22 
23 #include "chert_alltermslist.h"
24 #include "chert_postlist.h"
25 
26 #include "debuglog.h"
27 #include "pack.h"
28 #include "stringutils.h"
29 
30 void
32 {
33  LOGCALL_VOID(DB, "ChertAllTermsList::read_termfreq", NO_ARGS);
34  Assert(!current_term.empty());
35  Assert(!at_end());
36 
37  // Unpack the termfreq from the tag.
38  Xapian::termcount collfreq;
39  cursor->read_tag();
40  const char *p = cursor->current_tag.data();
41  const char *pend = p + cursor->current_tag.size();
42  ChertPostList::read_number_of_entries(&p, pend, &termfreq, &collfreq);
43  // Not used.
44  (void)collfreq;
45 }
46 
48 {
49  LOGCALL_DTOR(DB, "ChertAllTermsList");
50  delete cursor;
51 }
52 
53 string
55 {
56  LOGCALL(DB, string, "ChertAllTermsList::get_termname", NO_ARGS);
57  Assert(!current_term.empty());
58  Assert(!at_end());
60 }
61 
64 {
65  LOGCALL(DB, Xapian::doccount, "ChertAllTermsList::get_termfreq", NO_ARGS);
66  Assert(!current_term.empty());
67  Assert(!at_end());
68  if (termfreq == 0) read_termfreq();
70 }
71 
72 TermList *
74 {
75  LOGCALL(DB, TermList *, "ChertAllTermsList::next", NO_ARGS);
76  Assert(!at_end());
77  // Set termfreq to 0 to indicate no termfreq/collfreq have been read for
78  // the current term.
79  termfreq = 0;
80 
81  if (rare(!cursor)) {
83  Assert(cursor); // The postlist table isn't optional.
84 
85  if (prefix.empty()) {
86  (void)cursor->find_entry_ge(string("\x00\xff", 2));
87  } else {
88  const string & key = pack_chert_postlist_key(prefix);
89  if (cursor->find_entry_ge(key)) {
90  // The exact term we asked for is there, so just copy it rather
91  // than wasting effort unpacking it from the key.
93  RETURN(NULL);
94  }
95  }
96  goto first_time;
97  }
98 
99  while (true) {
100  cursor->next();
101 first_time:
102  if (cursor->after_end()) {
103  current_term.resize(0);
104  RETURN(NULL);
105  }
106 
107  // Fast check for terms without any zero bytes. ~8.4% faster for
108  // glass.
109  auto nul = cursor->current_key.find('\0');
110  if (nul == string::npos) {
112  break;
113  }
114  if (cursor->current_key[nul + 1] != '\xff') {
115  continue;
116  }
117 
118  const char* p = cursor->current_key.data();
119  const char* pend = p + cursor->current_key.size();
121  throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
122  }
123 
124  // If this key is for the first chunk of a postlist, we're done.
125  // Otherwise we need to skip past continuation chunks until we find the
126  // first chunk of the next postlist.
127  if (p == pend) break;
128  }
129 
130  if (!startswith(current_term, prefix)) {
131  // We've reached the end of the prefixed terms.
132  cursor->to_end();
133  current_term.resize(0);
134  }
135 
136  RETURN(NULL);
137 }
138 
139 TermList *
140 ChertAllTermsList::skip_to(const string &term)
141 {
142  LOGCALL(DB, TermList *, "ChertAllTermsList::skip_to", term);
143  Assert(!at_end());
144  // Set termfreq to 0 to indicate no termfreq has been read for the current
145  // term.
146  termfreq = 0;
147 
148  if (rare(!cursor)) {
150  Assert(cursor); // The postlist table isn't optional.
151  }
152 
153  string key = pack_chert_postlist_key(term);
154  if (cursor->find_entry_ge(key)) {
155  // The exact term we asked for is there, so just copy it rather than
156  // wasting effort unpacking it from the key.
157  current_term = term;
158  } else {
159  if (cursor->after_end()) {
160  current_term.resize(0);
161  RETURN(NULL);
162  }
163 
164  const char *p = cursor->current_key.data();
165  const char *pend = p + cursor->current_key.size();
167  throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
168  }
169  }
170 
171  if (!startswith(current_term, prefix)) {
172  // We've reached the end of the prefixed terms.
173  cursor->to_end();
174  current_term.resize(0);
175  }
176 
177  RETURN(NULL);
178 }
179 
180 bool
182 {
183  LOGCALL(DB, bool, "ChertAllTermsList::at_end", NO_ARGS);
184  RETURN(cursor && cursor->after_end());
185 }
#define RETURN(A)
Definition: debuglog.h:493
#define Assert(COND)
Definition: omassert.h:122
bool at_end() const
True if we're off the end of the list.
void read_termfreq() const
Read and cache the term frequency.
Xapian::doccount termfreq
The term frequency of the term at the current position.
std::string current_term
The termname at the current position.
bool next()
Advance to the next key.
TermList * next()
Advance to the next term in the list.
std::string prefix
The prefix to restrict the terms to.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
void to_end()
Set the cursor to be off the end of the table.
Definition: chert_cursor.h:231
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
Abstract base class for termlists.
Definition: termlist.h:39
~ChertAllTermsList()
Destructor.
ChertPostListTable postlist_table
Table storing posting lists.
#define rare(COND)
Definition: config.h:565
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool find_entry_ge(const string &key)
Position the cursor on the lowest entry with key >= key.
string current_tag
Current tag pointed to by cursor.
Definition: chert_cursor.h:154
Xapian::Internal::intrusive_ptr< const ChertDatabase > database
Keep a reference to our database to stop it being deleted.
TermList * skip_to(const std::string &tname)
Advance to the first term which is >= tname.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
Definition: pack.h:562
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
ChertCursor * cursor_get() const
Get a cursor for reading from the table.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
std::string pack_chert_postlist_key(const std::string &term)
Definition: pack.h:585
ChertCursor * cursor
A cursor which runs through the postlist table reading termnames from the keys.
bool after_end() const
Determine whether cursor is off the end of table.
Definition: chert_cursor.h:238
string current_key
Current key pointed to by cursor.
Definition: chert_cursor.h:149
Xapian::doccount get_termfreq() const
Returns the term frequency of the current term.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Postlists in chert databases.
bool read_tag(bool keep_compressed=false)
Read the tag from the table and store it in current_tag.
Pack types into strings and unpack them again.
static void read_number_of_entries(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the number of entries and the collection frequency.
Various handy helpers which std::string really should provide.
std::string get_termname() const
Returns the current termname.
A termlist containing all terms in a chert database.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487