xapian-core  1.4.26
glass_alltermslist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005,2007,2008,2009,2010 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19  * USA
20  */
21 
22 #include <config.h>
23 
24 #include "glass_alltermslist.h"
25 #include "glass_postlist.h"
26 
27 #include "debuglog.h"
28 #include "pack.h"
29 #include "stringutils.h"
30 
// Read and cache the term frequency of the current term.
//
// Preconditions (asserted): current_term is non-empty and the list is not
// at_end().
//
// NOTE(review): this listing skips source lines 32 and 42, so the function
// signature and the statement which consumes p/pend are not visible here.
// Presumably the latter decodes the tag into termfreq (the file references
// read_number_of_entries()) -- confirm against the full file.
31 void
33 {
34  LOGCALL_VOID(DB, "GlassAllTermsList::read_termfreq", NO_ARGS);
35  Assert(!current_term.empty());
36  Assert(!at_end());
37 
38  // Unpack the termfreq from the tag.
 // read_tag() loads the tag of the cursor's current entry into
 // cursor->current_tag; p/pend then delimit that tag's bytes.
39  cursor->read_tag();
40  const char *p = cursor->current_tag.data();
41  const char *pend = p + cursor->current_tag.size();
43 }
44 
// Destructor: frees the cursor held by this termlist.
//
// NOTE(review): the signature (source line 45) is not shown in this listing.
46 {
47  LOGCALL_DTOR(DB, "GlassAllTermsList");
 // Deleting a null pointer is a no-op, so this is fine whether or not a
 // cursor has been assigned yet.
48  delete cursor;
49 }
50 
// Returns the current termname.
//
// Preconditions (asserted): current_term is non-empty and the list is not
// at_end().
//
// NOTE(review): source lines 52 (signature) and 57 (the return statement) are
// missing from this listing; presumably the latter returns current_term --
// confirm against the full file.
51 string
53 {
54  LOGCALL(DB, string, "GlassAllTermsList::get_termname", NO_ARGS);
55  Assert(!current_term.empty());
56  Assert(!at_end());
58 }
59 
// Returns the term frequency of the current term.
//
// Preconditions (asserted): current_term is non-empty and the list is not
// at_end().
//
// NOTE(review): source lines 60-61 (return type and signature) and 67 (the
// return statement) are missing from this listing; presumably the latter
// returns termfreq -- confirm against the full file.
62 {
63  LOGCALL(DB, Xapian::doccount, "GlassAllTermsList::get_termfreq", NO_ARGS);
64  Assert(!current_term.empty());
65  Assert(!at_end());
 // termfreq == 0 is the "not read yet" marker (next() and skip_to() reset
 // it to 0), so the value is only fetched on first request for a term.
66  if (termfreq == 0) read_termfreq();
68 }
69 
// Advance to the next term in the list.
//
// Resets the cached termfreq, then moves the cursor forward until it sits on
// a key which the loop below accepts.  If the cursor runs off the end of the
// table, or the term reached doesn't start with `prefix`, current_term is
// emptied (and in the latter case the cursor is explicitly moved to the end).
// Every RETURN visible here yields NULL.
//
// Throws Xapian::DatabaseCorruptError if a key has an unexpected format.
//
// NOTE(review): this listing omits source lines 71, 80, 90, 108 and 117 (the
// signature, plus statements in the cursor-setup, exact-match and no-NUL
// branches and the condition guarding the throw).  Those statements are not
// documented here, and the braces below don't balance as shown -- consult the
// full file before editing.
70 TermList *
72 {
73  LOGCALL(DB, TermList *, "GlassAllTermsList::next", NO_ARGS);
74  Assert(!at_end());
75  // Set termfreq to 0 to indicate no termfreq has been read for the current
76  // term.
77  termfreq = 0;
78 
 // First call: no cursor yet, so one has to be set up and positioned.
79  if (rare(!cursor)) {
81  Assert(cursor); // The postlist table isn't optional.
82 
83  if (prefix.empty()) {
 // The explicit length of 2 keeps the embedded NUL byte in the key.
 // NOTE(review): presumably this seeks past entries which sort before
 // the term keys -- confirm against the postlist key format.
84  (void)cursor->find_entry_ge(string("\x00\xff", 2));
85  } else {
86  const string & key = pack_glass_postlist_key(prefix);
87  if (cursor->find_entry_ge(key)) {
88  // The exact term we asked for is there, so just copy it rather
89  // than wasting effort unpacking it from the key.
91  RETURN(NULL);
92  }
93  }
 // The cursor is already on a candidate entry, so jump into the loop
 // past the cursor->next() call.
94  goto first_time;
95  }
96 
97  while (true) {
98  cursor->next();
99 first_time:
100  if (cursor->after_end()) {
 // Ran off the end of the table: no more terms.
101  current_term.resize(0);
102  RETURN(NULL);
103  }
104 
105  // Fast check for terms without any zero bytes. ~8.4% faster.
106  auto nul = cursor->current_key.find('\0');
107  if (nul == string::npos) {
109  break;
110  }
 // A NUL not followed by '\xff' causes this key to be skipped.
 // NOTE(review): presumably such a key belongs to a continuation chunk
 // (see the comment further down) -- confirm against the key format.
111  if (cursor->current_key[nul + 1] != '\xff') {
112  continue;
113  }
114 
115  const char *p = cursor->current_key.data();
116  const char *pend = p + cursor->current_key.size();
118  throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
119  }
120 
121  // If this key is for the first chunk of a postlist, we're done.
122  // Otherwise we need to skip past continuation chunks until we find the
123  // first chunk of the next postlist.
124  if (p == pend) break;
125  }
126 
127  if (!startswith(current_term, prefix)) {
128  // We've reached the end of the prefixed terms.
129  cursor->to_end();
130  current_term.resize(0);
131  }
132 
133  RETURN(NULL);
134 }
135 
// Advance to the first term which is >= `term`.
//
// Resets the cached termfreq and positions the cursor with find_entry_ge() on
// the packed postlist key for `term`.  If the cursor ends up past the end of
// the table, or the term reached doesn't start with `prefix`, current_term is
// emptied (and in the latter case the cursor is explicitly moved to the end).
// Every RETURN visible here yields NULL.
//
// Throws Xapian::DatabaseCorruptError if the key has an unexpected format.
//
// NOTE(review): this listing omits source lines 146 and 163 (a statement in
// the cursor-setup branch and the condition guarding the throw), so the
// braces below don't balance as shown -- consult the full file before editing.
136 TermList *
137 GlassAllTermsList::skip_to(const string &term)
138 {
139  LOGCALL(DB, TermList *, "GlassAllTermsList::skip_to", term);
140  Assert(!at_end());
141  // Set termfreq to 0 to indicate no termfreq has been read for the current
142  // term.
143  termfreq = 0;
144 
 // First use: no cursor yet, so one has to be set up.
145  if (rare(!cursor)) {
147  Assert(cursor); // The postlist table isn't optional.
148  }
149 
150  string key = pack_glass_postlist_key(term);
151  if (cursor->find_entry_ge(key)) {
152  // The exact term we asked for is there, so just copy it rather than
153  // wasting effort unpacking it from the key.
154  current_term = term;
155  } else {
 // No exact match: the cursor is on the next entry, if there is one.
156  if (cursor->after_end()) {
157  current_term.resize(0);
158  RETURN(NULL);
159  }
160 
161  const char *p = cursor->current_key.data();
162  const char *pend = p + cursor->current_key.size();
164  throw Xapian::DatabaseCorruptError("PostList table key has unexpected format");
165  }
166  }
167 
168  if (!startswith(current_term, prefix)) {
169  // We've reached the end of the prefixed terms.
170  cursor->to_end();
171  current_term.resize(0);
172  }
173 
174  RETURN(NULL);
175 }
176 
// True if we're off the end of the list.
//
// NOTE(review): the signature (source line 178) is not shown in this listing.
177 bool
179 {
180  LOGCALL(DB, bool, "GlassAllTermsList::at_end", NO_ARGS);
 // Evaluates to false while `cursor` is still null, so a list whose cursor
 // hasn't been set up yet is never reported as at the end.
181  RETURN(cursor && cursor->after_end());
182 }
#define RETURN(A)
Definition: debuglog.h:493
#define Assert(COND)
Definition: omassert.h:122
static void read_number_of_entries(const char **posptr, const char *end, Xapian::doccount *number_of_entries_ptr, Xapian::termcount *collection_freq_ptr)
Read the number of entries and the collection frequency.
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
A termlist containing all terms in a glass database.
void read_termfreq() const
Read and cache the term frequency.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
bool next()
Advance to the next key.
Abstract base class for termlists.
Definition: termlist.h:39
Postlists in glass databases.
bool after_end() const
Determine whether cursor is off the end of table.
Definition: glass_cursor.h:329
#define rare(COND)
Definition: config.h:575
Xapian::Internal::intrusive_ptr< const GlassDatabase > database
Keep a reference to our database to stop it being deleted.
bool read_tag(bool keep_compressed=false)
Read the tag from the table and store it in current_tag.
~GlassAllTermsList()
Destructor.
TermList * skip_to(const std::string &tname)
Advance to the first term which is >= tname.
GlassCursor * cursor
A cursor which runs through the postlist table reading termnames from the keys.
string current_key
Current key pointed to by cursor.
Definition: glass_cursor.h:239
GlassPostListTable postlist_table
Table storing posting lists.
Xapian::doccount termfreq
The term frequency of the term at the current position.
bool at_end() const
True if we're off the end of the list.
string current_tag
Current tag pointed to by cursor.
Definition: glass_cursor.h:244
Xapian::doccount get_termfreq() const
Returns the term frequency of the current term.
std::string current_term
The termname at the current position.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
Definition: pack.h:562
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
bool find_entry_ge(const string &key)
Position the cursor on the lowest entry with key >= key.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
TermList * next()
Advance to the next term in the list.
std::string pack_glass_postlist_key(const std::string &term)
Definition: pack.h:613
GlassCursor * cursor_get() const
Get a cursor for reading from the table.
Pack types into strings and unpack them again.
std::string get_termname() const
Returns the current termname.
Various handy helpers which std::string really should provide.
std::string prefix
The prefix to restrict the terms to.
Debug logging macros.
void to_end()
Set the cursor to be off the end of the table.
Definition: glass_cursor.h:322
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487