xapian-core  1.4.25
glass_termlist.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2006,2007,2008,2009,2010,2014,2019 Olly Betts
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 #include "glass_termlist.h"
26 
27 #include "xapian/error.h"
28 
29 #include "expand/expandweight.h"
30 #include "glass_positionlist.h"
31 #include "debuglog.h"
32 #include "omassert.h"
33 #include "pack.h"
34 #include "str.h"
35 
36 using namespace std;
38 
40  Xapian::docid did_,
41  bool throw_if_not_present)
42  : db(db_), did(did_), current_wdf(0), current_termfreq(0)
43 {
44  LOGCALL_CTOR(DB, "GlassTermList", db_ | did_ | throw_if_not_present);
45 
47  data)) {
48  if (!throw_if_not_present) {
49  pos = NULL;
50  return;
51  }
52  throw Xapian::DocNotFoundError("No termlist for document " + str(did));
53  }
54 
55  pos = data.data();
56  end = pos + data.size();
57 
58  if (pos == end) {
59  doclen = 0;
60  termlist_size = 0;
61  return;
62  }
63 
64  // Read doclen
65  if (!unpack_uint(&pos, end, &doclen)) {
66  const char *msg;
67  if (pos == 0) {
68  msg = "Too little data for doclen in termlist";
69  } else {
70  msg = "Overflowed value for doclen in termlist";
71  }
73  }
74 
75  // Read termlist_size
76  if (!unpack_uint(&pos, end, &termlist_size)) {
77  const char *msg;
78  if (pos == 0) {
79  msg = "Too little data for list size in termlist";
80  } else {
81  msg = "Overflowed value for list size in termlist";
82  }
84  }
85 }
86 
89 {
90  LOGCALL(DB, Xapian::termcount, "GlassTermList::get_doclength", NO_ARGS);
91  RETURN(doclen);
92 }
93 
96 {
97  LOGCALL(DB, Xapian::termcount, "GlassTermList::get_unique_terms", NO_ARGS);
98  // get_unique_terms() really ought to only count terms with wdf > 0, but
99  // that's expensive to calculate on demand, so for now let's just ensure
100  // unique_terms <= doclen.
101  RETURN(min(termlist_size, doclen));
102 }
103 
106 {
107  LOGCALL(DB, Xapian::termcount, "GlassTermList::get_approx_size", NO_ARGS);
109 }
110 
111 void
113 {
114  LOGCALL_VOID(DB, "GlassTermList::accumulate_stats", stats);
115  Assert(!at_end());
116  stats.accumulate(shard_index,
118 }
119 
120 string
122 {
123  LOGCALL(DB, string, "GlassTermList::get_termname", NO_ARGS);
125 }
126 
129 {
130  LOGCALL(DB, Xapian::termcount, "GlassTermList::get_wdf", NO_ARGS);
132 }
133 
136 {
137  LOGCALL(DB, Xapian::doccount, "GlassTermList::get_termfreq", NO_ARGS);
138  if (current_termfreq == 0)
141 }
142 
// Advance to the next entry in the encoded termlist, decoding its term name
// into current_term and its wdf into current_wdf.
//
// If no encoded data remains (pos == end), pos is set to NULL - the state
// that at_end() reports - and nothing else is modified.
//
// Returns NULL on every path.
//
// Throws Xapian::DatabaseCorruptError if the wdf cannot be unpacked.
143 TermList *
145 {
146  LOGCALL(DB, TermList *, "GlassTermList::next", NO_ARGS);
147  Assert(!at_end());
148  if (pos == end) {
        // All encoded entries have been consumed: mark the list as finished.
149  pos = NULL;
150  RETURN(NULL);
151  }
152 
153  // Reset to 0 to indicate that the termfreq needs to be read.
154  current_termfreq = 0;
155 
156  bool wdf_in_reuse = false;
        // The "reuse" byte is only read when current_term is non-empty; with
        // an empty current_term the entry starts directly at the tail length.
157  if (!current_term.empty()) {
158  // Find out how much of the previous term to reuse.
159  size_t len = static_cast<unsigned char>(*pos++);
        // A value larger than the previous term's length cannot be a plain
        // prefix length, so it is decoded as a combined value:
        //   byte = (wdf + 1) * (current_term.size() + 1) + reuse_length
160  if (len > current_term.size()) {
161  // The wdf is also stored in the "reuse" byte.
162  wdf_in_reuse = true;
163  size_t divisor = current_term.size() + 1;
164  current_wdf = len / divisor - 1;
165  len %= divisor;
166  }
        // Keep only the shared prefix of the previous term.
167  current_term.resize(len);
168  }
169 
170  // Append the new tail to form the next term.
        // NOTE(review): neither this length byte nor the append_len bytes that
        // follow are checked against `end` here - presumably the stored data
        // is trusted to be well-formed; confirm whether corrupt input needs
        // guarding.
171  size_t append_len = static_cast<unsigned char>(*pos++);
172  current_term.append(pos, append_len);
173  pos += append_len;
174 
175  // Read the wdf if it wasn't packed into the reuse byte.
176  if (!wdf_in_reuse && !unpack_uint(&pos, end, &current_wdf)) {
177  const char *msg;
        // A null pos after a failed unpack is reported as truncated data,
        // anything else as overflow (presumably unpack_uint nulls the pointer
        // when it runs out of input - confirm against pack.h).
178  if (pos == 0) {
179  msg = "Too little data for wdf in termlist";
180  } else {
181  msg = "Overflowed value for wdf in termlist";
182  }
183  throw Xapian::DatabaseCorruptError(msg);
184  }
185 
186  RETURN(NULL);
187 }
188 
// Skip forward until the current term is no longer less than `term`, or until
// the list is exhausted (pos becomes NULL).
//
// The skip is a linear scan: entries are decoded one at a time via next().
// If pos is already NULL on entry, nothing is done.
//
// Returns NULL on every path.
189 TermList *
190 GlassTermList::skip_to(const string & term)
191 {
192  LOGCALL(API, TermList *, "GlassTermList::skip_to", term);
        // The pos != NULL test also guarantees next()'s Assert(!at_end())
        // precondition holds on each iteration.
193  while (pos != NULL && current_term < term) {
        // next() returns NULL on every path, so its result is deliberately
        // discarded.
194  (void)GlassTermList::next();
195  }
196  RETURN(NULL);
197 }
198 
// Report whether iteration has finished: true exactly when pos is NULL.
199 bool
201 {
202  LOGCALL(DB, bool, "GlassTermList::at_end", NO_ARGS);
203  RETURN(pos == NULL);
204 }
205 
208 {
209  LOGCALL(DB, Xapian::termcount, "GlassTermList::positionlist_count", NO_ARGS);
211 }
212 
215 {
216  LOGCALL(DB, Xapian::PositionIterator, "GlassTermList::positionlist_begin", NO_ARGS);
218 }
Xapian::termcount termlist_size
The number of entries in this termlist.
#define RETURN(A)
Definition: debuglog.h:493
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
#define Assert(COND)
Definition: omassert.h:122
std::string get_termname() const
Return the termname at the current position.
Xapian::docid did
The document id that this TermList is for.
Xapian::termcount get_doclength() const
Return the length of this document.
A position list in a glass database.
Xapian::doccount get_doccount() const
Virtual methods of Database::Internal.
const char * end
Pointer to the end of the encoded tag value.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
Convert types to std::string.
GlassTermList(const GlassTermList &)
Don't allow copying.
Xapian::termcount current_wdf
The wdf for the term at the current position.
std::string current_term
The termname at the current position.
const char * pos
Current position within the encoded tag value held in data.
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
Hierarchy of classes which Xapian can throw as exceptions.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
Definition: expandweight.h:76
Collate statistics and calculate the term weights for the ESet.
Xapian::Internal::intrusive_ptr< const GlassDatabase > db
The database we're reading data from.
TermList * skip_to(const std::string &term)
Skip forward to the specified term.
Xapian::doccount current_termfreq
The term frequency for the term at the current position.
static std::string make_key(Xapian::docid did)
PositionList * open_position_list(Xapian::docid did, const string &term) const
Virtual methods of Database::Internal.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
GlassTermListTable termlist_table
Table storing term lists.
Xapian::PositionIterator positionlist_begin() const
Return a PositionIterator for the current position.
Class for iterating over term positions.
bool at_end() const
Return true if the current position is past the last term in this list.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:489
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
void get_freqs(const string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Virtual methods of Database::Internal.
Xapian::termcount doclen
The length of document did.
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
std::string data
The tag value from the termlist table which holds the encoded termlist.
size_t shard_index
Which shard of a multidatabase this is from.
Definition: termlist.h:114
Collates statistics while calculating term weight in an ESet.
Definition: expandweight.h:37
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
TermList * next()
Advance the current position to the next term in the termlist.
void accumulate_stats(Xapian::Internal::ExpandStats &stats) const
Collate weighting information for the current term.
Various assertion macros.
A TermList in a glass database.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Xapian::termcount get_unique_terms() const
Return the number of unique terms.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
virtual Xapian::termcount positionlist_count(Xapian::docid did, const string &term) const
Virtual methods of Database::Internal.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.