chert_termlist.cc
Go to the documentation of this file.
1 /* chert_termlist.cc: Termlists in a chert database
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2006,2007,2008,2010,2011,2014 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 #include "chert_termlist.h"
25 
26 #include "xapian/error.h"
27 
28 #include "expand/expandweight.h"
29 #include "chert_positionlist.h"
30 #include "debuglog.h"
31 #include "omassert.h"
32 #include "pack.h"
33 #include "str.h"
34 
35 using namespace std;
37 
39  Xapian::docid did_)
40  : db(db_), did(did_), current_wdf(0), current_termfreq(0)
41 {
42  LOGCALL_CTOR(DB, "ChertTermList", db_ | did_);
43 
45  data))
46  throw Xapian::DocNotFoundError("No termlist for document " + str(did));
47 
48  pos = data.data();
49  end = pos + data.size();
50 
51  if (pos == end) {
52  doclen = 0;
53  termlist_size = 0;
54  return;
55  }
56 
57  // Read doclen
58  if (!unpack_uint(&pos, end, &doclen)) {
59  const char *msg;
60  if (pos == 0) {
61  msg = "Too little data for doclen in termlist";
62  } else {
63  msg = "Overflowed value for doclen in termlist";
64  }
66  }
67 
68  // Read termlist_size
69  if (!unpack_uint(&pos, end, &termlist_size)) {
70  const char *msg;
71  if (pos == 0) {
72  msg = "Too little data for list size in termlist";
73  } else {
74  msg = "Overflowed value for list size in termlist";
75  }
77  }
78 }
79 
82 {
83  LOGCALL(DB, chert_doclen_t, "ChertTermList::get_doclength", NO_ARGS);
84  RETURN(doclen);
85 }
86 
89 {
90  LOGCALL(DB, Xapian::termcount, "ChertTermList::get_approx_size", NO_ARGS);
92 }
93 
94 void
96 {
97  LOGCALL_VOID(DB, "ChertTermList::accumulate_stats", stats);
98  Assert(!at_end());
100 }
101 
102 string
104 {
105  LOGCALL(DB, string, "ChertTermList::get_termname", NO_ARGS);
107 }
108 
111 {
112  LOGCALL(DB, Xapian::termcount, "ChertTermList::get_wdf", NO_ARGS);
114 }
115 
118 {
119  LOGCALL(DB, Xapian::doccount, "ChertTermList::get_termfreq", NO_ARGS);
120  if (current_termfreq == 0)
123 }
124 
125 TermList *
127 {
128  LOGCALL(DB, TermList *, "ChertTermList::next", NO_ARGS);
129  Assert(!at_end());
130  if (pos == end) {
131  pos = NULL;
132  RETURN(NULL);
133  }
134 
135  // Reset to 0 to indicate that the termfreq needs to be read.
136  current_termfreq = 0;
137 
138  bool wdf_in_reuse = false;
139  if (!current_term.empty()) {
140  // Find out how much of the previous term to reuse.
141  size_t len = static_cast<unsigned char>(*pos++);
142  if (len > current_term.size()) {
143  // The wdf is also stored in the "reuse" byte.
144  wdf_in_reuse = true;
145  size_t divisor = current_term.size() + 1;
146  current_wdf = len / divisor - 1;
147  len %= divisor;
148  }
149  current_term.resize(len);
150  }
151 
152  // Append the new tail to form the next term.
153  size_t append_len = static_cast<unsigned char>(*pos++);
154  current_term.append(pos, append_len);
155  pos += append_len;
156 
157  // Read the wdf if it wasn't packed into the reuse byte.
158  if (!wdf_in_reuse && !unpack_uint(&pos, end, &current_wdf)) {
159  const char *msg;
160  if (pos == 0) {
161  msg = "Too little data for wdf in termlist";
162  } else {
163  msg = "Overflowed value for wdf in termlist";
164  }
165  throw Xapian::DatabaseCorruptError(msg);
166  }
167 
168  RETURN(NULL);
169 }
170 
171 TermList *
172 ChertTermList::skip_to(const string & term)
173 {
174  LOGCALL(API, TermList *, "ChertTermList::skip_to", term);
175  while (pos != NULL && current_term < term) {
176  (void)ChertTermList::next();
177  }
178  RETURN(NULL);
179 }
180 
181 bool
183 {
184  LOGCALL(DB, bool, "ChertTermList::at_end", NO_ARGS);
185  RETURN(pos == NULL);
186 }
187 
190 {
191  LOGCALL(DB, Xapian::termcount, "ChertTermList::positionlist_count", NO_ARGS);
193 }
194 
197 {
198  LOGCALL(DB, Xapian::PositionIterator, "ChertTermList::positionlist_begin", NO_ARGS);
201 }

Documentation for Xapian (version 1.4.0).
Generated on Sat Jun 25 2016 by Doxygen 1.8.1.2.