xapian-core  2.0.0
glass_positionlist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2008,2009,2010,2013,2017,2019 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "glass_positionlist.h"
24 
25 #include <xapian/types.h>
26 
27 #include "bitstream.h"
28 #include "debuglog.h"
29 #include "pack.h"
30 
31 #include <string>
32 
33 using namespace std;
34 
35 void
37  const Xapian::VecCOW<Xapian::termpos> & vec) const
38 {
39  LOGCALL_VOID(DB, "GlassPositionListTable::pack", s | vec);
40  Assert(!vec.empty());
41 
42  pack_uint(s, vec.back());
43 
44  if (vec.size() > 1) {
45  BitWriter wr(s);
46  wr.encode(vec[0], vec.back());
47  wr.encode(vec.size() - 2, vec.back() - vec[0]);
48  wr.encode_interpolative(vec, 0, vec.size() - 1);
49  swap(s, wr.freeze());
50  }
51 }
52 
55 {
56  LOGCALL(DB, Xapian::termcount, "GlassPositionListTable::positionlist_count", data);
57 
58  Assert(!data.empty());
59 
60  const char * pos = data.data();
61  const char * end = pos + data.size();
62  Xapian::termpos pos_last;
63  if (!unpack_uint(&pos, end, &pos_last)) {
64  throw Xapian::DatabaseCorruptError("Position list data corrupt");
65  }
66  if (pos == end) {
67  // Special case for single entry position list.
68  RETURN(1);
69  }
70 
71  // Skip the header we just read.
72  BitReader rd(pos, end);
73  Xapian::termpos pos_first = rd.decode(pos_last);
74  Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
75  RETURN(pos_size);
76 }
77 
80  string_view term) const
81 {
82  LOGCALL(DB, Xapian::termcount, "GlassPositionListTable::positionlist_count", did | term);
83 
84  string data;
85  if (!get_exact_entry(make_key(did, term), data)) {
86  RETURN(0);
87  }
88 
89  RETURN(positionlist_count(data));
90 }
91 
93 
94 void
96 {
97  LOGCALL_VOID(DB, "GlassBasePositionList::set_data", data);
98 
99  have_started = false;
100 
101  if (data.empty()) {
102  // There's no positional information for this term.
103  size = 0;
104  last = 0;
105  current_pos = 1;
106  return;
107  }
108 
109  const char* pos = data.data();
110  const char* end = pos + data.size();
111  Xapian::termpos pos_last;
112  if (!unpack_uint(&pos, end, &pos_last)) {
113  throw Xapian::DatabaseCorruptError("Position list data corrupt");
114  }
115 
116  if (pos == end) {
117  // Special case for single entry position list.
118  size = 1;
119  current_pos = last = pos_last;
120  return;
121  }
122 
123  rd.init(pos, end);
124  Xapian::termpos pos_first = rd.decode(pos_last);
125  Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
126  rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last);
127  size = pos_size;
128  last = pos_last;
129  current_pos = pos_first;
130 }
131 
134 {
135  LOGCALL(DB, Xapian::termcount, "GlassBasePositionList::get_approx_size", NO_ARGS);
136  RETURN(size);
137 }
138 
141 {
142  LOGCALL(DB, Xapian::termpos, "GlassBasePositionList::back", NO_ARGS);
143  RETURN(last);
144 }
145 
148 {
149  LOGCALL(DB, Xapian::termpos, "GlassBasePositionList::get_position", NO_ARGS);
150  Assert(have_started);
151  RETURN(current_pos);
152 }
153 
154 bool
156 {
157  LOGCALL(DB, bool, "GlassBasePositionList::next", NO_ARGS);
158  if (rare(!have_started)) {
159  have_started = true;
160  return current_pos <= last;
161  }
162  if (current_pos == last) {
163  return false;
164  }
165  current_pos = rd.decode_interpolative_next();
166  return true;
167 }
168 
169 bool
171 {
172  LOGCALL(DB, bool, "GlassBasePositionList::skip_to", termpos);
173  have_started = true;
174  if (termpos >= last) {
175  if (termpos == last) {
176  current_pos = last;
177  return true;
178  }
179  return false;
180  }
181  while (current_pos < termpos) {
182  if (current_pos == last) {
183  return false;
184  }
185  current_pos = rd.decode_interpolative_next();
186  }
187  return true;
188 }
189 
191 {
192  LOGCALL_CTOR(DB, "GlassPositionList", data);
193 
194  pos_data = std::move(data);
195 
196  set_data(pos_data);
197 }
198 
199 void
201 {
202  LOGCALL_VOID(DB, "GlassRePositionList::assign_data", data);
203 
204  // We need to ensure the data stays valid while in use, so abuse the cursor
205  // current_tag member as somewhere to store it.
206  cursor.to_end();
207  cursor.current_tag = std::move(data);
208 
209  set_data(cursor.current_tag);
210 }
211 
212 void
214  string_view term)
215 {
216  LOGCALL_VOID(DB, "GlassRePositionList::read_data", did | term);
217 
218  if (!cursor.find_exact(GlassPositionListTable::make_key(did, term))) {
219  cursor.current_tag.clear();
220  }
221 
222  set_data(cursor.current_tag);
223 }
Classes to encode/decode a bitstream.
bool next()
Advance to the next term position in the list.
bool skip_to(Xapian::termpos termpos)
Advance to the first term position which is at least termpos.
Xapian::termpos get_position() const
Returns current position.
Xapian::termcount get_approx_size() const
Returns size of position list.
void set_data(std::string_view data)
Set positional data and start to decode it.
Xapian::termpos back() const
Return the final entry in this positionlist.
Xapian::termcount positionlist_count(std::string_view data) const
Return the number of entries in specified position list data.
void pack(std::string &s, const Xapian::VecCOW< Xapian::termpos > &vec) const
Pack a position list into a string.
static std::string make_key(Xapian::docid did, std::string_view term)
GlassPositionList(const GlassPositionList &)=delete
Copying is not allowed.
void assign_data(std::string &&data)
Fill list with data, and move the position to the start.
void read_data(Xapian::docid did, std::string_view term)
Fill list with data, and move the position to the start.
Read a stream created by BitWriter.
Definition: bitstream.h:66
Xapian::termpos decode(Xapian::termpos outof, bool force=false)
Definition: bitstream.cc:178
Create a stream to which non-byte-aligned values can be written.
Definition: bitstream.h:34
void encode(Xapian::termpos value, Xapian::termpos outof)
Encode value, known to be less than outof.
Definition: bitstream.cc:92
std::string & freeze()
Finish encoding and return the encoded data as a std::string.
Definition: bitstream.h:51
void encode_interpolative(const Xapian::VecCOW< Xapian::termpos > &pos, int j, int k)
Perform interpolative encoding of pos elements between j and k.
Definition: bitstream.cc:158
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
Suitable for "simple" type T.
Definition: smallvector.h:62
const T & back() const
Definition: smallvector.h:337
size_type size() const
Definition: smallvector.h:135
bool empty() const
Definition: smallvector.h:143
#define rare(COND)
Definition: config.h:607
string term
Xapian::termpos pos
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:480
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:479
A position list in a glass database.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
#define Assert(COND)
Definition: omassert.h:122
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:346
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:315
typedefs for Xapian