xapian-core  2.0.0
honey_positionlist.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004,2005,2006,2008,2009,2010,2013,2017,2019 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "honey_positionlist.h"
24 
25 #include <xapian/types.h>
26 
27 #include "bitstream.h"
28 #include "debuglog.h"
29 #include "honey_cursor.h"
30 #include "pack.h"
31 
32 #include <string>
33 
34 using namespace std;
35 
// HoneyPositionTable::pack (name taken from the LOGCALL below): serialise the
// position list `vec` into the string `s`.
// NOTE(review): the listing line carrying the qualified name and the first
// parameter (`s`) is not present in this extract.
//
// Layout written:
//   - the last position, via pack_uint();
//   - only when there is more than one entry, a bit stream holding the first
//     position, the entry count minus 2, and the interpolatively coded
//     entries.
// `vec` must be non-empty (checked with Assert).
36 void
38  const Xapian::VecCOW<Xapian::termpos>& vec) const
39 {
40  LOGCALL_VOID(DB, "HoneyPositionTable::pack", s | vec);
41  Assert(!vec.empty());
42 
// The last position always leads the encoding; for a single-entry list it is
// the whole encoding.
43  pack_uint(s, vec.back());
44 
45  if (vec.size() > 1) {
// The writer is constructed from what has been packed into s so far.
46  BitWriter wr(s);
// First position, encoded with the last position as the bound.
47  wr.encode(vec[0], vec.back());
// size() >= 2 on this path, so size() - 2 is stored; the readers
// (positionlist_count() and set_data()) add the 2 back after decoding.
48  wr.encode(vec.size() - 2, vec.back() - vec[0]);
49  wr.encode_interpolative(vec, 0, vec.size() - 1);
// freeze() finishes the bit stream and returns the encoded string, which is
// swapped into s as the final result.
50  swap(s, wr.freeze());
51  }
52 }
53 
// Tail of the signature of HoneyPositionTable::positionlist_count (name and
// return type Xapian::termcount taken from the LOGCALL below).
// NOTE(review): the listing lines carrying the return type, the qualified name
// and the `did` parameter are not present in this extract.
//
// Returns the number of entries in the position list stored for (did, term):
//   - 0 when no exact entry exists for the key;
//   - 1 when the stored data is only the packed last position;
//   - otherwise the count decoded from the bit stream header.
// Throws Xapian::DatabaseCorruptError if the leading packed integer cannot be
// decoded.
56  string_view term) const
57 {
58  LOGCALL(DB, Xapian::termcount, "HoneyPositionTable::positionlist_count", did | term);
59 
60  string data;
61  if (!get_exact_entry(make_key(did, term), data)) {
62  RETURN(0);
63  }
64 
65  const char* pos = data.data();
66  const char* end = pos + data.size();
67  Xapian::termpos pos_last;
// unpack_uint() is handed &pos; the `pos == end` test below relies on pos
// having moved past the bytes it consumed.
68  if (!unpack_uint(&pos, end, &pos_last)) {
69  throw Xapian::DatabaseCorruptError("Position list data corrupt");
70  }
71  if (pos == end) {
72  // Special case for single entry position list.
73  RETURN(1);
74  }
75 
76  // Skip the header we just read.
77  BitReader rd(pos, end);
// Mirror of pack(): the first position is bounded by the last, and the stored
// count is (number of entries - 2), bounded by last - first.
78  Xapian::termpos pos_first = rd.decode(pos_last);
79  Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
80  RETURN(pos_size);
81 }
82 
84 
// HoneyBasePositionList::set_data (name taken from the LOGCALL below): point
// this list at the encoded position data `data` and reset iteration state.
// NOTE(review): the listing line carrying the qualified name and parameter
// list is not present in this extract.
//
// Sets the members size, last and current_pos, and clears have_started.
// Throws Xapian::DatabaseCorruptError if the leading packed integer cannot be
// decoded.
85 void
87 {
88  LOGCALL_VOID(DB, "HoneyBasePositionList::set_data", data);
89 
90  have_started = false;
91 
92  if (data.empty()) {
93  // There's no positional information for this term.
94  size = 0;
95  last = 0;
// With last == 0 and current_pos == 1, the `current_pos <= last` test made by
// the first next() call is false, so an empty list reports no entries.
96  current_pos = 1;
97  return;
98  }
99 
100  const char* pos = data.data();
101  const char* end = pos + data.size();
102  Xapian::termpos pos_last;
103  if (!unpack_uint(&pos, end, &pos_last)) {
104  throw Xapian::DatabaseCorruptError("Position list data corrupt");
105  }
106 
107  if (pos == end) {
108  // Special case for single entry position list.
109  size = 1;
110  current_pos = last = pos_last;
111  return;
112  }
113 
// rd is a member reader which keeps pointing into `data` after this function
// returns (next() and skip_to() decode from it), so the caller has to keep
// those bytes alive.
114  rd.init(pos, end);
// Same header layout as written by HoneyPositionTable::pack(): first position,
// then (number of entries - 2).
115  Xapian::termpos pos_first = rd.decode(pos_last);
116  Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
// Presumably primes the reader so decode_interpolative_next() can yield the
// entries after the first - TODO confirm against bitstream.h.
117  rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last);
118  size = pos_size;
119  last = pos_last;
// The first position is already known, so the first next() call does not need
// to decode anything.
120  current_pos = pos_first;
121 }
122 
// Body of HoneyBasePositionList::get_approx_size (name and return type
// Xapian::termcount taken from the LOGCALL below).
// NOTE(review): the signature lines are not present in this extract.
// Returns the `size` member, i.e. the entry count stored by set_data().
125 {
126  LOGCALL(DB, Xapian::termcount, "HoneyBasePositionList::get_approx_size", NO_ARGS);
127  RETURN(size);
128 }
129 
// Body of HoneyBasePositionList::back (name and return type Xapian::termpos
// taken from the LOGCALL below).
// NOTE(review): the signature lines are not present in this extract.
// Returns the `last` member, which set_data() sets to the final position of
// the list (or to 0 when there is no positional data).
132 {
133  LOGCALL(DB, Xapian::termpos, "HoneyBasePositionList::back", NO_ARGS);
134  RETURN(last);
135 }
136 
// Body of HoneyBasePositionList::get_position (name and return type
// Xapian::termpos taken from the LOGCALL below).
// NOTE(review): the signature lines are not present in this extract.
// Returns the current position. Iteration must already have been started by
// next() or skip_to(), which is what sets have_started.
139 {
140  LOGCALL(DB, Xapian::termpos, "HoneyBasePositionList::get_position", NO_ARGS);
141  Assert(have_started);
142  RETURN(current_pos);
143 }
144 
// HoneyBasePositionList::next (name taken from the LOGCALL below): advance to
// the next position in the list.
// NOTE(review): the listing line carrying the qualified name is not present in
// this extract.
// Returns true if current_pos now holds a valid position, false when the list
// is exhausted.
145 bool
147 {
148  LOGCALL(DB, bool, "HoneyBasePositionList::next", NO_ARGS);
149  if (rare(!have_started)) {
// First call: set_data() already left the first position in current_pos, so
// nothing is decoded here. The comparison is false only for the empty-list
// state (current_pos == 1, last == 0).
150  have_started = true;
151  return current_pos <= last;
152  }
// Already on the final entry: report the end without touching the reader.
153  if (current_pos == last) {
154  return false;
155  }
156  current_pos = rd.decode_interpolative_next();
157  return true;
158 }
159 
// HoneyBasePositionList::skip_to (name taken from the LOGCALL below): advance
// to the first position which is at least `termpos`.
// NOTE(review): the listing line carrying the qualified name and parameter
// list is not present in this extract.
// Returns true if such a position exists (it is then in current_pos), false
// otherwise. Always marks iteration as started.
160 bool
162 {
163  LOGCALL(DB, bool, "HoneyBasePositionList::skip_to", termpos);
164  have_started = true;
// Targets at or beyond the final entry are answered from `last` alone, without
// decoding any intermediate entries.
165  if (termpos >= last) {
166  if (termpos == last) {
167  current_pos = last;
168  return true;
169  }
// termpos is past the final entry; current_pos is left unchanged.
170  return false;
171  }
// termpos < last here: decode forwards until current_pos reaches or passes it.
172  while (current_pos < termpos) {
173  if (current_pos == last) {
174  return false;
175  }
176  current_pos = rd.decode_interpolative_next();
177  }
178  return true;
179 }
180 
// Body of a HoneyPositionList constructor (class name taken from the
// LOGCALL_CTOR below) which takes the encoded position data as `data`.
// NOTE(review): the signature line is not present in this extract.
182 {
183  LOGCALL_CTOR(DB, "HoneyPositionList", data);
184 
// Move the bytes into the pos_data member and decode from that member rather
// than from the parameter - presumably so the data the reader points into
// lives as long as this object; confirm against the class declaration.
185  pos_data = std::move(data);
186 
187  set_data(pos_data);
188 }
189 
// HoneyRePositionList::assign_data (name taken from the LOGCALL below): take
// ownership of the encoded position data in `data` and restart decoding from
// its beginning.
// NOTE(review): the listing line carrying the qualified name and parameter
// list is not present in this extract.
190 void
192 {
193  LOGCALL_VOID(DB, "HoneyRePositionList::assign_data", data);
194 
195  // We need to ensure the data stays valid while in use, so abuse the cursor
196  // current_tag member as somewhere to store it.
197  cursor.to_end();
198  cursor.current_tag = std::move(data);
199 
// Decode from the stored copy, not from the moved-from parameter.
200  set_data(cursor.current_tag);
201 }
202 
// HoneyRePositionList::read_data (name taken from the LOGCALL below): load the
// position list stored for (did, term) through the cursor and restart decoding
// from its beginning.
// NOTE(review): the listing line carrying the qualified name and the `did`
// parameter is not present in this extract.
203 void
205  const string& term)
206 {
207  LOGCALL_VOID(DB, "HoneyRePositionList::read_data", did | term);
208 
209  if (!cursor.find_exact(HoneyPositionTable::make_key(did, term))) {
// No entry for this key: hand set_data() an empty string, which it treats as
// "no positional information for this term".
210  cursor.current_tag.clear();
211  } else {
212  cursor.read_tag();
213  }
214 
215  set_data(cursor.current_tag);
216 }
Classes to encode/decode a bitstream.
void set_data(const std::string &data)
Set positional data and start to decode it.
bool skip_to(Xapian::termpos termpos)
Advance to the first term position which is at least termpos.
Xapian::termcount get_approx_size() const
Returns size of position list.
Xapian::termpos back() const
Return the final entry in this positionlist.
Xapian::termpos get_position() const
Returns current position.
bool next()
Advance to the next term position in the list.
HoneyPositionList(const HoneyPositionList &)=delete
Copying is not allowed.
static std::string make_key(Xapian::docid did, std::string_view term)
Xapian::termcount positionlist_count(Xapian::docid did, std::string_view term) const
Return the number of entries in specified position list.
void pack(std::string &s, const Xapian::VecCOW< Xapian::termpos > &vec) const
Pack a position list into a string.
void read_data(Xapian::docid did, const std::string &term)
Fill list with data, and move the position to the start.
void assign_data(std::string &&data)
Fill list with data, and move the position to the start.
Read a stream created by BitWriter.
Definition: bitstream.h:66
Xapian::termpos decode(Xapian::termpos outof, bool force=false)
Definition: bitstream.cc:178
Create a stream to which non-byte-aligned values can be written.
Definition: bitstream.h:34
void encode(Xapian::termpos value, Xapian::termpos outof)
Encode value, known to be less than outof.
Definition: bitstream.cc:92
std::string & freeze()
Finish encoding and return the encoded data as a std::string.
Definition: bitstream.h:51
void encode_interpolative(const Xapian::VecCOW< Xapian::termpos > &pos, int j, int k)
Perform interpolative encoding of pos elements between j and k.
Definition: bitstream.cc:158
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:397
Suitable for "simple" type T.
Definition: smallvector.h:62
const T & back() const
Definition: smallvector.h:337
size_type size() const
Definition: smallvector.h:135
bool empty() const
Definition: smallvector.h:143
#define rare(COND)
Definition: config.h:607
string term
Xapian::termpos pos
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:480
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:479
HoneyCursor class.
A position list in a honey database.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
#define Assert(COND)
Definition: omassert.h:122
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:346
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:315
typedefs for Xapian