xapian-core  1.4.20
chert_positionlist.cc
Go to the documentation of this file.
1 /* chert_positionlist.cc: A position list in a chert database.
2  *
3  * Copyright (C) 2004,2005,2006,2008,2010,2013 Olly Betts
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
18  * USA
19  */
20 
21 #include <config.h>
22 
23 #include "chert_positionlist.h"
24 
25 #include <xapian/types.h>
26 
27 #include "bitstream.h"
28 #include "debuglog.h"
29 #include "pack.h"
30 
31 #include <string>
32 #include <vector>
33 
34 using namespace std;
35 
36 void
38  const string & tname,
40  const Xapian::PositionIterator &pos_end,
41  bool check_for_update)
42 {
43  LOGCALL_VOID(DB, "ChertPositionListTable::set_positionlist", did | tname | pos | pos_end | check_for_update);
44  Assert(pos != pos_end);
45 
46  // FIXME: avoid the need for this copy!
47  vector<Xapian::termpos> poscopy(pos, pos_end);
48 
49  string key = make_key(did, tname);
50 
51  string s;
52  pack_uint(s, poscopy.back());
53 
54  if (poscopy.size() > 1) {
55  BitWriter wr(s);
56  wr.encode(poscopy[0], poscopy.back());
57  wr.encode(poscopy.size() - 2, poscopy.back() - poscopy[0]);
58  wr.encode_interpolative(poscopy, 0, poscopy.size() - 1);
59  swap(s, wr.freeze());
60  }
61 
62  if (check_for_update) {
63  string old_tag;
64  if (get_exact_entry(key, old_tag) && s == old_tag)
65  return;
66  }
67  add(key, s);
68 }
69 
72  const string & term) const
73 {
74  LOGCALL(DB, Xapian::termcount, "ChertPositionListTable::positionlist_count", did | term);
75 
76  string data;
77  if (!get_exact_entry(make_key(did, term), data)) {
78  RETURN(0);
79  }
80 
81  const char * pos = data.data();
82  const char * end = pos + data.size();
83  Xapian::termpos pos_last;
84  if (!unpack_uint(&pos, end, &pos_last)) {
85  throw Xapian::DatabaseCorruptError("Position list data corrupt");
86  }
87  if (pos == end) {
88  // Special case for single entry position list.
89  RETURN(1);
90  }
91 
92  // Skip the header we just read.
93  BitReader rd(data, pos - data.data());
94  Xapian::termpos pos_first = rd.decode(pos_last);
95  Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
96  RETURN(pos_size);
97 }
98 
100 
101 bool
103  const string & tname)
104 {
105  LOGCALL(DB, bool, "ChertPositionList::read_data", table | did | tname);
106 
107  have_started = false;
108 
109  string data;
110  if (!table->get_exact_entry(ChertPositionListTable::make_key(did, tname), data)) {
111  // There's no positional information for this term.
112  size = 0;
113  last = 0;
114  current_pos = 1;
115  RETURN(false);
116  }
117 
118  const char * pos = data.data();
119  const char * end = pos + data.size();
120  Xapian::termpos pos_last;
121  if (!unpack_uint(&pos, end, &pos_last)) {
122  throw Xapian::DatabaseCorruptError("Position list data corrupt");
123  }
124  if (pos == end) {
125  // Special case for single entry position list.
126  size = 1;
127  current_pos = last = pos_last;
128  RETURN(true);
129  }
130  // Skip the header we just read.
131  rd.init(data, pos - data.data());
132  Xapian::termpos pos_first = rd.decode(pos_last);
133  Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
134  rd.decode_interpolative(0, pos_size - 1, pos_first, pos_last);
135  size = pos_size;
136  last = pos_last;
137  current_pos = pos_first;
138  RETURN(true);
139 }
140 
143 {
144  LOGCALL(DB, Xapian::termcount, "ChertPositionList::get_approx_size", NO_ARGS);
145  RETURN(size);
146 }
147 
150 {
151  LOGCALL(DB, Xapian::termpos, "ChertPositionList::get_position", NO_ARGS);
152  Assert(have_started);
153  RETURN(current_pos);
154 }
155 
156 bool
158 {
159  LOGCALL(DB, bool, "ChertPositionList::next", NO_ARGS);
160  if (rare(!have_started)) {
161  have_started = true;
162  return current_pos <= last;
163  }
164  if (current_pos == last) {
165  return false;
166  }
167  current_pos = rd.decode_interpolative_next();
168  return true;
169 }
170 
171 bool
173 {
174  LOGCALL(DB, bool, "ChertPositionList::skip_to", termpos);
175  have_started = true;
176  if (termpos >= last) {
177  if (termpos == last) {
178  current_pos = last;
179  return true;
180  }
181  return false;
182  }
183  while (current_pos < termpos) {
184  if (current_pos == last) {
185  return false;
186  }
187  current_pos = rd.decode_interpolative_next();
188  }
189  return true;
190 }
static string make_key(Xapian::docid did, const string &term)
#define RETURN(A)
Definition: debuglog.h:482
#define Assert(COND)
Definition: omassert.h:122
Xapian::termcount get_approx_size() const
Returns size of position list.
typedefs for Xapian
Class managing a Btree table in a Chert database.
Definition: chert_table.h:347
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
bool skip_to(Xapian::termpos termpos)
Advance to the first term position which is at least termpos.
STL namespace.
Xapian::termpos get_position() const
Returns current position.
#define rare(COND)
Definition: config.h:562
void encode_interpolative(const std::vector< Xapian::termpos > &pos, int j, int k)
Perform interpolative encoding of pos elements between j and k.
Definition: bitstream.cc:155
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::termcount positionlist_count(Xapian::docid did, const string &term) const
Return the number of entries in specified position list.
bool read_data(const ChertTable *table, Xapian::docid did, const string &tname)
Fill list with data, and move the position to the start.
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
Read a stream created by BitWriter.
Definition: bitstream.h:64
Class for iterating over term positions.
Classes to encode/decode a bitstream.
A position list in a chert database.
Xapian::termpos decode(Xapian::termpos outof, bool force=false)
Definition: bitstream.cc:174
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
Definition: pack.h:382
bool next()
Advance to the next term position in the list.
Pack types into strings and unpack them again.
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
Create a stream to which non-byte-aligned values can be written.
Definition: bitstream.h:33
void set_positionlist(Xapian::docid did, const string &tname, Xapian::PositionIterator pos, const Xapian::PositionIterator &pos_end, bool check_for_update)
Set the position list for term tname in document did.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
void encode(Xapian::termpos value, Xapian::termpos outof)
Encode value, known to be less than outof.
Definition: bitstream.cc:93
string make_key(Xapian::docid did)
Definition: chert_record.cc:35
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
std::string & freeze()
Finish encoding and return the encoded data as a std::string.
Definition: bitstream.h:50