xapian-core  2.0.0
honey_inverter.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2013 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "honey_inverter.h"
24 
25 #include "honey_positionlist.h"
26 #include "honey_postlist.h"
27 #include "honey_postlisttable.h"
28 
29 #include "api/termlist.h"
30 
31 #include <map>
32 #include <string>
33 
34 using namespace std;
35 
36 void
38  Xapian::docid did,
39  const string& term,
40  const Xapian::VecCOW<Xapian::termpos>& posvec,
41  bool modifying)
42 {
43  string s;
44  position_table.pack(s, posvec);
45  if (modifying) {
46  auto i = pos_changes.find(term);
47  if (i != pos_changes.end()) {
48  map<Xapian::docid, string>& m = i->second;
49  auto j = m.find(did);
50  if (j != m.end()) {
51  // Update existing entry.
52  swap(j->second, s);
53  return;
54  }
55  }
56  const string& key = position_table.make_key(did, term);
57  string old_tag;
58  if (position_table.get_exact_entry(key, old_tag) && s == old_tag) {
59  // Identical to existing entry on disk.
60  return;
61  }
62  }
63  set_positionlist(did, term, s);
64 }
65 
66 void
68  Xapian::docid did,
69  const string& term,
70  const Xapian::TermIterator& term_it,
71  bool modifying)
72 {
73  auto ptr = term_it.internal->get_vec_termpos();
74  if (ptr) {
75  if (!ptr->empty()) {
76  store_positions(position_table, did, term, *ptr, modifying);
77  return;
78  }
79  } else {
81  if (pos != term_it.positionlist_end()) {
83  posvec.reserve(term_it.positionlist_count());
84  while (pos != term_it.positionlist_end()) {
85  posvec.push_back(*pos);
86  ++pos;
87  }
88  store_positions(position_table, did, term, posvec, modifying);
89  return;
90  }
91  }
92  // If we get here, the new position list was empty.
93  if (modifying)
94  delete_positionlist(did, term);
95 }
96 
97 void
99  const string& term,
100  const string& s)
101 {
102  pos_changes.insert(make_pair(term, map<Xapian::docid, string>()))
103  .first->second[did] = s;
104 }
105 
106 void
108  const string& term)
109 {
110  set_positionlist(did, term, string());
111 }
112 
113 bool
115  const string& term,
116  string& s) const
117 {
118  auto i = pos_changes.find(term);
119  if (i == pos_changes.end())
120  return false;
121  const map<Xapian::docid, string>& m = i->second;
122  auto j = m.find(did);
123  if (j == m.end())
124  return false;
125  s = j->second;
126  return true;
127 }
128 
129 bool
131 {
132  if (pos_changes.empty())
133  return !position_table.empty();
134 
135  // FIXME: Can we cheaply keep track of some things to make this more
136  // efficient? E.g. how many sets and deletes we had in total perhaps.
137  honey_tablesize_t changes = 0;
138  for (auto i : pos_changes) {
139  const map<Xapian::docid, string>& m = i.second;
140  for (auto j : m) {
141  const string& s = j.second;
142  if (!s.empty())
143  return true;
144  ++changes;
145  }
146  }
147 
148  // We have positions unless all the existing entries are removed.
149  return changes != position_table.get_entry_count();
150 }
151 
152 void
154 {
155  table.merge_doclen_changes(doclen_changes);
156  doclen_changes.clear();
157 }
158 
159 void
161 {
162  auto i = postlist_changes.find(term);
163  if (i == postlist_changes.end()) return;
164 
165  // Flush buffered changes for just this term's postlist.
166  table.merge_changes(term, i->second);
167  postlist_changes.erase(i);
168 }
169 
170 void
172 {
173  for (auto&& i : postlist_changes) {
174  table.merge_changes(i.first, i.second);
175  }
176  postlist_changes.clear();
177 }
178 
179 void
181 {
182  if (pfx.empty())
183  return flush_all_post_lists(table);
184 
185  auto begin = postlist_changes.lower_bound(pfx);
186  decltype(begin) end;
187  string pfxinc = pfx;
188  while (true) {
189  if (pfxinc.back() != '\xff') {
190  ++pfxinc.back();
191  end = postlist_changes.lower_bound(pfxinc);
192  break;
193  }
194  pfxinc.resize(pfxinc.size() - 1);
195  if (pfxinc.empty()) {
196  end = postlist_changes.end();
197  break;
198  }
199  }
200 
201  for (auto i = begin; i != end; ++i) {
202  table.merge_changes(i->first, i->second);
203  }
204 
205  // Erase all the entries in one go, as that's:
206  // O(log(postlist_changes.size()) + O(number of elements removed)
207  postlist_changes.erase(begin, end);
208 }
209 
210 void
212 {
213  flush_doclengths(table);
214  flush_all_post_lists(table);
215 }
216 
217 void
219 {
220  for (auto i : pos_changes) {
221  const string& term = i.first;
222  const map<Xapian::docid, string>& m = i.second;
223  for (auto j : m) {
224  Xapian::docid did = j.first;
225  const string& s = j.second;
226  if (!s.empty())
227  table.set_positionlist(did, term, s);
228  else
229  table.delete_positionlist(did, term);
230  }
231  }
232  pos_changes.clear();
233 }
bool has_positions(const HoneyPositionTable &position_table) const
void store_positions(const HoneyPositionTable &position_table, Xapian::docid did, const std::string &tname, const Xapian::VecCOW< Xapian::termpos > &posvec, bool modifying)
void flush_post_lists(HoneyPostListTable &table, const std::string &pfx)
Flush postlist changes for all terms which start with pfx.
void flush_pos_lists(HoneyPositionTable &table)
Flush position changes.
void flush(HoneyPostListTable &table)
Flush all postlist table changes.
void set_positionlist(Xapian::docid did, const std::string &term, const std::string &s)
void flush_post_list(HoneyPostListTable &table, const std::string &term)
Flush postlist changes for term.
void flush_all_post_lists(HoneyPostListTable &table)
Flush postlist changes for all terms.
void flush_doclengths(HoneyPostListTable &table)
Flush document length changes.
void delete_positionlist(Xapian::docid did, const std::string &term)
bool get_positionlist(Xapian::docid did, const std::string &term, std::string &s) const
static std::string make_key(Xapian::docid did, std::string_view term)
void delete_positionlist(Xapian::docid did, std::string_view tname)
Delete the position list for term tname in document did.
void pack(std::string &s, const Xapian::VecCOW< Xapian::termpos > &vec) const
Pack a position list into a string.
void set_positionlist(Xapian::docid did, std::string_view tname, std::string_view s)
Set the position list for term tname in document did.
void merge_changes(const std::string &term, const HoneyInverter::PostingChanges &changes)
void merge_doclen_changes(const std::map< Xapian::docid, Xapian::termcount > &changes)
bool get_exact_entry(std::string_view key, std::string *tag) const
Definition: honey_table.cc:247
honey_tablesize_t get_entry_count() const
Definition: honey_table.h:693
bool empty() const
Definition: honey_table.h:659
Class for iterating over term positions.
virtual const Xapian::VecCOW< Xapian::termpos > * get_vec_termpos() const
Get pointer to VecCOW<termpos> if that's the internal representation.
Definition: termlist.cc:42
Class for iterating over a list of terms.
Definition: termiterator.h:41
PositionIterator positionlist_end() const noexcept
Return an end PositionIterator for the current term.
Definition: termiterator.h:109
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.
Suitable for "simple" type T.
Definition: smallvector.h:62
void reserve(size_type n)
Definition: smallvector.h:147
void push_back(T elt)
Definition: smallvector.h:190
string term
Xapian::termpos pos
unsigned long long honey_tablesize_t
How many entries there are in a table.
Definition: honey_defs.h:107
HoneyInverter class which "inverts the file".
A position list in a honey database.
PostList in a honey database.
Subclass of HoneyTable which holds postlists.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Abstract base class for termlists.