xapian-core  2.0.0
glass_inverter.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2013,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "glass_inverter.h"
24 
25 #include "glass_postlist.h"
26 #include "glass_positionlist.h"
27 
28 #include "api/termlist.h"
29 
30 #include <map>
31 #include <string>
32 
33 using namespace std;
34 
35 void
37  Xapian::docid did,
38  string_view tname,
39  const Xapian::VecCOW<Xapian::termpos> & posvec,
40  bool modifying)
41 {
42  string s;
43  position_table.pack(s, posvec);
44  if (modifying && has_positions_cache != 0) {
45  // If we add positions we must then have positions, but if we remove
46  // positions we don't know if we then have them or not.
47  has_positions_cache = s.empty() ? -1 : 1;
48 
49  auto i = pos_changes.find(tname);
50  if (i != pos_changes.end()) {
51  map<Xapian::docid, string> & m = i->second;
52  auto j = m.find(did);
53  if (j != m.end()) {
54  // Update existing entry.
55  swap(j->second, s);
56  return;
57  }
58  }
59  const string & key = position_table.make_key(did, tname);
60  string old_tag;
61  if (position_table.get_exact_entry(key, old_tag) && s == old_tag) {
62  // Identical to existing entry on disk.
63  return;
64  }
65  } else {
66  // If we add positions, we must then have positions.
67  if (!s.empty()) has_positions_cache = 1;
68  }
69  set_positionlist(did, tname, s);
70 }
71 
72 void
74  Xapian::docid did,
75  string_view tname,
76  const Xapian::TermIterator & term,
77  bool modifying)
78 {
79  auto ptr = term.internal->get_vec_termpos();
80  if (ptr) {
81  if (!ptr->empty()) {
82  store_positions(position_table, did, tname, *ptr, modifying);
83  return;
84  }
85  } else {
86  Xapian::PositionIterator pos = term.positionlist_begin();
87  if (pos != term.positionlist_end()) {
89  posvec.reserve(term.positionlist_count());
90  while (pos != term.positionlist_end()) {
91  posvec.push_back(*pos);
92  ++pos;
93  }
94  store_positions(position_table, did, tname, posvec, modifying);
95  return;
96  }
97  }
98  // If we get here, the new position list was empty.
99  if (modifying)
100  delete_positionlist(did, tname);
101 }
102 
103 void
105  string_view term,
106  string_view s)
107 {
108  has_positions_cache = s.empty() ? -1 : 1;
109  pos_changes.insert(make_pair(term, map<Xapian::docid, string>()))
110  .first->second[did] = s;
111 }
112 
113 void
115  string_view term)
116 {
117  set_positionlist(did, term, {});
118 }
119 
120 bool
122  string_view term,
123  string & s) const
124 {
125  auto i = pos_changes.find(term);
126  if (i == pos_changes.end())
127  return false;
128  const map<Xapian::docid, string> & m = i->second;
129  auto j = m.find(did);
130  if (j == m.end())
131  return false;
132  s = j->second;
133  return true;
134 }
135 
136 bool
137 Inverter::has_positions(const GlassPositionListTable & position_table) const
138 {
139  if (has_positions_cache < 0) {
140  // FIXME: Can we cheaply keep track of some things to make this more
141  // efficient? E.g. how many sets and deletes we had in total perhaps.
142  glass_tablesize_t changes = 0;
143  for (const auto& i : pos_changes) {
144  const map<Xapian::docid, string>& m = i.second;
145  for (const auto& j : m) {
146  const string & s = j.second;
147  if (!s.empty())
148  return true;
149  ++changes;
150  }
151  }
152 
153  // We have positions unless all the existing entries are removed.
154  has_positions_cache = (changes != position_table.get_entry_count());
155  }
156  return has_positions_cache;
157 }
158 
159 void
161 {
162  table.merge_doclen_changes(doclen_changes);
163  doclen_changes.clear();
164 }
165 
166 void
168 {
169  auto i = postlist_changes.find(term);
170  if (i == postlist_changes.end()) return;
171 
172  // Flush buffered changes for just this term's postlist.
173  table.merge_changes(term, i->second);
174  postlist_changes.erase(i);
175 }
176 
177 void
179 {
180  for (auto i = postlist_changes.begin(); i != postlist_changes.end(); ++i) {
181  table.merge_changes(i->first, i->second);
182  }
183  postlist_changes.clear();
184 }
185 
186 void
188 {
189  if (pfx.empty())
190  return flush_all_post_lists(table);
191 
192  auto begin = postlist_changes.lower_bound(pfx);
193  decltype(begin) end;
194  string pfxinc{pfx};
195  while (true) {
196  if (pfxinc.back() != '\xff') {
197  ++pfxinc.back();
198  end = postlist_changes.lower_bound(pfxinc);
199  break;
200  }
201  pfxinc.resize(pfxinc.size() - 1);
202  if (pfxinc.empty()) {
203  end = postlist_changes.end();
204  break;
205  }
206  }
207 
208  for (auto i = begin; i != end; ++i) {
209  table.merge_changes(i->first, i->second);
210  }
211 
212  // Erase all the entries in one go, as that's:
213  // O(log(postlist_changes.size()) + O(number of elements removed)
214  postlist_changes.erase(begin, end);
215 }
216 
217 void
219 {
220  flush_doclengths(table);
221  flush_all_post_lists(table);
222 }
223 
224 void
226 {
227  for (auto i : pos_changes) {
228  const string & term = i.first;
229  const map<Xapian::docid, string> & m = i.second;
230  for (auto j : m) {
231  Xapian::docid did = j.first;
232  const string & s = j.second;
233  if (!s.empty())
234  table.set_positionlist(did, term, s);
235  else
236  table.delete_positionlist(did, term);
237  }
238  }
239  pos_changes.clear();
240  has_positions_cache = -1;
241 }
void set_positionlist(Xapian::docid did, std::string_view tname, std::string_view s)
Set the position list for term tname in document did.
void pack(std::string &s, const Xapian::VecCOW< Xapian::termpos > &vec) const
Pack a position list into a string.
void delete_positionlist(Xapian::docid did, std::string_view tname)
Delete the position list for term tname in document did.
static std::string make_key(Xapian::docid did, std::string_view term)
void merge_changes(std::string_view term, const Inverter::PostingChanges &changes)
Merge changes for a term.
void merge_doclen_changes(const std::map< Xapian::docid, Xapian::termcount > &doclens)
Merge document length changes.
bool get_exact_entry(std::string_view key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
glass_tablesize_t get_entry_count() const
Return a count of the number of entries in the table.
Definition: glass_table.h:685
void flush_post_list(GlassPostListTable &table, std::string_view term)
Flush postlist changes for term.
void flush_all_post_lists(GlassPostListTable &table)
Flush postlist changes for all terms.
bool get_positionlist(Xapian::docid did, std::string_view term, std::string &s) const
void store_positions(const GlassPositionListTable &position_table, Xapian::docid did, std::string_view tname, const Xapian::VecCOW< Xapian::termpos > &posvec, bool modifying)
bool has_positions(const GlassPositionListTable &position_table) const
void delete_positionlist(Xapian::docid did, std::string_view term)
void set_positionlist(Xapian::docid did, std::string_view term, std::string_view s)
void flush_doclengths(GlassPostListTable &table)
Flush document length changes.
void flush_post_lists(GlassPostListTable &table, std::string_view pfx)
Flush postlist changes for all terms which start with pfx.
void flush_pos_lists(GlassPositionListTable &table)
Flush position changes.
void flush(GlassPostListTable &table)
Flush all postlist table changes.
Class for iterating over term positions.
Class for iterating over a list of terms.
Definition: termiterator.h:41
Suitable for "simple" type T.
Definition: smallvector.h:62
void reserve(size_type n)
Definition: smallvector.h:147
void push_back(T elt)
Definition: smallvector.h:190
string term
Xapian::termpos pos
unsigned long long glass_tablesize_t
How many entries there are in a table.
Definition: glass_defs.h:71
Inverter class which "inverts the file".
A position list in a glass database.
Postlists in glass databases.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Abstract base class for termlists.