xapian-core  2.0.0
glass_inverter.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2010,2013,2014,2023,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_GLASS_INVERTER_H
22 #define XAPIAN_INCLUDED_GLASS_INVERTER_H
23 
24 #include "xapian/types.h"
25 
26 #include "api/smallvector.h"
27 
28 #include <map>
29 #include <string>
30 #include <string_view>
31 #include <vector>
32 
33 #include "negate_unsigned.h"
34 #include "omassert.h"
35 #include "str.h"
36 #include "xapian/error.h"
37 
38 class GlassPostListTable;
40 
41 namespace Xapian {
42 class TermIterator;
43 }
44 
47 
49 class Inverter {
50  friend class GlassPostListTable;
51 
54  friend class GlassPostListTable;
55 
61 
67 
69  std::map<Xapian::docid, Xapian::termcount> pl_changes;
70 
71  public:
74  : tf_delta(1), cf_delta(wdf)
75  {
76  pl_changes.insert(std::make_pair(did, wdf));
77  }
78 
83  {
84  pl_changes.insert(std::make_pair(did, DELETED_POSTING));
85  }
86 
89  Xapian::termcount new_wdf)
90  : tf_delta(0),
91  cf_delta(UNSIGNED_OVERFLOW_OK(new_wdf - old_wdf))
92  {
93  pl_changes.insert(std::make_pair(did, new_wdf));
94  }
95 
98  // May overflow past 0.
101  // Add did to term's postlist
102  pl_changes[did] = wdf;
103  }
104 
107  // May overflow past 0.
110  // Remove did from term's postlist.
112  }
113 
116  Xapian::termcount new_wdf) {
117  UNSIGNED_OVERFLOW_OK(cf_delta += new_wdf - old_wdf);
118  pl_changes[did] = new_wdf;
119  }
120 
123 
126  };
127 
129  std::map<std::string, PostingChanges, std::less<>> postlist_changes;
130 
137  mutable int has_positions_cache = -1;
138 
140  std::map<std::string,
141  std::map<Xapian::docid, std::string>,
142  std::less<>> pos_changes;
143 
144  void store_positions(const GlassPositionListTable & position_table,
145  Xapian::docid did,
146  std::string_view tname,
147  const Xapian::VecCOW<Xapian::termpos> & posvec,
148  bool modifying);
149 
151  std::string_view term,
152  std::string_view s);
153 
154  public:
156  std::map<Xapian::docid, Xapian::termcount> doclen_changes;
157 
158  public:
159  void add_posting(Xapian::docid did, const std::string & term,
160  Xapian::doccount wdf) {
161  auto i = postlist_changes.find(term);
162  if (i == postlist_changes.end()) {
163  postlist_changes.insert(
164  std::make_pair(term, PostingChanges(did, wdf)));
165  } else {
166  i->second.add_posting(did, wdf);
167  }
168  }
169 
170  void remove_posting(Xapian::docid did, const std::string & term,
171  Xapian::doccount wdf) {
172  auto i = postlist_changes.find(term);
173  if (i == postlist_changes.end()) {
174  postlist_changes.insert(
175  std::make_pair(term, PostingChanges(did, wdf, false)));
176  } else {
177  i->second.remove_posting(did, wdf);
178  }
179  }
180 
181  void update_posting(Xapian::docid did, const std::string & term,
182  Xapian::termcount old_wdf,
183  Xapian::termcount new_wdf) {
184  auto i = postlist_changes.find(term);
185  if (i == postlist_changes.end()) {
186  postlist_changes.insert(
187  std::make_pair(term, PostingChanges(did, old_wdf, new_wdf)));
188  } else {
189  i->second.update_posting(did, old_wdf, new_wdf);
190  }
191  }
192 
193  void set_positionlist(const GlassPositionListTable & position_table,
194  Xapian::docid did,
195  std::string_view tname,
196  const Xapian::TermIterator & term,
197  bool modifying = false);
198 
200  std::string_view term);
201 
203  std::string_view term,
204  std::string & s) const;
205 
206  bool has_positions(const GlassPositionListTable & position_table) const;
207 
208  void clear() {
209  doclen_changes.clear();
210  postlist_changes.clear();
211  pos_changes.clear();
212  has_positions_cache = -1;
213  }
214 
216  if (add) {
217  Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] == DELETED_POSTING);
218  }
219  doclen_changes[did] = doclen;
220  }
221 
223  Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] != DELETED_POSTING);
225  }
226 
227  bool get_doclength(Xapian::docid did, Xapian::termcount & doclen) const {
228  auto i = doclen_changes.find(did);
229  if (i == doclen_changes.end())
230  return false;
231  if (rare(i->second == DELETED_POSTING))
232  throw Xapian::DocNotFoundError("Document not found: " + str(did));
233  doclen = i->second;
234  return true;
235  }
236 
238  void flush_doclengths(GlassPostListTable & table);
239 
241  void flush_post_list(GlassPostListTable& table, std::string_view term);
242 
245 
247  void flush_post_lists(GlassPostListTable& table, std::string_view pfx);
248 
250  void flush(GlassPostListTable & table);
251 
254 
255  bool get_deltas(std::string_view term,
256  Xapian::termcount& tf_delta,
257  Xapian::termcount& cf_delta) const {
258  auto i = postlist_changes.find(term);
259  if (i == postlist_changes.end()) {
260  return false;
261  }
262  tf_delta = i->second.get_tfdelta();
263  cf_delta = i->second.get_cfdelta();
264  return true;
265  }
266 };
267 
268 #endif // XAPIAN_INCLUDED_GLASS_INVERTER_H
void add(std::string_view key, std::string_view tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
Class for storing the changes in frequencies for a term.
Xapian::termcount get_tfdelta() const
Get the term frequency delta.
void remove_posting(Xapian::docid did, Xapian::termcount wdf)
Remove a posting.
Xapian::termcount tf_delta
Change in term frequency.
PostingChanges(Xapian::docid did, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
Constructor for an updated posting.
PostingChanges(Xapian::docid did, Xapian::termcount wdf)
Constructor for an added posting.
Xapian::termcount get_cfdelta() const
Get the collection frequency delta.
Xapian::termcount cf_delta
Change in collection frequency.
void update_posting(Xapian::docid did, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
Update a posting.
PostingChanges(Xapian::docid did, Xapian::termcount wdf, bool)
Constructor for a removed posting.
std::map< Xapian::docid, Xapian::termcount > pl_changes
Changes to this term's postlist.
void add_posting(Xapian::docid did, Xapian::termcount wdf)
Add a posting.
Class which "inverts the file".
std::map< std::string, std::map< Xapian::docid, std::string >, std::less<> > pos_changes
Buffered changes to positional data.
bool get_deltas(std::string_view term, Xapian::termcount &tf_delta, Xapian::termcount &cf_delta) const
void delete_doclength(Xapian::docid did)
void flush_post_list(GlassPostListTable &table, std::string_view term)
Flush postlist changes for term.
void remove_posting(Xapian::docid did, const std::string &term, Xapian::doccount wdf)
void flush_all_post_lists(GlassPostListTable &table)
Flush postlist changes for all terms.
void update_posting(Xapian::docid did, const std::string &term, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
std::map< std::string, PostingChanges, std::less<> > postlist_changes
Buffered changes to postlists.
bool get_positionlist(Xapian::docid did, std::string_view term, std::string &s) const
void store_positions(const GlassPositionListTable &position_table, Xapian::docid did, std::string_view tname, const Xapian::VecCOW< Xapian::termpos > &posvec, bool modifying)
bool has_positions(const GlassPositionListTable &position_table) const
void delete_positionlist(Xapian::docid did, std::string_view term)
void set_positionlist(Xapian::docid did, std::string_view term, std::string_view s)
int has_positions_cache
Cached answer to Inverter::has_positions().
bool get_doclength(Xapian::docid did, Xapian::termcount &doclen) const
void clear()
void set_doclength(Xapian::docid did, Xapian::termcount doclen, bool add)
void flush_doclengths(GlassPostListTable &table)
Flush document length changes.
void flush_post_lists(GlassPostListTable &table, std::string_view pfx)
Flush postlist changes for all terms which start with pfx.
void add_posting(Xapian::docid did, const std::string &term, Xapian::doccount wdf)
std::map< Xapian::docid, Xapian::termcount > doclen_changes
Buffered changes to document lengths.
void flush_pos_lists(GlassPositionListTable &table)
Flush position changes.
void flush(GlassPostListTable &table)
Flush all postlist table changes.
Indicates an attempt to access a document not present in the database.
Definition: error.h:662
Class for iterating over a list of terms.
Definition: termiterator.h:41
Suitable for "simple" type T.
Definition: smallvector.h:62
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
#define rare(COND)
Definition: config.h:607
string term
Hierarchy of classes which Xapian can throw as exceptions.
const Xapian::termcount DELETED_POSTING
Magic wdf value used for a deleted posting.
string str(int value)
Convert int to std::string.
Definition: str.cc:91
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Negate unsigned integer, avoiding compiler warnings.
constexpr std::enable_if_t< std::is_unsigned_v< T >, T > negate_unsigned(T value)
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
Custom vector implementations using small vector optimisation.
Convert types to std::string.
typedefs for Xapian