xapian-core  2.0.0
honey_inverter.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2010,2013,2014,2023,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_HONEY_INVERTER_H
22 #define XAPIAN_INCLUDED_HONEY_INVERTER_H
23 
24 #include "xapian/types.h"
25 
26 #include "api/smallvector.h"
27 
28 #include <map>
29 #include <string>
30 #include <vector>
31 
32 #include "negate_unsigned.h"
33 #include "omassert.h"
34 #include "str.h"
35 #include "xapian/error.h"
36 
37 class HoneyPostListTable;
38 class HoneyPositionTable;
39 
40 namespace Xapian {
41 class TermIterator;
42 }
43 
46  friend class HoneyPostListTable;
47 
50 
53  friend class HoneyPostListTable;
54 
60 
66 
68  std::map<Xapian::docid, Xapian::termcount> pl_changes;
69 
70  public:
73  : tf_delta(1), cf_delta(wdf)
74  {
75  pl_changes.insert(std::make_pair(did, wdf));
76  }
77 
82  {
83  pl_changes.insert(std::make_pair(did, DELETED_POSTING));
84  }
85 
88  Xapian::termcount new_wdf)
89  : tf_delta(0),
90  cf_delta(UNSIGNED_OVERFLOW_OK(new_wdf - old_wdf))
91  {
92  pl_changes.insert(std::make_pair(did, new_wdf));
93  }
94 
97  // May overflow past 0.
100  // Add did to term's postlist
101  pl_changes[did] = wdf;
102  }
103 
106  // May overflow past 0.
109  // Remove did from term's postlist.
111  }
112 
115  Xapian::termcount new_wdf) {
116  UNSIGNED_OVERFLOW_OK(cf_delta += new_wdf - old_wdf);
117  pl_changes[did] = new_wdf;
118  }
119 
122 
125  };
126 
128  std::map<std::string, PostingChanges, std::less<>> postlist_changes;
129 
131  std::map<std::string,
132  std::map<Xapian::docid, std::string>,
133  std::less<>> pos_changes;
134 
135  void store_positions(const HoneyPositionTable& position_table,
136  Xapian::docid did,
137  const std::string& tname,
138  const Xapian::VecCOW<Xapian::termpos>& posvec,
139  bool modifying);
140 
142  const std::string& term,
143  const std::string& s);
144 
145  public:
147  std::map<Xapian::docid, Xapian::termcount> doclen_changes;
148 
149  public:
150  void add_posting(Xapian::docid did, const std::string& term,
151  Xapian::doccount wdf) {
152  auto i = postlist_changes.find(term);
153  if (i == postlist_changes.end()) {
154  postlist_changes.insert(
155  std::make_pair(term, PostingChanges(did, wdf)));
156  } else {
157  i->second.add_posting(did, wdf);
158  }
159  }
160 
161  void remove_posting(Xapian::docid did, const std::string& term,
162  Xapian::doccount wdf) {
163  auto i = postlist_changes.find(term);
164  if (i == postlist_changes.end()) {
165  postlist_changes.insert(
166  std::make_pair(term, PostingChanges(did, wdf, false)));
167  } else {
168  i->second.remove_posting(did, wdf);
169  }
170  }
171 
172  void update_posting(Xapian::docid did, const std::string& term,
173  Xapian::termcount old_wdf,
174  Xapian::termcount new_wdf) {
175  auto i = postlist_changes.find(term);
176  if (i == postlist_changes.end()) {
177  postlist_changes.insert(
178  std::make_pair(term, PostingChanges(did, old_wdf, new_wdf)));
179  } else {
180  i->second.update_posting(did, old_wdf, new_wdf);
181  }
182  }
183 
184  void set_positionlist(const HoneyPositionTable& position_table,
185  Xapian::docid did,
186  const std::string& tname,
187  const Xapian::TermIterator& term,
188  bool modifying = false);
189 
191  const std::string& term);
192 
194  const std::string& term,
195  std::string& s) const;
196 
197  bool has_positions(const HoneyPositionTable& position_table) const;
198 
199  void clear() {
200  doclen_changes.clear();
201  postlist_changes.clear();
202  pos_changes.clear();
203  }
204 
206  if (add) {
207  Assert(doclen_changes.find(did) == doclen_changes.end() ||
209  }
210  doclen_changes[did] = doclen;
211  }
212 
214  Assert(doclen_changes.find(did) == doclen_changes.end() ||
217  }
218 
219  bool get_doclength(Xapian::docid did, Xapian::termcount& doclen) const {
220  auto i = doclen_changes.find(did);
221  if (i == doclen_changes.end())
222  return false;
223  if (rare(i->second == DELETED_POSTING))
224  throw Xapian::DocNotFoundError("Document not found: " + str(did));
225  doclen = i->second;
226  return true;
227  }
228 
231 
233  void flush_post_list(HoneyPostListTable& table, const std::string& term);
234 
237 
239  void flush_post_lists(HoneyPostListTable& table, const std::string& pfx);
240 
242  void flush(HoneyPostListTable& table);
243 
245  void flush_pos_lists(HoneyPositionTable& table);
246 
247  bool get_deltas(std::string_view term,
248  Xapian::termcount& tf_delta,
249  Xapian::termcount& cf_delta) const {
250  auto i = postlist_changes.find(term);
251  if (i == postlist_changes.end()) {
252  return false;
253  }
254  tf_delta = i->second.get_tfdelta();
255  cf_delta = i->second.get_cfdelta();
256  return true;
257  }
258 };
259 
260 #endif // XAPIAN_INCLUDED_HONEY_INVERTER_H
Class for storing the changes in frequencies for a term.
void remove_posting(Xapian::docid did, Xapian::termcount wdf)
Remove a posting.
PostingChanges(Xapian::docid did, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
Constructor for an updated posting.
std::map< Xapian::docid, Xapian::termcount > pl_changes
Changes to this term's postlist.
Xapian::termcount cf_delta
Change in collection frequency.
Xapian::termcount tf_delta
Change in term frequency.
PostingChanges(Xapian::docid did, Xapian::termcount wdf, bool)
Constructor for a removed posting.
Xapian::termcount get_cfdelta() const
Get the collection frequency delta.
void update_posting(Xapian::docid did, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
Update a posting.
Xapian::termcount get_tfdelta() const
Get the term frequency delta.
PostingChanges(Xapian::docid did, Xapian::termcount wdf)
Constructor for an added posting.
void add_posting(Xapian::docid did, Xapian::termcount wdf)
Add a posting.
Class which "inverts the file".
bool get_doclength(Xapian::docid did, Xapian::termcount &doclen) const
bool get_deltas(std::string_view term, Xapian::termcount &tf_delta, Xapian::termcount &cf_delta) const
void add_posting(Xapian::docid did, const std::string &term, Xapian::doccount wdf)
bool has_positions(const HoneyPositionTable &position_table) const
void store_positions(const HoneyPositionTable &position_table, Xapian::docid did, const std::string &tname, const Xapian::VecCOW< Xapian::termpos > &posvec, bool modifying)
void flush_post_lists(HoneyPostListTable &table, const std::string &pfx)
Flush postlist changes for all terms which start with pfx.
void flush_pos_lists(HoneyPositionTable &table)
Flush position changes.
void flush(HoneyPostListTable &table)
Flush all postlist table changes.
void set_positionlist(Xapian::docid did, const std::string &term, const std::string &s)
static const Xapian::termcount DELETED_POSTING
Magic wdf value used for a deleted posting.
std::map< std::string, PostingChanges, std::less<> > postlist_changes
Buffered changes to postlists.
void flush_post_list(HoneyPostListTable &table, const std::string &term)
Flush postlist changes for term.
void set_doclength(Xapian::docid did, Xapian::termcount doclen, bool add)
void flush_all_post_lists(HoneyPostListTable &table)
Flush postlist changes for all terms.
void flush_doclengths(HoneyPostListTable &table)
Flush document length changes.
void update_posting(Xapian::docid did, const std::string &term, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
void delete_positionlist(Xapian::docid did, const std::string &term)
void remove_posting(Xapian::docid did, const std::string &term, Xapian::doccount wdf)
bool get_positionlist(Xapian::docid did, const std::string &term, std::string &s) const
std::map< std::string, std::map< Xapian::docid, std::string >, std::less<> > pos_changes
Buffered changes to positional data.
void delete_doclength(Xapian::docid did)
std::map< Xapian::docid, Xapian::termcount > doclen_changes
Buffered changes to document lengths.
void add(std::string_view key, const char *val, size_t val_size, bool compressed=false)
Definition: honey_table.cc:74
Indicates an attempt to access a document not present in the database.
Definition: error.h:662
Class for iterating over a list of terms.
Definition: termiterator.h:41
Suitable for "simple" type T.
Definition: smallvector.h:62
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
#define rare(COND)
Definition: config.h:607
string term
Hierarchy of classes which Xapian can throw as exceptions.
string str(int value)
Convert int to std::string.
Definition: str.cc:91
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Negate unsigned integer, avoiding compiler warnings.
constexpr std::enable_if_t< std::is_unsigned_v< T >, T > negate_unsigned(T value)
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
Custom vector implementations using small vector optimisation.
Convert types to std::string.
typedefs for Xapian