xapian-core  1.4.21
glass_inverter.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2010,2013,2014 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_GLASS_INVERTER_H
22 #define XAPIAN_INCLUDED_GLASS_INVERTER_H
23 
24 #include "xapian/types.h"
25 
26 #include <map>
27 #include <string>
28 #include <vector>
29 
30 #include "omassert.h"
31 #include "str.h"
32 #include "xapian/error.h"
33 
34 class GlassPostListTable;
36 
37 namespace Xapian {
38 class TermIterator;
39 }
40 
43 
45 class Inverter {
46  friend class GlassPostListTable;
47 
50  friend class GlassPostListTable;
51 
54 
57 
59  std::map<Xapian::docid, Xapian::termcount> pl_changes;
60 
61  public:
64  : tf_delta(1), cf_delta(Xapian::termcount_diff(wdf))
65  {
66  pl_changes.insert(std::make_pair(did, wdf));
67  }
68 
71  : tf_delta(-1), cf_delta(-Xapian::termcount_diff(wdf))
72  {
73  pl_changes.insert(std::make_pair(did, DELETED_POSTING));
74  }
75 
78  Xapian::termcount new_wdf)
79  : tf_delta(0), cf_delta(Xapian::termcount_diff(new_wdf - old_wdf))
80  {
81  pl_changes.insert(std::make_pair(did, new_wdf));
82  }
83 
86  ++tf_delta;
87  cf_delta += wdf;
88  // Add did to term's postlist
89  pl_changes[did] = wdf;
90  }
91 
94  --tf_delta;
95  cf_delta -= wdf;
96  // Remove did from term's postlist.
97  pl_changes[did] = DELETED_POSTING;
98  }
99 
102  Xapian::termcount new_wdf) {
103  cf_delta += new_wdf - old_wdf;
104  pl_changes[did] = new_wdf;
105  }
106 
108  Xapian::termcount_diff get_tfdelta() const { return tf_delta; }
109 
111  Xapian::termcount_diff get_cfdelta() const { return cf_delta; }
112  };
113 
115  std::map<std::string, PostingChanges> postlist_changes;
116 
123  mutable int has_positions_cache = -1;
124 
126  std::map<std::string, std::map<Xapian::docid, std::string>> pos_changes;
127 
128  void store_positions(const GlassPositionListTable & position_table,
129  Xapian::docid did,
130  const std::string & tname,
131  const std::vector<Xapian::termpos> & posvec,
132  bool modifying);
133 
134  void set_positionlist(Xapian::docid did,
135  const std::string & term,
136  const std::string & s);
137 
138  public:
140  std::map<Xapian::docid, Xapian::termcount> doclen_changes;
141 
142  public:
143  void add_posting(Xapian::docid did, const std::string & term,
144  Xapian::doccount wdf) {
145  std::map<std::string, PostingChanges>::iterator i;
146  i = postlist_changes.find(term);
147  if (i == postlist_changes.end()) {
148  postlist_changes.insert(
149  std::make_pair(term, PostingChanges(did, wdf)));
150  } else {
151  i->second.add_posting(did, wdf);
152  }
153  }
154 
155  void remove_posting(Xapian::docid did, const std::string & term,
156  Xapian::doccount wdf) {
157  std::map<std::string, PostingChanges>::iterator i;
158  i = postlist_changes.find(term);
159  if (i == postlist_changes.end()) {
160  postlist_changes.insert(
161  std::make_pair(term, PostingChanges(did, wdf, false)));
162  } else {
163  i->second.remove_posting(did, wdf);
164  }
165  }
166 
167  void update_posting(Xapian::docid did, const std::string & term,
168  Xapian::termcount old_wdf,
169  Xapian::termcount new_wdf) {
170  std::map<std::string, PostingChanges>::iterator i;
171  i = postlist_changes.find(term);
172  if (i == postlist_changes.end()) {
173  postlist_changes.insert(
174  std::make_pair(term, PostingChanges(did, old_wdf, new_wdf)));
175  } else {
176  i->second.update_posting(did, old_wdf, new_wdf);
177  }
178  }
179 
180  void set_positionlist(const GlassPositionListTable & position_table,
181  Xapian::docid did,
182  const std::string & tname,
183  const Xapian::TermIterator & term,
184  bool modifying = false);
185 
186  void delete_positionlist(Xapian::docid did,
187  const std::string & term);
188 
189  bool get_positionlist(Xapian::docid did,
190  const std::string & term,
191  std::string & s) const;
192 
193  bool has_positions(const GlassPositionListTable & position_table) const;
194 
195  void clear() {
196  doclen_changes.clear();
197  postlist_changes.clear();
198  pos_changes.clear();
199  has_positions_cache = -1;
200  }
201 
203  if (add) {
204  Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] == DELETED_POSTING);
205  }
206  doclen_changes[did] = doclen;
207  }
208 
210  Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] != DELETED_POSTING);
211  doclen_changes[did] = DELETED_POSTING;
212  }
213 
214  bool get_doclength(Xapian::docid did, Xapian::termcount & doclen) const {
215  std::map<Xapian::docid, Xapian::termcount>::const_iterator i;
216  i = doclen_changes.find(did);
217  if (i == doclen_changes.end())
218  return false;
219  if (rare(i->second == DELETED_POSTING))
220  throw Xapian::DocNotFoundError("Document not found: " + str(did));
221  doclen = i->second;
222  return true;
223  }
224 
226  void flush_doclengths(GlassPostListTable & table);
227 
229  void flush_post_list(GlassPostListTable & table, const std::string & term);
230 
232  void flush_all_post_lists(GlassPostListTable & table);
233 
235  void flush_post_lists(GlassPostListTable & table, const std::string & pfx);
236 
238  void flush(GlassPostListTable & table);
239 
241  void flush_pos_lists(GlassPositionListTable & table);
242 
243  bool get_deltas(const std::string & term,
244  Xapian::termcount_diff & tf_delta,
245  Xapian::termcount_diff & cf_delta) const {
246  std::map<std::string, PostingChanges>::const_iterator i;
247  i = postlist_changes.find(term);
248  if (i == postlist_changes.end()) {
249  return false;
250  }
251  tf_delta = i->second.get_tfdelta();
252  cf_delta = i->second.get_cfdelta();
253  return true;
254  }
255 };
256 
257 #endif // XAPIAN_INCLUDED_GLASS_INVERTER_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
#define Assert(COND)
Definition: omassert.h:122
typedefs for Xapian
PostingChanges(Xapian::docid did, Xapian::termcount wdf)
Constructor for an added posting.
Xapian::termcount_diff get_tfdelta() const
Get the term frequency delta.
Class which "inverts the file".
void delete_doclength(Xapian::docid did)
Class for storing the changes in frequencies for a term.
Convert types to std::string.
#define rare(COND)
Definition: config.h:573
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
void clear()
Xapian::termcount_diff get_cfdelta() const
Get the collection frequency delta.
void add(const std::string &key, std::string tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
bool get_doclength(Xapian::docid did, Xapian::termcount &doclen) const
PostingChanges(Xapian::docid did, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
Constructor for an updated posting.
void add_posting(Xapian::docid did, const std::string &term, Xapian::doccount wdf)
string str(int value)
Convert int to std::string.
Definition: str.cc:90
std::map< std::string, std::map< Xapian::docid, std::string > > pos_changes
Buffered changes to positional data.
std::map< Xapian::docid, Xapian::termcount > pl_changes
Changes to this term's postlist.
Xapian::termcount_diff cf_delta
Change in collection frequency.
void set_doclength(Xapian::docid did, Xapian::termcount doclen, bool add)
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
PostingChanges(Xapian::docid did, Xapian::termcount wdf, bool)
Constructor for a removed posting.
XAPIAN_TERMCOUNT_BASE_TYPE termcount_diff
A signed difference between two counts of terms.
Definition: types.h:79
void update_posting(Xapian::docid did, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
Update a posting.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
void add_posting(Xapian::docid did, Xapian::termcount wdf)
Add a posting.
void update_posting(Xapian::docid did, const std::string &term, Xapian::termcount old_wdf, Xapian::termcount new_wdf)
void remove_posting(Xapian::docid did, const std::string &term, Xapian::doccount wdf)
Xapian::termcount_diff tf_delta
Change in term frequency.
const Xapian::termcount DELETED_POSTING
Magic wdf value used for a deleted posting.
void remove_posting(Xapian::docid did, Xapian::termcount wdf)
Remove a posting.
std::map< Xapian::docid, Xapian::termcount > doclen_changes
Buffered changes to document lengths.
Various assertion macros.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
std::map< std::string, PostingChanges > postlist_changes
Buffered changes to postlists.
bool get_deltas(const std::string &term, Xapian::termcount_diff &tf_delta, Xapian::termcount_diff &cf_delta) const