xapian-core  1.4.27
glass_inverter.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2013 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include "glass_inverter.h"
24 
25 #include "glass_postlist.h"
26 #include "glass_positionlist.h"
27 
28 #include "api/termlist.h"
29 
30 #include <map>
31 #include <string>
32 
33 using namespace std;
34 
35 void
37  Xapian::docid did,
38  const string & tname,
39  const vector<Xapian::termpos> & posvec,
40  bool modifying)
41 {
42  string s;
43  position_table.pack(s, posvec);
44  if (modifying && has_positions_cache != 0) {
45  // If we add positions we must then have positions, but if we remove
46  // positions we don't know if we then have them or not.
47  has_positions_cache = s.empty() ? -1 : 1;
48 
49  auto i = pos_changes.find(tname);
50  if (i != pos_changes.end()) {
51  map<Xapian::docid, string> & m = i->second;
52  auto j = m.find(did);
53  if (j != m.end()) {
54  // Update existing entry.
55  swap(j->second, s);
56  return;
57  }
58  }
59  const string & key = position_table.make_key(did, tname);
60  string old_tag;
61  if (position_table.get_exact_entry(key, old_tag) && s == old_tag) {
62  // Identical to existing entry on disk.
63  return;
64  }
65  } else {
66  // If we add positions, we must then have positions.
67  if (!s.empty()) has_positions_cache = 1;
68  }
69  set_positionlist(did, tname, s);
70 }
71 
72 void
74  Xapian::docid did,
75  const string & tname,
76  const Xapian::TermIterator & term,
77  bool modifying)
78 {
79  const std::vector<Xapian::termpos> * ptr;
80  ptr = term.internal->get_vector_termpos();
81  if (ptr) {
82  if (!ptr->empty()) {
83  store_positions(position_table, did, tname, *ptr, modifying);
84  return;
85  }
86  } else {
88  if (pos != term.positionlist_end()) {
89  vector<Xapian::termpos> posvec(pos, Xapian::PositionIterator());
90  store_positions(position_table, did, tname, posvec, modifying);
91  return;
92  }
93  }
94  // If we get here, the new position list was empty.
95  if (modifying)
96  delete_positionlist(did, tname);
97 }
98 
99 void
101  const string & term,
102  const string & s)
103 {
104  has_positions_cache = s.empty() ? -1 : 1;
105  pos_changes.insert(make_pair(term, map<Xapian::docid, string>()))
106  .first->second[did] = s;
107 }
108 
109 void
111  const string & term)
112 {
113  set_positionlist(did, term, string());
114 }
115 
116 bool
118  const string & term,
119  string & s) const
120 {
121  auto i = pos_changes.find(term);
122  if (i == pos_changes.end())
123  return false;
124  const map<Xapian::docid, string> & m = i->second;
125  auto j = m.find(did);
126  if (j == m.end())
127  return false;
128  s = j->second;
129  return true;
130 }
131 
132 bool
133 Inverter::has_positions(const GlassPositionListTable & position_table) const
134 {
135  if (has_positions_cache < 0) {
136  // FIXME: Can we cheaply keep track of some things to make this more
137  // efficient? E.g. how many sets and deletes we had in total perhaps.
138  glass_tablesize_t changes = 0;
139  for (const auto& i : pos_changes) {
140  const map<Xapian::docid, string>& m = i.second;
141  for (const auto& j : m) {
142  const string & s = j.second;
143  if (!s.empty())
144  return true;
145  ++changes;
146  }
147  }
148 
149  // We have positions unless all the existing entries are removed.
150  has_positions_cache = (changes != position_table.get_entry_count());
151  }
152  return has_positions_cache;
153 }
154 
155 void
157 {
158  table.merge_doclen_changes(doclen_changes);
159  doclen_changes.clear();
160 }
161 
162 void
163 Inverter::flush_post_list(GlassPostListTable & table, const string & term)
164 {
165  map<string, PostingChanges>::iterator i;
166  i = postlist_changes.find(term);
167  if (i == postlist_changes.end()) return;
168 
169  // Flush buffered changes for just this term's postlist.
170  table.merge_changes(term, i->second);
171  postlist_changes.erase(i);
172 }
173 
174 void
176 {
177  map<string, PostingChanges>::const_iterator i;
178  for (i = postlist_changes.begin(); i != postlist_changes.end(); ++i) {
179  table.merge_changes(i->first, i->second);
180  }
181  postlist_changes.clear();
182 }
183 
184 void
185 Inverter::flush_post_lists(GlassPostListTable & table, const string & pfx)
186 {
187  if (pfx.empty())
188  return flush_all_post_lists(table);
189 
190  map<string, PostingChanges>::iterator i, begin, end;
191  begin = postlist_changes.lower_bound(pfx);
192  string pfxinc = pfx;
193  while (true) {
194  if (pfxinc.back() != '\xff') {
195  ++pfxinc.back();
196  end = postlist_changes.lower_bound(pfxinc);
197  break;
198  }
199  pfxinc.resize(pfxinc.size() - 1);
200  if (pfxinc.empty()) {
201  end = postlist_changes.end();
202  break;
203  }
204  }
205 
206  for (i = begin; i != end; ++i) {
207  table.merge_changes(i->first, i->second);
208  }
209 
210  // Erase all the entries in one go, as that's:
211  // O(log(postlist_changes.size()) + O(number of elements removed)
212  postlist_changes.erase(begin, end);
213 }
214 
215 void
217 {
218  flush_doclengths(table);
219  flush_all_post_lists(table);
220 }
221 
222 void
224 {
225  for (auto i : pos_changes) {
226  const string & term = i.first;
227  const map<Xapian::docid, string> & m = i.second;
228  for (auto j : m) {
229  Xapian::docid did = j.first;
230  const string & s = j.second;
231  if (!s.empty())
232  table.set_positionlist(did, term, s);
233  else
234  table.delete_positionlist(did, term);
235  }
236  }
237  pos_changes.clear();
238  has_positions_cache = -1;
239 }
void pack(string &s, const std::vector< Xapian::termpos > &vec) const
Pack a position list into a string.
void merge_doclen_changes(const map< Xapian::docid, Xapian::termcount > &doclens)
Merge document length changes.
A position list in a glass database.
bool get_positionlist(Xapian::docid did, const std::string &term, std::string &s) const
bool has_positions(const GlassPositionListTable &position_table) const
void store_positions(const GlassPositionListTable &position_table, Xapian::docid did, const std::string &tname, const std::vector< Xapian::termpos > &posvec, bool modifying)
void delete_positionlist(Xapian::docid did, const string &tname)
Delete the position list for term tname in document did.
Postlists in glass databases.
STL namespace.
unsigned long long glass_tablesize_t
How many entries there are in a table.
Definition: glass_defs.h:71
void delete_positionlist(Xapian::docid did, const std::string &term)
Class for iterating over a list of terms.
Definition: termiterator.h:41
PositionIterator positionlist_end() const
Return an end PositionIterator for the current term.
Definition: termiterator.h:110
void flush_post_list(GlassPostListTable &table, const std::string &term)
Flush postlist changes for term.
virtual const std::vector< Xapian::termpos > * get_vector_termpos() const
Get pointer to vector<termpos> if that&#39;s the internal representation.
Definition: termlist.cc:42
void flush(GlassPostListTable &table)
Flush all postlist table changes.
void flush_all_post_lists(GlassPostListTable &table)
Flush postlist changes for all terms.
void set_positionlist(Xapian::docid did, const std::string &term, const std::string &s)
void merge_changes(const string &term, const Inverter::PostingChanges &changes)
Merge changes for a term.
Class for iterating over term positions.
void flush_pos_lists(GlassPositionListTable &table)
Flush position changes.
glass_tablesize_t get_entry_count() const
Return a count of the number of entries in the table.
Definition: glass_table.h:676
bool get_exact_entry(const std::string &key, std::string &tag) const
Read an entry from the table, if and only if it is exactly that being asked for.
Abstract base class for termlists.
void flush_post_lists(GlassPostListTable &table, const std::string &pfx)
Flush postlist changes for all terms which start with pfx.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Inverter class which "inverts the file".
static string make_key(Xapian::docid did, const string &term)
void flush_doclengths(GlassPostListTable &table)
Flush document length changes.
void set_positionlist(Xapian::docid did, const string &tname, const string &s)
Set the position list for term tname in document did.
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.