xapian-core  2.0.0
terminfo.cc
Go to the documentation of this file.
1 
4 /* Copyright 2017,2018 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "terminfo.h"
24 
25 #include "omassert.h"
26 
27 #include <algorithm>
28 #include <limits>
29 
30 using namespace std;
31 
32 void
34 {
35  Assert(!is_deleted());
36  inplace_merge(positions.begin(),
37  positions.begin() + split,
38  positions.end());
39  split = 0;
40 }
41 
42 bool
44 {
45  if (rare(is_deleted())) {
46  wdf = wdf_inc;
47  split = 0;
48  positions.push_back(termpos);
49  return true;
50  }
51 
52  wdf += wdf_inc;
53 
54  // Optimise the common case of adding positions in ascending order.
55  if (positions.empty()) {
56  positions.push_back(termpos);
57  return false;
58  }
59  if (termpos > positions.back()) {
60  if (split) {
61  // Check for duplicate before split.
62  auto i = lower_bound(positions.cbegin(),
63  positions.cbegin() + split,
64  termpos);
65  if (i != positions.cbegin() + split && *i == termpos)
66  return false;
67  }
68  positions.push_back(termpos);
69  return false;
70  }
71 
72  if (termpos == positions.back()) {
73  // Duplicate of last entry.
74  return false;
75  }
76 
77  if (split > 0) {
78  // We could merge in the new entry at the same time, but that seems to
79  // make things much more complex for minor gains.
80  merge();
81  }
82 
83  // We keep positions sorted, so use lower_bound() which can binary chop to
84  // find the entry.
85  auto i = lower_bound(positions.cbegin(), positions.cend(), termpos);
86  // Add unless termpos is already in the list.
87  if (i == positions.cend() || *i != termpos) {
88  auto new_split = positions.size();
89  if constexpr(sizeof(split) < sizeof(Xapian::termpos)) {
90  if (rare(new_split > numeric_limits<decltype(split)>::max())) {
91  // The split point would be beyond the size of the type used to
92  // hold it, which is really unlikely if that type is 32-bit.
93  // Just insert the old way in this case.
94  positions.insert(i, termpos);
95  return false;
96  }
97  } else {
98  // This assertion should always be true because we shouldn't have
99  // duplicate entries and the split point can't be after the final
100  // entry.
101  AssertRel(new_split, <=, numeric_limits<decltype(split)>::max());
102  }
103  split = new_split;
104  positions.push_back(termpos);
105  }
106  return false;
107 }
108 
109 bool
111 {
112  Assert(!is_deleted());
113 
114  if (rare(positions.empty()))
115  return false;
116 
117  // Special case removing the final position, which we can handle in O(1).
118  if (positions.back() == termpos) {
119  positions.pop_back();
120  if (split == positions.size()) {
121  split = 0;
122  // We removed the only entry from after the split.
123  }
124  return true;
125  }
126 
127  if (split > 0) {
128  // We could remove the requested entry at the same time, but that seems
129  // fiddly to do.
130  merge();
131  }
132 
133  // We keep positions sorted, so use lower_bound() which can binary chop to
134  // find the entry.
135  auto i = lower_bound(positions.cbegin(), positions.cend(), termpos);
136  if (i == positions.cend() || *i != termpos) {
137  // Not there.
138  return false;
139  }
140  positions.erase(i);
141  return true;
142 }
143 
146  Xapian::termpos termpos_last)
147 {
148  Assert(!is_deleted());
149 
150  if (split > 0) {
151  // We could remove the requested entries at the same time, but that
152  // seems fiddly to do.
153  merge();
154  }
155 
156  // Find the range [i, j) that the specified termpos range maps to. Use
157  // binary chop to search, since this is a sorted list.
158  auto i = lower_bound(positions.cbegin(), positions.cend(), termpos_first);
159  if (i == positions.cend() || *i > termpos_last) {
160  return 0;
161  }
162  auto j = upper_bound(i, positions.cend(), termpos_last);
163  size_t size_before = positions.size();
164  positions.erase(i, j);
165  return Xapian::termpos(size_before - positions.size());
166 }
bool add_position(Xapian::termcount wdf_inc, Xapian::termpos termpos)
Add a position.
Definition: terminfo.cc:43
bool remove_position(Xapian::termpos termpos)
Remove a position.
Definition: terminfo.cc:110
void merge() const
Merge sorted ranges before and after split.
Definition: terminfo.cc:33
Xapian::termpos remove_positions(Xapian::termpos termpos_first, Xapian::termpos termpos_last)
Remove a range of positions.
Definition: terminfo.cc:145
#define rare(COND)
Definition: config.h:607
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
Various assertion macros.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
#define Assert(COND)
Definition: omassert.h:122
Metadata for a term in a document.