xapian-core  2.0.0
stringutils.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004-2023 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_STRINGUTILS_H
22 #define XAPIAN_INCLUDED_STRINGUTILS_H
23 
24 // Hack to allow inclusion from xapian-omega.
25 // FIXME: Move C_isalpha(), etc to the public API?
26 #define XAPIAN_IN_XAPIAN_H
27 #include <xapian/constinfo.h>
28 #undef XAPIAN_IN_XAPIAN_H
29 
30 #include <algorithm>
31 #include <string>
32 #include <string_view>
33 #include <cstring>
34 
/** Helper for STRINGIZE() - stringifies its argument exactly as written.
 *
 *  Use STRINGIZE() instead if the argument should be macro-expanded first.
 */
#define STRINGIZE_(ARG) #ARG

/** Convert the argument to a string literal, after macro expansion.
 *
 *  The extra level of macro call means any macros in the argument are
 *  expanded before the result is stringified.
 */
#define STRINGIZE(ARG) STRINGIZE_(ARG)

/** Length of a string literal, excluding the terminating nul.
 *
 *  The argument is pasted next to an empty string literal, so passing
 *  anything which isn't a string literal (for example a `const char*`
 *  variable) fails to compile rather than silently giving the size of a
 *  pointer.
 */
#define CONST_STRLEN(LITERAL) (sizeof(LITERAL "") - 1)
49 
/* C++20 added starts_with() and ends_with() methods to std::string and
 * std::string_view, and C++23 added contains(), which together provide this
 * functionality, but we don't yet require C++20.
 */
54 
/** Test if a string starts with a particular character.
 *
 *  @param s    The string to examine.
 *  @param pfx  The character to look for at the start of @a s.
 *
 *  @return true if @a s is non-empty and its first character is @a pfx.
 */
inline bool
startswith(std::string_view s, char pfx)
{
    if (s.empty()) return false;
    return s.front() == pfx;
}
60 
/** Test if a string starts with a given run of bytes.
 *
 *  @param s    The string to examine.
 *  @param pfx  Pointer to the prefix bytes (these need not be nul-terminated
 *              and may include zero bytes).
 *  @param len  Number of bytes at @a pfx.
 *
 *  @return true if @a s is at least @a len bytes long and its first @a len
 *          bytes are equal to those at @a pfx.
 *
 *  The comparison is performed on std::string_view objects rather than with
 *  std::memcmp() because passing a null pointer to memcmp() is undefined
 *  behaviour even when the length is zero, and an empty std::string_view
 *  may have a null data() pointer.
 */
inline bool
startswith(std::string_view s, const char* pfx, size_t len)
{
    // substr() clamps the length, so if s is shorter than len the sizes
    // differ and the comparison is false.
    return s.substr(0, len) == std::string_view(pfx, len);
}
66 
67 inline bool
68 startswith(std::string_view s, const char* pfx)
69 {
70  return startswith(s, pfx, std::strlen(pfx));
71 }
72 
73 inline bool
74 startswith(std::string_view s, std::string_view pfx)
75 {
76  return startswith(s, pfx.data(), pfx.size());
77 }
78 
/** Test if a string ends with a particular character.
 *
 *  @param s    The string to examine.
 *  @param sfx  The character to look for at the end of @a s.
 *
 *  @return true if @a s is non-empty and its last character is @a sfx.
 */
inline bool
endswith(std::string_view s, char sfx)
{
    if (s.empty()) return false;
    return s.back() == sfx;
}
84 
/** Test if a string ends with a given run of bytes.
 *
 *  @param s    The string to examine.
 *  @param sfx  Pointer to the suffix bytes (these need not be nul-terminated
 *              and may include zero bytes).
 *  @param len  Number of bytes at @a sfx.
 *
 *  @return true if @a s is at least @a len bytes long and its last @a len
 *          bytes are equal to those at @a sfx.
 *
 *  The comparison is performed on std::string_view objects rather than with
 *  std::memcmp() because passing a null pointer to memcmp() is undefined
 *  behaviour even when the length is zero, and an empty std::string_view
 *  may have a null data() pointer.
 */
inline bool
endswith(std::string_view s, const char* sfx, size_t len)
{
    // Check the length first so the subtraction below can't wrap around.
    if (s.size() < len) return false;
    return s.substr(s.size() - len) == std::string_view(sfx, len);
}
90 
91 inline bool
92 endswith(std::string_view s, const char* sfx)
93 {
94  return endswith(s, sfx, std::strlen(sfx));
95 }
96 
97 inline bool
98 endswith(std::string_view s, std::string_view sfx)
99 {
100  return endswith(s, sfx.data(), sfx.size());
101 }
102 
/** Test if a string contains a particular character.
 *
 *  @param s          The string to search.
 *  @param substring  The character to search for.
 *
 *  @return true if @a substring occurs anywhere in @a s.
 */
inline bool
contains(std::string_view s, char substring)
{
    const auto pos = s.find(substring);
    return pos != std::string_view::npos;
}
108 
/** Test if a string contains a given run of bytes.
 *
 *  @param s          The string to search.
 *  @param substring  Pointer to the bytes to search for (these need not be
 *                    nul-terminated).
 *  @param len        Number of bytes at @a substring.
 *
 *  @return true if the @a len bytes at @a substring occur anywhere in @a s.
 */
inline bool
contains(std::string_view s, const char* substring, size_t len)
{
    const std::string_view needle(substring, len);
    return s.find(needle, 0) != std::string_view::npos;
}
114 
/** Test if a string contains a nul-terminated substring.
 *
 *  @param s          The string to search.
 *  @param substring  The text to search for, as a nul-terminated C string.
 *
 *  @return true if @a substring occurs anywhere in @a s.
 */
inline bool
contains(std::string_view s, const char* substring)
{
    const std::string_view needle(substring);
    return s.find(needle) != std::string_view::npos;
}
120 
/** Test if a string contains another string.
 *
 *  @param s          The string to search.
 *  @param substring  The text to search for (may contain zero bytes).
 *
 *  @return true if @a substring occurs anywhere in @a s.
 */
inline bool
contains(std::string_view s, std::string_view substring)
{
    const auto pos = s.find(substring);
    return pos != std::string_view::npos;
}
126 
/** Find the length of the longest common prefix of two strings.
 *
 *  @param a  The first string.
 *  @param b  The second string.
 *
 *  @return The number of leading bytes which @a a and @a b have in common
 *          (at most the length of the shorter string).
 */
inline std::string::size_type
common_prefix_length(std::string_view a, std::string_view b)
{
    // Only compare as far as the end of the shorter string.
    const std::string::size_type limit = std::min(a.size(), b.size());
    const auto diverge = std::mismatch(a.begin(), a.begin() + limit,
				       b.begin());
    return static_cast<std::string::size_type>(diverge.first - a.begin());
}
137 
/** Find the length of the longest common prefix of two strings, up to a
 *  specified maximum.
 *
 *  @param a               The first string.
 *  @param b               The second string.
 *  @param max_prefix_len  Upper limit on the value returned.
 *
 *  @return The number of leading bytes which @a a and @a b have in common,
 *          or @a max_prefix_len if that is smaller.
 */
inline std::string::size_type
common_prefix_length(std::string_view a, std::string_view b,
		     std::string::size_type max_prefix_len)
{
    // Only compare as far as the end of the shorter string or the caller's
    // cap, whichever comes first.
    const std::string::size_type limit =
	std::min(std::min(a.size(), b.size()), max_prefix_len);
    const auto diverge = std::mismatch(a.begin(), a.begin() + limit,
				       b.begin());
    return static_cast<std::string::size_type>(diverge.first - a.begin());
}
151 
152 // Like C's isXXXXX() but:
153 // (a) always work in the C locale
154 // (b) handle signed char as well as unsigned char
155 // (c) have a suitable signature for use as predicates with find_if()
156 // (d) add negated versions isnotXXXXX() which are useful as predicates
157 
namespace Xapian::Internal {

// Bit layout of each entry in the character classification table which
// C_tab_() reads: the low four bits hold a hex digit value and the upper
// four bits are classification flags.

/// Mask to extract the low four bits of a table entry (see hex_digit()).
constexpr unsigned char HEX_MASK = 0x0f;

/// Flag tested by C_isupper().
constexpr unsigned char IS_UPPER = 0x10;

/// Flag tested by C_isalpha().  NB Same as ASCII "case bit".
constexpr unsigned char IS_ALPHA = 0x20;

/// Flag tested by C_isdigit().
constexpr unsigned char IS_DIGIT = 0x40;

/// Flag tested by C_isspace().
constexpr unsigned char IS_SPACE = 0x80;

}
167 
// FIXME: The character classification functions in this file assume the
// execution character set is ASCII or an ASCII-compatible one such as
// ISO-8859-N or UTF-8.  Supporting EBCDIC would need some work (patches
// welcome!)  Checking the code for space catches builds where that
// assumption doesn't hold.
static_assert(' ' == '\x20', "character set isn't a superset of ASCII");
172 
173 // Add explicit conversion to bool to prevent compiler warning from "aCC +w":
174 // Warning (suggestion) 818: [...] # Type `int' is larger than type `bool',
175 // truncation in value may result.
176 
177 inline unsigned char C_tab_(char ch) {
178  const unsigned char * C_tab = Xapian::Internal::get_constinfo_()->C_tab;
179  return C_tab[static_cast<unsigned char>(ch)];
180 }
181 
182 inline bool C_isdigit(char ch) {
183  using namespace Xapian::Internal;
184  return bool(C_tab_(ch) & IS_DIGIT);
185 }
186 
187 inline bool C_isxdigit(char ch) {
188  using namespace Xapian::Internal;
189  // Include IS_DIGIT so '0' gives true.
190  return bool(C_tab_(ch) & (HEX_MASK|IS_DIGIT));
191 }
192 
193 inline bool C_isupper(char ch) {
194  using namespace Xapian::Internal;
195  return bool(C_tab_(ch) & IS_UPPER);
196 }
197 
198 inline bool C_islower(char ch) {
199  using namespace Xapian::Internal;
200  return (C_tab_(ch) & (IS_ALPHA|IS_UPPER)) == IS_ALPHA;
201 }
202 
203 inline bool C_isalpha(char ch) {
204  using namespace Xapian::Internal;
205  return bool(C_tab_(ch) & IS_ALPHA);
206 }
207 
208 inline bool C_isalnum(char ch) {
209  using namespace Xapian::Internal;
210  return bool(C_tab_(ch) & (IS_ALPHA|IS_DIGIT));
211 }
212 
213 inline bool C_isspace(char ch) {
214  using namespace Xapian::Internal;
215  return bool(C_tab_(ch) & IS_SPACE);
216 }
217 
218 inline bool C_isnotdigit(char ch) { return !C_isdigit(ch); }
219 inline bool C_isnotxdigit(char ch) { return !C_isxdigit(ch); }
220 inline bool C_isnotupper(char ch) { return !C_isupper(ch); }
221 inline bool C_isnotlower(char ch) { return !C_islower(ch); }
222 inline bool C_isnotalpha(char ch) { return !C_isalpha(ch); }
223 inline bool C_isnotalnum(char ch) { return !C_isalnum(ch); }
224 inline bool C_isnotspace(char ch) { return !C_isspace(ch); }
225 
226 inline char C_tolower(char ch) {
227  using namespace Xapian::Internal;
228  return ch | (C_tab_(ch) & IS_ALPHA);
229 }
230 
231 inline char C_toupper(char ch) {
232  using namespace Xapian::Internal;
233  return ch &~ (C_tab_(ch) & IS_ALPHA);
234 }
235 
236 inline int hex_digit(char ch) {
237  using namespace Xapian::Internal;
238  return C_tab_(ch) & HEX_MASK;
239 }
240 
248 inline char hex_decode(char ch1, char ch2) {
249  return char(hex_digit(ch1) << 4 | hex_digit(ch2));
250 }
251 
252 #endif // XAPIAN_INCLUDED_STRINGUTILS_H
Mechanism for accessing a struct of constant information.
const unsigned char IS_ALPHA
Definition: stringutils.h:162
const unsigned char HEX_MASK
Definition: stringutils.h:160
const unsigned char IS_SPACE
Definition: stringutils.h:164
const struct constinfo * get_constinfo_() noexcept
Definition: constinfo.cc:43
const unsigned char IS_DIGIT
Definition: stringutils.h:163
const unsigned char IS_UPPER
Definition: stringutils.h:161
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
bool C_isupper(char ch)
Definition: stringutils.h:193
bool C_isalnum(char ch)
Definition: stringutils.h:208
bool C_isnotalnum(char ch)
Definition: stringutils.h:223
bool C_isalpha(char ch)
Definition: stringutils.h:203
char hex_decode(char ch1, char ch2)
Decode a pair of ASCII hex digits.
Definition: stringutils.h:248
bool C_isnotalpha(char ch)
Definition: stringutils.h:222
bool C_isspace(char ch)
Definition: stringutils.h:213
int hex_digit(char ch)
Definition: stringutils.h:236
bool endswith(std::string_view s, char sfx)
Definition: stringutils.h:80
char C_toupper(char ch)
Definition: stringutils.h:231
bool C_isnotdigit(char ch)
Definition: stringutils.h:218
bool C_isxdigit(char ch)
Definition: stringutils.h:187
bool startswith(std::string_view s, char pfx)
Definition: stringutils.h:56
std::string::size_type common_prefix_length(std::string_view a, std::string_view b)
Definition: stringutils.h:128
bool C_isnotlower(char ch)
Definition: stringutils.h:221
bool C_isnotxdigit(char ch)
Definition: stringutils.h:219
bool C_isnotupper(char ch)
Definition: stringutils.h:220
bool C_isdigit(char ch)
Definition: stringutils.h:182
bool contains(std::string_view s, char substring)
Definition: stringutils.h:104
bool C_isnotspace(char ch)
Definition: stringutils.h:224
bool C_islower(char ch)
Definition: stringutils.h:198
unsigned char C_tab_(char ch)
Definition: stringutils.h:177
char C_tolower(char ch)
Definition: stringutils.h:226
unsigned char C_tab[256]
Definition: constinfo.h:31