xapian-core  1.4.27
stringutils.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004-2022 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_STRINGUTILS_H
22 #define XAPIAN_INCLUDED_STRINGUTILS_H
23 
24 #include <xapian/constinfo.h>
25 
26 #include <algorithm>
27 #include <string>
28 #include <cstring>
29 
/** Helper for STRINGIZE(): turn the argument into a string literal as-is.
 *
 *  The argument is NOT macro-expanded first, so use STRINGIZE() unless the
 *  unexpanded spelling is what you want.
 */
#define STRINGIZE_(TOKENS) #TOKENS

/** Turn the argument into a string literal after macro-expanding it.
 *
 *  Going through STRINGIZE_() adds the extra level of macro substitution
 *  needed for the argument to be expanded before it is stringized.
 */
#define STRINGIZE(TOKENS) STRINGIZE_(TOKENS)
37 
/** Length of a string literal, evaluated at compile time.
 *
 *  The argument is placed next to an empty string literal, so it only
 *  compiles when the argument is itself a string literal (passing a pointer
 *  is a syntax error).  The terminating NUL is not counted.
 */
#define CONST_STRLEN(LITERAL) (sizeof(LITERAL "") - 1)
44 
/* C++20 added starts_with() and ends_with() methods to std::string and
 * std::string_view, and C++23 added contains(), which provide this
 * functionality, but we don't yet require either standard.
 */
49 
/** Test whether string @a s begins with the character @a pfx.
 *
 *  @return true if @a s is non-empty and its first character equals @a pfx.
 */
inline bool
startswith(const std::string & s, char pfx)
{
    if (s.empty()) return false;
    return s.front() == pfx;
}
55 
/** Test whether string @a s begins with the @a len bytes at @a pfx.
 *
 *  The comparison is bytewise, so @a pfx may contain embedded NUL bytes.
 *
 *  @return true if @a s is at least @a len bytes long and its first @a len
 *	    bytes equal those at @a pfx.
 */
inline bool
startswith(const std::string & s, const char * pfx, size_t len)
{
    // A prefix longer than the string can never match.
    if (len > s.size()) return false;
    return std::memcmp(s.data(), pfx, len) == 0;
}
61 
/** Test whether string @a s begins with the NUL-terminated string @a pfx.
 *
 *  @return true if the first strlen(pfx) bytes of @a s equal @a pfx (which
 *	    is always the case when @a pfx is empty).
 */
inline bool
startswith(const std::string & s, const char * pfx)
{
    // compare() clamps the requested range to the end of s, so when s is
    // shorter than pfx the two simply compare unequal.
    return s.compare(0, std::strlen(pfx), pfx) == 0;
}
67 
/** Test whether string @a s begins with the string @a pfx.
 *
 *  The whole of @a pfx is compared, including any embedded NUL bytes.
 *
 *  @return true if the first pfx.size() bytes of @a s equal @a pfx.
 */
inline bool
startswith(const std::string & s, const std::string & pfx)
{
    // compare() clamps the requested range to the end of s, so when s is
    // shorter than pfx the two simply compare unequal.
    return s.compare(0, pfx.size(), pfx) == 0;
}
73 
/** Test whether string @a s ends with the character @a sfx.
 *
 *  @return true if @a s is non-empty and its last character equals @a sfx.
 */
inline bool
endswith(const std::string & s, char sfx)
{
    if (s.empty()) return false;
    return s.back() == sfx;
}
79 
/** Test whether string @a s ends with the @a len bytes at @a sfx.
 *
 *  The comparison is bytewise, so @a sfx may contain embedded NUL bytes.
 *
 *  @return true if @a s is at least @a len bytes long and its last @a len
 *	    bytes equal those at @a sfx.
 */
inline bool
endswith(const std::string & s, const char * sfx, size_t len)
{
    // A suffix longer than the string can never match (and checking this
    // first keeps the subtraction below from wrapping).
    if (len > s.size()) return false;
    const char * tail = s.data() + (s.size() - len);
    return std::memcmp(tail, sfx, len) == 0;
}
85 
/** Test whether string @a s ends with the NUL-terminated string @a sfx.
 *
 *  @return true if the last strlen(sfx) bytes of @a s equal @a sfx (which
 *	    is always the case when @a sfx is empty).
 */
inline bool
endswith(const std::string & s, const char * sfx)
{
    const std::string::size_type len = std::strlen(sfx);
    // A suffix longer than the string can never match (and checking this
    // first keeps the start offset below in range).
    if (len > s.size()) return false;
    return s.compare(s.size() - len, len, sfx) == 0;
}
91 
/** Test whether string @a s ends with the string @a sfx.
 *
 *  The whole of @a sfx is compared, including any embedded NUL bytes.
 *
 *  @return true if the last sfx.size() bytes of @a s equal @a sfx.
 */
inline bool
endswith(const std::string & s, const std::string & sfx)
{
    const std::string::size_type len = sfx.size();
    // A suffix longer than the string can never match (and checking this
    // first keeps the start offset below in range).
    if (len > s.size()) return false;
    return s.compare(s.size() - len, len, sfx) == 0;
}
97 
/** Test whether string @a s contains the character @a substring.
 *
 *  @return true if @a substring occurs anywhere in @a s.
 */
inline bool
contains(const std::string& s, char substring)
{
    const std::string::size_type pos = s.find(substring);
    return pos != std::string::npos;
}
103 
/** Test whether string @a s contains the @a len bytes at @a substring.
 *
 *  @return true if the @a len bytes starting at @a substring occur as a
 *	    contiguous run anywhere in @a s.
 */
inline bool
contains(const std::string& s, const char* substring, size_t len)
{
    // Search from the start of s for exactly len bytes of the needle.
    const std::string::size_type pos = s.find(substring, 0, len);
    return pos != std::string::npos;
}
109 
/** Test whether string @a s contains the NUL-terminated string @a substring.
 *
 *  @return true if @a substring occurs anywhere in @a s.
 */
inline bool
contains(const std::string& s, const char* substring)
{
    const std::string::size_type pos = s.find(substring);
    return pos != std::string::npos;
}
115 
/** Test whether string @a s contains the string @a substring.
 *
 *  @return true if @a substring occurs anywhere in @a s.
 */
inline bool
contains(const std::string& s, const std::string& substring)
{
    const std::string::size_type pos = s.find(substring);
    return pos != std::string::npos;
}
121 
/** Find the length of the longest prefix shared by two strings.
 *
 *  @param a	First string.
 *  @param b	Second string.
 *
 *  @return The number of leading bytes which are identical in @a a and @a b.
 */
inline std::string::size_type
common_prefix_length(const std::string &a, const std::string &b)
{
    // The shared prefix can't be longer than the shorter of the two strings.
    const std::string::size_type limit = std::min(a.size(), b.size());
    std::string::size_type matched = 0;
    while (matched != limit && a[matched] == b[matched]) {
	++matched;
    }
    return matched;
}
132 
/** Find the length of the longest prefix shared by two strings, up to a cap.
 *
 *  @param a		    First string.
 *  @param b		    Second string.
 *  @param max_prefix_len   Upper bound on the value returned.
 *
 *  @return The number of leading bytes which are identical in @a a and @a b,
 *	    or @a max_prefix_len if that is smaller.
 */
inline std::string::size_type
common_prefix_length(const std::string& a, const std::string& b,
		     std::string::size_type max_prefix_len)
{
    // The result is bounded by both string lengths and by the caller's cap.
    const std::string::size_type limit =
	std::min(std::min(a.size(), b.size()), max_prefix_len);
    std::string::size_type matched = 0;
    while (matched != limit && a[matched] == b[matched]) {
	++matched;
    }
    return matched;
}
146 
147 // Like C's isXXXXX() but:
148 // (a) always work in the C locale
149 // (b) handle signed char as well as unsigned char
150 // (c) have a suitable signature for use as predicates with find_if()
151 // (d) add negated versions isnotXXXXX() which are useful as predicates
152 
namespace Xapian {
namespace Internal {

// Bit layout of each entry in the character classification table which
// C_tab_() reads.

/// Low four bits: masked out by hex_digit() to obtain its result.
constexpr unsigned char HEX_MASK = 0x0f;

/// Flag tested by C_isupper().
constexpr unsigned char IS_UPPER = 0x10;

/// Flag tested by C_isalpha().  NB Same as ASCII "case bit".
constexpr unsigned char IS_ALPHA = 0x20;

/// Flag tested by C_isdigit().
constexpr unsigned char IS_DIGIT = 0x40;

/// Flag tested by C_isspace().
constexpr unsigned char IS_SPACE = 0x80;

}
}
162 
// FIXME: The character classification functions below assume ASCII or an
// ASCII compatible character set such as ISO-8859-N or UTF-8.  EBCDIC would
// need some work (patches welcome!)
//
// Fail the build early if the space character doesn't have its ASCII code.
static_assert(' ' == '\x20', "character set isn't a superset of ASCII");
167 
168 // Add explicit conversion to bool to prevent compiler warning from "aCC +w":
169 // Warning (suggestion) 818: [...] # Type `int' is larger than type `bool',
170 // truncation in value may result.
171 
172 inline unsigned char C_tab_(char ch) {
173  const unsigned char * C_tab = Xapian::Internal::get_constinfo_()->C_tab;
174  return C_tab[static_cast<unsigned char>(ch)];
175 }
176 
177 inline bool C_isdigit(char ch) {
178  using namespace Xapian::Internal;
179  return bool(C_tab_(ch) & IS_DIGIT);
180 }
181 
182 inline bool C_isxdigit(char ch) {
183  using namespace Xapian::Internal;
184  // Include IS_DIGIT so '0' gives true.
185  return bool(C_tab_(ch) & (HEX_MASK|IS_DIGIT));
186 }
187 
188 inline bool C_isupper(char ch) {
189  using namespace Xapian::Internal;
190  return bool(C_tab_(ch) & IS_UPPER);
191 }
192 
193 inline bool C_islower(char ch) {
194  using namespace Xapian::Internal;
195  return (C_tab_(ch) & (IS_ALPHA|IS_UPPER)) == IS_ALPHA;
196 }
197 
198 inline bool C_isalpha(char ch) {
199  using namespace Xapian::Internal;
200  return bool(C_tab_(ch) & IS_ALPHA);
201 }
202 
203 inline bool C_isalnum(char ch) {
204  using namespace Xapian::Internal;
205  return bool(C_tab_(ch) & (IS_ALPHA|IS_DIGIT));
206 }
207 
208 inline bool C_isspace(char ch) {
209  using namespace Xapian::Internal;
210  return bool(C_tab_(ch) & IS_SPACE);
211 }
212 
213 inline bool C_isnotdigit(char ch) { return !C_isdigit(ch); }
214 inline bool C_isnotxdigit(char ch) { return !C_isxdigit(ch); }
215 inline bool C_isnotupper(char ch) { return !C_isupper(ch); }
216 inline bool C_isnotlower(char ch) { return !C_islower(ch); }
217 inline bool C_isnotalpha(char ch) { return !C_isalpha(ch); }
218 inline bool C_isnotalnum(char ch) { return !C_isalnum(ch); }
219 inline bool C_isnotspace(char ch) { return !C_isspace(ch); }
220 
221 inline char C_tolower(char ch) {
222  using namespace Xapian::Internal;
223  return ch | (C_tab_(ch) & IS_ALPHA);
224 }
225 
226 inline char C_toupper(char ch) {
227  using namespace Xapian::Internal;
228  return ch &~ (C_tab_(ch) & IS_ALPHA);
229 }
230 
231 inline int hex_digit(char ch) {
232  using namespace Xapian::Internal;
233  return C_tab_(ch) & HEX_MASK;
234 }
235 
243 inline char hex_decode(char ch1, char ch2) {
244  return char(hex_digit(ch1) << 4 | hex_digit(ch2));
245 }
246 
247 #endif // XAPIAN_INCLUDED_STRINGUTILS_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
bool endswith(const std::string &s, char sfx)
Definition: stringutils.h:75
unsigned char C_tab_(char ch)
Definition: stringutils.h:172
char C_tolower(char ch)
Definition: stringutils.h:221
int hex_digit(char ch)
Definition: stringutils.h:231
const unsigned char IS_DIGIT
Definition: stringutils.h:158
const unsigned char IS_UPPER
Definition: stringutils.h:156
bool contains(const std::string &s, char substring)
Definition: stringutils.h:99
const unsigned char IS_ALPHA
Definition: stringutils.h:157
char C_toupper(char ch)
Definition: stringutils.h:226
Mechanism for accessing a struct of constant information.
const struct constinfo * get_constinfo_()
Definition: constinfo.cc:43
bool C_isupper(char ch)
Definition: stringutils.h:188
bool C_islower(char ch)
Definition: stringutils.h:193
bool C_isdigit(char ch)
Definition: stringutils.h:177
bool C_isnotalpha(char ch)
Definition: stringutils.h:217
bool C_isspace(char ch)
Definition: stringutils.h:208
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:51
unsigned char C_tab[256]
Definition: constinfo.h:31
bool C_isalnum(char ch)
Definition: stringutils.h:203
bool C_isalpha(char ch)
Definition: stringutils.h:198
std::string::size_type common_prefix_length(const std::string &a, const std::string &b)
Definition: stringutils.h:123
char hex_decode(char ch1, char ch2)
Decode a pair of ASCII hex digits.
Definition: stringutils.h:243
bool C_isnotlower(char ch)
Definition: stringutils.h:216
bool C_isxdigit(char ch)
Definition: stringutils.h:182
bool C_isnotdigit(char ch)
Definition: stringutils.h:213
bool C_isnotxdigit(char ch)
Definition: stringutils.h:214
bool C_isnotspace(char ch)
Definition: stringutils.h:219
const unsigned char HEX_MASK
Definition: stringutils.h:155
bool C_isnotupper(char ch)
Definition: stringutils.h:215
bool C_isnotalnum(char ch)
Definition: stringutils.h:218
const unsigned char IS_SPACE
Definition: stringutils.h:159