xapian-core  1.4.22
stringutils.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004-2022 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef XAPIAN_INCLUDED_STRINGUTILS_H
22 #define XAPIAN_INCLUDED_STRINGUTILS_H
23 
24 #include <xapian/constinfo.h>
25 
26 #include <algorithm>
27 #include <string>
28 #include <cstring>
29 
/** Helper macro for STRINGIZE: applies the preprocessor's # operator to X.
 *
 *  The # operator stringizes its argument exactly as written, without
 *  macro-expanding it first, which is why STRINGIZE goes through this extra
 *  level of macro call.
 */
#define STRINGIZE_(X) #X

/// Convert X to a string literal, macro-expanding X first.
#define STRINGIZE(X) STRINGIZE_(X)

/** Length of the string literal S, not counting the terminating nul.
 *
 *  S is pasted next to an empty string literal, so the expression only
 *  compiles when S is itself a string literal (adjacent literals are
 *  concatenated; anything else followed by "" is a syntax error).
 */
#define CONST_STRLEN(S) (sizeof(S"") - 1)
44 
/** Test whether string @a s begins with the character @a pfx.
 *
 *  @param s    The string to inspect.
 *  @param pfx  The character to look for at the start of @a s.
 *
 *  @return true if @a s is non-empty and its first character is @a pfx.
 */
inline bool
startswith(const std::string & s, char pfx)
{
    // An empty string has no first character to compare.
    if (s.empty()) return false;
    return s.front() == pfx;
}
50 
/** Test whether string @a s begins with the @a len bytes at @a pfx.
 *
 *  The comparison is bytewise (std::memcmp), so @a pfx may contain embedded
 *  zero bytes.
 *
 *  @param s    The string to inspect.
 *  @param pfx  Pointer to the prefix bytes.
 *  @param len  Number of bytes at @a pfx to compare.
 *
 *  @return true if @a s is at least @a len bytes long and its first @a len
 *          bytes equal those at @a pfx.
 */
inline bool
startswith(const std::string & s, const char * pfx, size_t len)
{
    // A prefix longer than the string can never match.
    if (len > s.size()) return false;
    return std::memcmp(s.data(), pfx, len) == 0;
}
56 
57 inline bool
58 startswith(const std::string & s, const char * pfx)
59 {
60  return startswith(s, pfx, std::strlen(pfx));
61 }
62 
63 inline bool
64 startswith(const std::string & s, const std::string & pfx)
65 {
66  return startswith(s, pfx.data(), pfx.size());
67 }
68 
/** Test whether string @a s ends with the character @a sfx.
 *
 *  @param s    The string to inspect.
 *  @param sfx  The character to look for at the end of @a s.
 *
 *  @return true if @a s is non-empty and its last character is @a sfx.
 */
inline bool
endswith(const std::string & s, char sfx)
{
    // An empty string has no last character to compare.
    if (s.empty()) return false;
    return s.back() == sfx;
}
74 
/** Test whether string @a s ends with the @a len bytes at @a sfx.
 *
 *  The comparison is bytewise (std::memcmp), so @a sfx may contain embedded
 *  zero bytes.
 *
 *  @param s    The string to inspect.
 *  @param sfx  Pointer to the suffix bytes.
 *  @param len  Number of bytes at @a sfx to compare.
 *
 *  @return true if @a s is at least @a len bytes long and its last @a len
 *          bytes equal those at @a sfx.
 */
inline bool
endswith(const std::string & s, const char * sfx, size_t len)
{
    // A suffix longer than the string can never match.
    if (len > s.size()) return false;
    // Start of the final len bytes of s.
    const char * tail = s.data() + (s.size() - len);
    return std::memcmp(tail, sfx, len) == 0;
}
80 
81 inline bool
82 endswith(const std::string & s, const char * sfx)
83 {
84  return endswith(s, sfx, std::strlen(sfx));
85 }
86 
87 inline bool
88 endswith(const std::string & s, const std::string & sfx)
89 {
90  return endswith(s, sfx.data(), sfx.size());
91 }
92 
/** Count how many leading characters strings @a a and @a b have in common.
 *
 *  @param a  First string.
 *  @param b  Second string.
 *
 *  @return The index of the first position at which @a a and @a b differ,
 *          or the length of the shorter string if one is a prefix of the
 *          other (so 0 if either is empty).
 */
inline std::string::size_type
common_prefix_length(const std::string &a, const std::string &b)
{
    // Only the overlap of the two strings can match, so bound the scan by
    // the shorter length; this also keeps the read of b in range.
    const std::string::size_type limit = std::min(a.size(), b.size());
    const std::string::const_iterator a_begin = a.begin();
    const std::string::const_iterator a_limit =
        a_begin + static_cast<std::string::difference_type>(limit);
    // mismatch() stops at the first differing position (or at a_limit).
    const std::string::const_iterator first_diff =
        std::mismatch(a_begin, a_limit, b.begin()).first;
    return static_cast<std::string::size_type>(first_diff - a_begin);
}
103 
104 // Like C's isXXXXX() but:
105 // (a) always work in the C locale
106 // (b) handle signed char as well as unsigned char
107 // (c) have a suitable signature for use as predicates with find_if()
108 // (d) add negated versions isnotXXXXX() which are useful as predicates
109 
namespace Xapian {
namespace Internal {

// Bit layout of each entry in the character classification table which the
// C_isXXXXX() helpers in this file consult.

/// Mask for the low four bits of an entry (read by hex_digit()).
constexpr unsigned char HEX_MASK = 0x0f;

/// Flag bit tested by C_isupper().
constexpr unsigned char IS_UPPER = 0x10;

/// Flag bit tested by C_isalpha().  NB Same as ASCII "case bit".
constexpr unsigned char IS_ALPHA = 0x20;

/// Flag bit tested by C_isdigit().
constexpr unsigned char IS_DIGIT = 0x40;

/// Flag bit tested by C_isspace().
constexpr unsigned char IS_SPACE = 0x80;

} // namespace Internal
} // namespace Xapian
119 
// FIXME: These functions assume ASCII or an ASCII compatible character set
// such as ISO-8859-N or UTF-8.  EBCDIC would need some work (patches
// welcome!)
//
// The check below makes compilation fail if the space character does not
// have the code 0x20.  It only tests that one character, so it is a quick
// sanity check rather than proof of full ASCII compatibility.
static_assert('\x20' == ' ', "character set isn't a superset of ASCII");
124 
125 // Add explicit conversion to bool to prevent compiler warning from "aCC +w":
126 // Warning (suggestion) 818: [...] # Type `int' is larger than type `bool',
127 // truncation in value may result.
128 
129 inline unsigned char C_tab_(char ch) {
130  const unsigned char * C_tab = Xapian::Internal::get_constinfo_()->C_tab;
131  return C_tab[static_cast<unsigned char>(ch)];
132 }
133 
134 inline bool C_isdigit(char ch) {
135  using namespace Xapian::Internal;
136  return bool(C_tab_(ch) & IS_DIGIT);
137 }
138 
139 inline bool C_isxdigit(char ch) {
140  using namespace Xapian::Internal;
141  // Include IS_DIGIT so '0' gives true.
142  return bool(C_tab_(ch) & (HEX_MASK|IS_DIGIT));
143 }
144 
145 inline bool C_isupper(char ch) {
146  using namespace Xapian::Internal;
147  return bool(C_tab_(ch) & IS_UPPER);
148 }
149 
150 inline bool C_islower(char ch) {
151  using namespace Xapian::Internal;
152  return (C_tab_(ch) & (IS_ALPHA|IS_UPPER)) == IS_ALPHA;
153 }
154 
155 inline bool C_isalpha(char ch) {
156  using namespace Xapian::Internal;
157  return bool(C_tab_(ch) & IS_ALPHA);
158 }
159 
160 inline bool C_isalnum(char ch) {
161  using namespace Xapian::Internal;
162  return bool(C_tab_(ch) & (IS_ALPHA|IS_DIGIT));
163 }
164 
165 inline bool C_isspace(char ch) {
166  using namespace Xapian::Internal;
167  return bool(C_tab_(ch) & IS_SPACE);
168 }
169 
170 inline bool C_isnotdigit(char ch) { return !C_isdigit(ch); }
171 inline bool C_isnotxdigit(char ch) { return !C_isxdigit(ch); }
172 inline bool C_isnotupper(char ch) { return !C_isupper(ch); }
173 inline bool C_isnotlower(char ch) { return !C_islower(ch); }
174 inline bool C_isnotalpha(char ch) { return !C_isalpha(ch); }
175 inline bool C_isnotalnum(char ch) { return !C_isalnum(ch); }
176 inline bool C_isnotspace(char ch) { return !C_isspace(ch); }
177 
178 inline char C_tolower(char ch) {
179  using namespace Xapian::Internal;
180  return ch | (C_tab_(ch) & IS_ALPHA);
181 }
182 
183 inline char C_toupper(char ch) {
184  using namespace Xapian::Internal;
185  return ch &~ (C_tab_(ch) & IS_ALPHA);
186 }
187 
188 inline int hex_digit(char ch) {
189  using namespace Xapian::Internal;
190  return C_tab_(ch) & HEX_MASK;
191 }
192 
200 inline char hex_decode(char ch1, char ch2) {
201  return char(hex_digit(ch1) << 4 | hex_digit(ch2));
202 }
203 
204 #endif // XAPIAN_INCLUDED_STRINGUTILS_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
bool endswith(const std::string &s, char sfx)
Definition: stringutils.h:70
unsigned char C_tab_(char ch)
Definition: stringutils.h:129
char C_tolower(char ch)
Definition: stringutils.h:178
int hex_digit(char ch)
Definition: stringutils.h:188
const unsigned char IS_DIGIT
Definition: stringutils.h:115
const unsigned char IS_UPPER
Definition: stringutils.h:113
const unsigned char IS_ALPHA
Definition: stringutils.h:114
char C_toupper(char ch)
Definition: stringutils.h:183
Mechanism for accessing a struct of constant information.
const struct constinfo * get_constinfo_()
Definition: constinfo.cc:43
bool C_isupper(char ch)
Definition: stringutils.h:145
bool C_islower(char ch)
Definition: stringutils.h:150
bool C_isdigit(char ch)
Definition: stringutils.h:134
bool C_isnotalpha(char ch)
Definition: stringutils.h:174
bool C_isspace(char ch)
Definition: stringutils.h:165
bool startswith(const std::string &s, char pfx)
Definition: stringutils.h:46
unsigned char C_tab[256]
Definition: constinfo.h:31
bool C_isalnum(char ch)
Definition: stringutils.h:160
bool C_isalpha(char ch)
Definition: stringutils.h:155
std::string::size_type common_prefix_length(const std::string &a, const std::string &b)
Definition: stringutils.h:94
char hex_decode(char ch1, char ch2)
Decode a pair of ASCII hex digits.
Definition: stringutils.h:200
bool C_isnotlower(char ch)
Definition: stringutils.h:173
bool C_isxdigit(char ch)
Definition: stringutils.h:139
bool C_isnotdigit(char ch)
Definition: stringutils.h:170
bool C_isnotxdigit(char ch)
Definition: stringutils.h:171
bool C_isnotspace(char ch)
Definition: stringutils.h:176
const unsigned char HEX_MASK
Definition: stringutils.h:112
bool C_isnotupper(char ch)
Definition: stringutils.h:172
bool C_isnotalnum(char ch)
Definition: stringutils.h:175
const unsigned char IS_SPACE
Definition: stringutils.h:116