/** Test whether a byte is NOT a valid UTF-8 continuation byte.
 *
 *  A continuation byte has the bit pattern 10xxxxxx, i.e. it lies in the
 *  range 0x80..0xbf inclusive.
 *
 *  @param ch	The byte to test.
 *
 *  @return true if @a ch is outside 0x80..0xbf, false if it is a
 *	    continuation byte.
 */
static inline bool bad_cont(unsigned char ch) {
    // Check the top two bits directly rather than narrowing to signed char
    // and comparing against 0xc0: the result of that out-of-range
    // conversion is implementation-defined before C++20, whereas the mask
    // test is fully portable and accepts exactly the same set of bytes.
    return (ch & 0xc0) != 0x80;
}
40 buf[0] = char(0xc0 | (ch >> 6));
41 buf[1] = char(0x80 | (ch & 0x3f));
45 buf[0] = char(0xe0 | (ch >> 12));
46 buf[1] = char(0x80 | ((ch >> 6) & 0x3f));
47 buf[2] = char(0x80 | (ch & 0x3f));
51 buf[0] = char(0xf0 | (ch >> 18));
52 buf[1] = char(0x80 | ((ch >> 12) & 0x3f));
53 buf[2] = char(0x80 | ((ch >> 6) & 0x3f));
54 buf[3] = char(0x80 | (ch & 0x3f));
66 Utf8Iterator::calculate_sequence_length() const noexcept
73 unsigned char ch = *
p;
81 if (ch < 0xc2)
return (ch < 0x80);
93 (
p[0] == 0xe0 &&
p[1] < 0xa0) ||
94 (
p[0] == 0xed &&
p[1] >= 0xa0))
102 (
p[0] == 0xf0 &&
p[1] < 0x90) ||
103 (
p[0] == 0xf4 &&
p[1] >= 0x90))
110 if (
p == NULL)
return unsigned(-1);
111 if (seqlen == 0) calculate_sequence_length();
112 unsigned char ch = *
p;
113 if (seqlen == 1)
return ch;
114 if (seqlen == 2)
return ((ch & 0x1f) << 6) | (
p[1] & 0x3f);
116 return ((ch & 0x0f) << 12) | ((
p[1] & 0x3f) << 6) | (
p[2] & 0x3f);
117 return ((ch & 0x07) << 18) | ((
p[1] & 0x3f) << 12) |
118 ((
p[2] & 0x3f) << 6) | (
p[3] & 0x3f);
122 Utf8Iterator::strict_deref() const noexcept
124 if (
p == NULL)
return unsigned(-1);
126 if (!calculate_sequence_length())
127 return unsigned(*
p) | 0x80000000;
129 unsigned char ch = *
p;
130 if (seqlen == 1)
return ch;
131 if (seqlen == 2)
return ((ch & 0x1f) << 6) | (
p[1] & 0x3f);
133 return ((ch & 0x0f) << 12) | ((
p[1] & 0x3f) << 6) | (
p[2] & 0x3f);
134 return ((ch & 0x07) << 18) | ((
p[1] & 0x3f) << 12) |
135 ((
p[2] & 0x3f) << 6) | (
p[3] & 0x3f);
unsigned nonascii_to_utf8(unsigned ch, char *buf)
Convert a single non-ASCII Unicode character to UTF-8.
The Xapian namespace contains public interfaces for the Xapian library.
const Query operator*(double factor, const Query &q)
Scale a Xapian::Query object using OP_SCALE_WEIGHT.
Unicode and UTF-8 related classes and functions.
static bool bad_cont(unsigned char ch)