29 static inline bool bad_cont(
unsigned char ch) {
30 return static_cast<signed char>(ch) >= static_cast<signed char>(0xc0);
42 buf[0] = char(0xc0 | (ch >> 6));
43 buf[1] = char(0x80 | (ch & 0x3f));
47 buf[0] = char(0xe0 | (ch >> 12));
48 buf[1] = char(0x80 | ((ch >> 6) & 0x3f));
49 buf[2] = char(0x80 | (ch & 0x3f));
53 buf[0] = char(0xf0 | (ch >> 18));
54 buf[1] = char(0x80 | ((ch >> 12) & 0x3f));
55 buf[2] = char(0x80 | ((ch >> 6) & 0x3f));
56 buf[3] = char(0x80 | (ch & 0x3f));
67 Utf8Iterator::Utf8Iterator(
const char* p_)
69 assign(p_, strlen(p_));
80 unsigned char ch = *p;
88 if (ch < 0xc2)
return (ch < 0x80);
100 (p[0] == 0xe0 && p[1] < 0xa0))
108 (p[0] == 0xf0 && p[1] < 0x90) ||
109 (p[0] == 0xf4 && p[1] >= 0x90))
116 if (p == NULL)
return unsigned(-1);
117 if (seqlen == 0) calculate_sequence_length();
118 unsigned char ch = *p;
119 if (seqlen == 1)
return ch;
120 if (seqlen == 2)
return ((ch & 0x1f) << 6) | (p[1] & 0x3f);
122 return ((ch & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f);
123 return ((ch & 0x07) << 18) | ((p[1] & 0x3f) << 12) |
124 ((p[2] & 0x3f) << 6) | (p[3] & 0x3f);
130 if (p == NULL)
return unsigned(-1);
132 if (!calculate_sequence_length())
133 return unsigned(*p) | 0x80000000;
135 unsigned char ch = *p;
136 if (seqlen == 1)
return ch;
137 if (seqlen == 2)
return ((ch & 0x1f) << 6) | (p[1] & 0x3f);
139 return ((ch & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f);
140 return ((ch & 0x07) << 18) | ((p[1] & 0x3f) << 12) |
141 ((p[2] & 0x3f) << 6) | (p[3] & 0x3f);
Unicode and UTF-8 related classes and functions.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned nonascii_to_utf8(unsigned ch, char *buf)
Convert a single non-ASCII Unicode character to UTF-8.
const Query operator*(double factor, const Query &q)
Scale a Xapian::Query object using OP_SCALE_WEIGHT.
static bool bad_cont(unsigned char ch)