72 #define CREATE_SIZE 16
75 SnowballStemImplementation::create_s()
78 if (mem == NULL)
throw std::bad_alloc();
94 SnowballStemImplementation::skip_utf8(
const symbol * p,
int c,
int lb,
int l,
int n)
98 if (c >= l)
return -1;
102 if (p[c] >> 6 != 2)
break;
109 if (c <= lb)
return -1;
110 if (p[--c] >= 0x80) {
112 if (p[c] >= 0xC0)
break;
126 SnowballStemImplementation::increase_size(
symbol * p,
int n)
128 int new_size = n + 20;
129 void * mem = realloc(
reinterpret_cast<char *
>(p) -
HEAD,
132 throw std::bad_alloc();
135 SET_CAPACITY(q, new_size);
140 StemImplementation::~StemImplementation() { }
142 SnowballStemImplementation::~SnowballStemImplementation()
148 SnowballStemImplementation::operator()(
const string & word)
150 const symbol * s =
reinterpret_cast<const symbol *
>(word.data());
151 replace_s(0, l, word.size(), s);
157 return string(
reinterpret_cast<const char *
>(p), l);
165 if (tmp >= l)
return 0;
167 if (b0 < 0xC0 || tmp == l) {
171 b1 = p[tmp++] & 0x3F;
172 if (b0 < 0xE0 || tmp == l) {
173 *slot = (b0 & 0x1F) << 6 | b1;
176 b2 = p[tmp++] & 0x3F;
177 if (b0 < 0xF0 || tmp == l) {
178 *slot = (b0 & 0xF) << 12 | b1 << 6 | b2;
181 *slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[tmp] & 0x3F);
185 int SnowballStemImplementation::get_b_utf8(
int * slot) {
188 if (tmp <= lb)
return 0;
190 if (b < 0x80 || tmp == lb) {
196 if (b >= 0xC0 || tmp == lb) {
197 *slot = (b & 0x1F) << 6 | a;
200 a |= (b & 0x3F) << 6;
202 if (b >= 0xE0 || tmp == lb) {
203 *slot = (b & 0xF) << 12 | a;
206 *slot = (p[--tmp] & 0x7) << 18 | (b & 0x3F) << 12 | a;
211 SnowballStemImplementation::in_grouping_U(
const unsigned char * s,
int min,
218 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
226 SnowballStemImplementation::in_grouping_b_U(
const unsigned char * s,
int min,
231 int w = get_b_utf8(&ch);
233 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
241 SnowballStemImplementation::out_grouping_U(
const unsigned char * s,
int min,
248 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
256 SnowballStemImplementation::out_grouping_b_U(
const unsigned char * s,
int min,
261 int w = get_b_utf8(&ch);
263 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
270 int SnowballStemImplementation::eq_s(
int s_size,
const symbol * s) {
271 if (l - c < s_size || memcmp(p + c, s, s_size *
sizeof(
symbol)) != 0)
277 int SnowballStemImplementation::eq_s_b(
int s_size,
const symbol * s) {
278 if (c - lb < s_size || memcmp(p + c - s_size, s, s_size *
sizeof(
symbol)) != 0)
285 SnowballStemImplementation::find_among(
const symbol * pool,
286 const struct among * v,
int v_size,
287 const unsigned char * fnum,
299 int first_key_inspected = 0;
302 int k = i + ((j - i) >> 1);
304 int common = common_i < common_j ? common_i : common_j;
305 const struct among * w = v + k;
306 for (
int x = common; x < w->
s_size; ++x) {
307 if (c_orig + common == l) { diff = -1;
break; }
308 diff = q[common] - (pool + w->
s)[x];
309 if (diff != 0)
break;
327 if (first_key_inspected)
break;
328 first_key_inspected = 1;
332 const struct among * w = v + i;
333 if (common_i >= w->
s_size) {
335 if (!fnum || !fnum[i])
return w->
result;
337 int res = f[fnum[i] - 1](
this);
339 if (res)
return w->
result;
349 SnowballStemImplementation::find_among_b(
const symbol * pool,
350 const struct among * v,
int v_size,
351 const unsigned char * fnum,
357 const symbol * q = p + c - 1;
363 int first_key_inspected = 0;
366 int k = i + ((j - i) >> 1);
368 int common = common_i < common_j ? common_i : common_j;
369 const struct among * w = v + k;
370 for (
int x = w->
s_size - 1 - common; x >= 0; --x) {
371 if (c_orig - common == lb) { diff = -1;
break; }
372 diff = q[- common] - (pool + w->
s)[x];
373 if (diff != 0)
break;
376 if (diff < 0) { j = k; common_j = common; }
377 else { i = k; common_i = common; }
381 if (first_key_inspected)
break;
382 first_key_inspected = 1;
386 const struct among * w = v + i;
387 if (common_i >= w->
s_size) {
389 if (!fnum || !fnum[i])
return w->
result;
391 int res = f[fnum[i] - 1](
this);
393 if (res)
return w->
result;
410 if (adjustment != 0) {
411 if (adjustment + len >
CAPACITY(p)) {
412 p = increase_size(p, adjustment + len);
414 memmove(p +
c_ket + adjustment,
417 SET_SIZE(p, adjustment + len);
428 int SnowballStemImplementation::slice_check() {
430 if (bra < 0 || bra > ket || ket > l) {
432 fprintf(stderr,
"faulty slice operation:\n");
440 int SnowballStemImplementation::slice_from_s(
int s_size,
const symbol *
s) {
441 if (slice_check())
return -1;
442 replace_s(bra, ket,
s_size,
s);
451 if (
c_bra <= bra) bra += adjustment;
452 if (
c_bra <= ket) ket += adjustment;
456 if (slice_check())
return NULL;
460 v = increase_size(v, len);
462 memmove(v, p + bra, len *
sizeof(
symbol));
471 v = increase_size(v, len);
473 memmove(v, p, len *
sizeof(
symbol));
478 int SnowballStemImplementation::len_utf8(
const symbol * v) {
483 if (
static_cast<signed char>(
b) >=
static_cast<signed char>(0xc0))
490 void SnowballStemImplementation::debug(
int number,
int line_count) {
493 if (
number >= 0) printf(
"%3d (line %4d): [%d]'",
number, line_count, limit);
494 for (i = 0; i <= limit; ++i) {
495 if (lb == i) printf(
"{");
496 if (bra == i) printf(
"[");
497 if (c == i) printf(
"|");
498 if (ket == i) printf(
"]");
499 if (l == i) printf(
"}");
502 if (ch == 0) ch =
'#';
InternalError indicates a runtime problem of some sort.
Hierarchy of classes which Xapian can throw as exceptions.
The Xapian namespace contains public interfaces for the Xapian library.
Various assertion macros.
Base class for implementations of stemming algorithms.
int(* among_function)(Xapian::StemImplementation *)