72 #define CREATE_SIZE 16 75 SnowballStemImplementation::create_s()
78 if (mem == NULL)
throw std::bad_alloc();
94 SnowballStemImplementation::skip_utf8(
const symbol * p,
int c,
int lb,
int l,
int n)
98 if (c >= l)
return -1;
102 if (p[c] >> 6 != 2)
break;
109 if (c <= lb)
return -1;
110 if (p[--c] >= 0x80) {
112 if (p[c] >= 0xC0)
break;
126 SnowballStemImplementation::increase_size(
symbol * p,
int n)
128 int new_size = n + 20;
129 void * mem = realloc(reinterpret_cast<char *>(p) -
HEAD,
132 throw std::bad_alloc();
135 SET_CAPACITY(q, new_size);
140 StemImplementation::~StemImplementation() { }
142 SnowballStemImplementation::~SnowballStemImplementation()
148 SnowballStemImplementation::operator()(
const string & word)
150 const symbol * s =
reinterpret_cast<const symbol *
>(word.data());
151 replace_s(0, l, word.size(), s);
157 return string(reinterpret_cast<const char *>(p), l);
165 if (tmp >= l)
return 0;
167 if (b0 < 0xC0 || tmp == l) {
171 b1 = p[tmp++] & 0x3F;
172 if (b0 < 0xE0 || tmp == l) {
173 *slot = (b0 & 0x1F) << 6 | b1;
176 b2 = p[tmp++] & 0x3F;
177 if (b0 < 0xF0 || tmp == l) {
178 *slot = (b0 & 0xF) << 12 | b1 << 6 | b2;
181 *slot = (b0 & 0xE) << 18 | b1 << 12 | b2 << 6 | (p[tmp] & 0x3F);
185 int SnowballStemImplementation::get_b_utf8(
int * slot) {
188 if (tmp <= lb)
return 0;
190 if (b < 0x80 || tmp == lb) {
196 if (b >= 0xC0 || tmp == lb) {
197 *slot = (b & 0x1F) << 6 | a;
200 a |= (b & 0x3F) << 6;
202 if (b >= 0xE0 || tmp == lb) {
203 *slot = (b & 0xF) << 12 | a;
206 *slot = (p[--tmp] & 0xE) << 18 | (b & 0x3F) << 12 | a;
211 SnowballStemImplementation::in_grouping_U(
const unsigned char * s,
int min,
218 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
226 SnowballStemImplementation::in_grouping_b_U(
const unsigned char * s,
int min,
231 int w = get_b_utf8(&ch);
233 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
241 SnowballStemImplementation::out_grouping_U(
const unsigned char * s,
int min,
248 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
256 SnowballStemImplementation::out_grouping_b_U(
const unsigned char * s,
int min,
261 int w = get_b_utf8(&ch);
263 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
270 int SnowballStemImplementation::eq_s(
int s_size,
const symbol * s) {
271 if (l - c < s_size || memcmp(p + c, s, s_size *
sizeof(
symbol)) != 0)
277 int SnowballStemImplementation::eq_s_b(
int s_size,
const symbol * s) {
278 if (c - lb < s_size || memcmp(p + c - s_size, s, s_size *
sizeof(
symbol)) != 0)
285 SnowballStemImplementation::find_among(
const symbol * pool,
286 const struct among * v,
int v_size,
287 const unsigned char * fnum,
299 int first_key_inspected = 0;
302 int k = i + ((j - i) >> 1);
304 int common = common_i < common_j ? common_i : common_j;
305 const struct among * w = v + k;
306 for (
int x = common; x < w->
s_size; ++x) {
307 if (c_orig + common == l) { diff = -1;
break; }
308 diff = q[common] - (pool + w->
s)[x];
309 if (diff != 0)
break;
327 if (first_key_inspected)
break;
328 first_key_inspected = 1;
332 const struct among * w = v + i;
333 if (common_i >= w->
s_size) {
335 if (!fnum || !fnum[i])
return w->
result;
337 int res = f[fnum[i] - 1](
this);
339 if (res)
return w->
result;
349 SnowballStemImplementation::find_among_b(
const symbol * pool,
350 const struct among * v,
int v_size,
351 const unsigned char * fnum,
357 const symbol * q = p + c - 1;
363 int first_key_inspected = 0;
366 int k = i + ((j - i) >> 1);
368 int common = common_i < common_j ? common_i : common_j;
369 const struct among * w = v + k;
370 for (
int x = w->
s_size - 1 - common; x >= 0; --x) {
371 if (c_orig - common == lb) { diff = -1;
break; }
372 diff = q[- common] - (pool + w->
s)[x];
373 if (diff != 0)
break;
376 if (diff < 0) { j = k; common_j = common; }
377 else { i = k; common_i = common; }
381 if (first_key_inspected)
break;
382 first_key_inspected = 1;
386 const struct among * w = v + i;
387 if (common_i >= w->
s_size) {
389 if (!fnum || !fnum[i])
return w->
result;
391 int res = f[fnum[i] - 1](
this);
393 if (res)
return w->
result;
408 adjustment = s_size - (c_ket -
c_bra);
410 if (adjustment != 0) {
411 if (adjustment + len >
CAPACITY(p)) {
412 p = increase_size(p, adjustment + len);
414 memmove(p + c_ket + adjustment,
416 (len - c_ket) *
sizeof(
symbol));
417 SET_SIZE(p, adjustment + len);
424 if (s_size) memmove(p + c_bra, s, s_size *
sizeof(
symbol));
428 int SnowballStemImplementation::slice_check() {
430 if (bra < 0 || bra > ket || ket > l) {
432 fprintf(stderr,
"faulty slice operation:\n");
440 int SnowballStemImplementation::slice_from_s(
int s_size,
const symbol *
s) {
441 if (slice_check())
return -1;
442 replace_s(bra, ket, s_size, s);
450 int adjustment = replace_s(c_bra, c_ket, s_size, s);
451 if (c_bra <= bra) bra += adjustment;
452 if (c_bra <= ket) ket += adjustment;
456 if (slice_check())
return NULL;
460 v = increase_size(v, len);
462 memmove(v, p + bra, len *
sizeof(
symbol));
471 v = increase_size(v, len);
473 memmove(v, p, len *
sizeof(
symbol));
478 int SnowballStemImplementation::len_utf8(
const symbol * v) {
483 if (static_cast<signed char>(b) >= static_cast<signed char>(0xc0))
490 void SnowballStemImplementation::debug(
int number,
int line_count) {
493 if (number >= 0) printf(
"%3d (line %4d): [%d]'", number, line_count, limit);
494 for (i = 0; i <= limit; ++i) {
495 if (lb == i) printf(
"{");
496 if (bra == i) printf(
"[");
497 if (c == i) printf(
"|");
498 if (ket == i) printf(
"]");
499 if (l == i) printf(
"}");
502 if (ch == 0) ch =
'#';
The Xapian namespace contains public interfaces for the Xapian library.
Hierarchy of classes which Xapian can throw as exceptions.
Base class for implementations of stemming algorithms.
int(* among_function)(Xapian::StemImplementation *)
InternalError indicates a runtime problem of some sort.
Various assertion macros.