00001
00002
00003 #include <limits.h>
00004 #include "dutch.h"
00005
00006 static const symbol s_pool[] = {
00007 #define s_0_1 0
00008 0xC3, 0xA1,
00009 #define s_0_2 2
00010 0xC3, 0xA4,
00011 #define s_0_3 4
00012 0xC3, 0xA9,
00013 #define s_0_4 6
00014 0xC3, 0xAB,
00015 #define s_0_5 8
00016 0xC3, 0xAD,
00017 #define s_0_6 10
00018 0xC3, 0xAF,
00019 #define s_0_7 12
00020 0xC3, 0xB3,
00021 #define s_0_8 14
00022 0xC3, 0xB6,
00023 #define s_0_9 16
00024 0xC3, 0xBA,
00025 #define s_0_10 18
00026 0xC3, 0xBC,
00027 #define s_1_1 20
00028 'I',
00029 #define s_1_2 21
00030 'Y',
00031 #define s_2_0 22
00032 'd', 'd',
00033 #define s_2_1 24
00034 'k', 'k',
00035 #define s_2_2 26
00036 't', 't',
00037 #define s_3_0 28
00038 'e', 'n', 'e',
00039 #define s_3_1 31
00040 's', 'e',
00041 #define s_3_2 s_3_0
00042 #define s_3_3 33
00043 'h', 'e', 'd', 'e', 'n',
00044 #define s_3_4 s_3_1
00045 #define s_4_0 38
00046 'e', 'n', 'd',
00047 #define s_4_1 41
00048 'i', 'g',
00049 #define s_4_2 43
00050 'i', 'n', 'g',
00051 #define s_4_3 46
00052 'l', 'i', 'j', 'k',
00053 #define s_4_4 50
00054 'b', 'a', 'a', 'r',
00055 #define s_4_5 54
00056 'b', 'a', 'r',
00057 #define s_5_0 57
00058 'a', 'a',
00059 #define s_5_1 59
00060 'e', 'e',
00061 #define s_5_2 61
00062 'o', 'o',
00063 #define s_5_3 63
00064 'u', 'u',
00065 };
00066
00067
00068 static const struct among a_0[11] =
00069 {
00070 { 0, 0, -1, 6},
00071 { 2, s_0_1, 0, 1},
00072 { 2, s_0_2, 0, 1},
00073 { 2, s_0_3, 0, 2},
00074 { 2, s_0_4, 0, 2},
00075 { 2, s_0_5, 0, 3},
00076 { 2, s_0_6, 0, 3},
00077 { 2, s_0_7, 0, 4},
00078 { 2, s_0_8, 0, 4},
00079 { 2, s_0_9, 0, 5},
00080 { 2, s_0_10, 0, 5}
00081 };
00082
00083
00084 static const struct among a_1[3] =
00085 {
00086 { 0, 0, -1, 3},
00087 { 1, s_1_1, 0, 2},
00088 { 1, s_1_2, 0, 1}
00089 };
00090
00091
00092 static const struct among a_2[3] =
00093 {
00094 { 2, s_2_0, -1, -1},
00095 { 2, s_2_1, -1, -1},
00096 { 2, s_2_2, -1, -1}
00097 };
00098
00099
00100 static const struct among a_3[5] =
00101 {
00102 { 3, s_3_0, -1, 2},
00103 { 2, s_3_1, -1, 3},
00104 { 2, s_3_2, -1, 2},
00105 { 5, s_3_3, 2, 1},
00106 { 1, s_3_4, -1, 3}
00107 };
00108
00109
00110 static const struct among a_4[6] =
00111 {
00112 { 3, s_4_0, -1, 1},
00113 { 2, s_4_1, -1, 2},
00114 { 3, s_4_2, -1, 1},
00115 { 4, s_4_3, -1, 3},
00116 { 4, s_4_4, -1, 4},
00117 { 3, s_4_5, -1, 5}
00118 };
00119
00120
00121 static const struct among a_5[4] =
00122 {
00123 { 2, s_5_0, -1, -1},
00124 { 2, s_5_1, -1, -1},
00125 { 2, s_5_2, -1, -1},
00126 { 2, s_5_3, -1, -1}
00127 };
00128
// Character-class tables handed to the *_grouping*_U helpers together with a
// [min, max] code range.  Presumably each is a bitmap with one bit per code
// point starting at `min` (TODO confirm against the helper implementation).

// Used with the range 97..232.
static const unsigned char g_v[] = {
    17, 65, 16, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    128
};

// Used with the range 73..232 (the range starts at 'I').
static const unsigned char g_v_I[] = {
    1, 0, 0,
    17, 65, 16, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    128
};

// Used with the range 97..232; differs from g_v only in its second byte
// (67 instead of 65).
static const unsigned char g_v_j[] = {
    17, 67, 16, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    128
};
00134
00135 static const symbol s_0[] = { 'a' };
00136 static const symbol s_1[] = { 'e' };
00137 static const symbol s_2[] = { 'i' };
00138 static const symbol s_3[] = { 'o' };
00139 static const symbol s_4[] = { 'u' };
00140 static const symbol s_5[] = { 'Y' };
00141 static const symbol s_6[] = { 'I' };
00142 static const symbol s_7[] = { 'Y' };
00143 static const symbol s_8[] = { 'y' };
00144 static const symbol s_9[] = { 'i' };
00145 static const symbol s_10[] = { 'g', 'e', 'm' };
00146 static const symbol s_11[] = { 'h', 'e', 'i', 'd' };
00147 static const symbol s_12[] = { 'h', 'e', 'i', 'd' };
00148 static const symbol s_13[] = { 'e', 'n' };
00149 static const symbol s_14[] = { 'i', 'g' };
00150
00151 int Xapian::InternalStemDutch::r_prelude() {
00152 int among_var;
00153 { int c_test1 = c;
00154 while(1) {
00155 int c2 = c;
00156 bra = c;
00157 if (c + 1 >= l || p[c + 1] >> 5 != 5 || !((340306450 >> (p[c + 1] & 0x1f)) & 1)) among_var = 6; else
00158 among_var = find_among(s_pool, a_0, 11, 0, 0);
00159 if (!(among_var)) goto lab0;
00160 ket = c;
00161 switch(among_var) {
00162 case 0: goto lab0;
00163 case 1:
00164 { int ret = slice_from_s(1, s_0);
00165 if (ret < 0) return ret;
00166 }
00167 break;
00168 case 2:
00169 { int ret = slice_from_s(1, s_1);
00170 if (ret < 0) return ret;
00171 }
00172 break;
00173 case 3:
00174 { int ret = slice_from_s(1, s_2);
00175 if (ret < 0) return ret;
00176 }
00177 break;
00178 case 4:
00179 { int ret = slice_from_s(1, s_3);
00180 if (ret < 0) return ret;
00181 }
00182 break;
00183 case 5:
00184 { int ret = slice_from_s(1, s_4);
00185 if (ret < 0) return ret;
00186 }
00187 break;
00188 case 6:
00189 { int ret = skip_utf8(p, c, 0, l, 1);
00190 if (ret < 0) goto lab0;
00191 c = ret;
00192 }
00193 break;
00194 }
00195 continue;
00196 lab0:
00197 c = c2;
00198 break;
00199 }
00200 c = c_test1;
00201 }
00202 { int c3 = c;
00203 bra = c;
00204 if (c == l || p[c] != 'y') { c = c3; goto lab1; }
00205 c++;
00206 ket = c;
00207 { int ret = slice_from_s(1, s_5);
00208 if (ret < 0) return ret;
00209 }
00210 lab1:
00211 ;
00212 }
00213 while(1) {
00214 int c4 = c;
00215 while(1) {
00216 int c5 = c;
00217 if (in_grouping_U(g_v, 97, 232, 0)) goto lab3;
00218 bra = c;
00219 { int c6 = c;
00220 if (c == l || p[c] != 'i') goto lab5;
00221 c++;
00222 ket = c;
00223 if (in_grouping_U(g_v, 97, 232, 0)) goto lab5;
00224 { int ret = slice_from_s(1, s_6);
00225 if (ret < 0) return ret;
00226 }
00227 goto lab4;
00228 lab5:
00229 c = c6;
00230 if (c == l || p[c] != 'y') goto lab3;
00231 c++;
00232 ket = c;
00233 { int ret = slice_from_s(1, s_7);
00234 if (ret < 0) return ret;
00235 }
00236 }
00237 lab4:
00238 c = c5;
00239 break;
00240 lab3:
00241 c = c5;
00242 { int ret = skip_utf8(p, c, 0, l, 1);
00243 if (ret < 0) goto lab2;
00244 c = ret;
00245 }
00246 }
00247 continue;
00248 lab2:
00249 c = c4;
00250 break;
00251 }
00252 return 1;
00253 }
00254
00255 int Xapian::InternalStemDutch::r_mark_regions() {
00256 I_p1 = l;
00257 I_p2 = l;
00258 { int ret = out_grouping_U(g_v, 97, 232, 1);
00259 if (ret < 0) return 0;
00260 c += ret;
00261 }
00262 { int ret = in_grouping_U(g_v, 97, 232, 1);
00263 if (ret < 0) return 0;
00264 c += ret;
00265 }
00266 I_p1 = c;
00267
00268 if (!(I_p1 < 3)) goto lab0;
00269 I_p1 = 3;
00270 lab0:
00271 { int ret = out_grouping_U(g_v, 97, 232, 1);
00272 if (ret < 0) return 0;
00273 c += ret;
00274 }
00275 { int ret = in_grouping_U(g_v, 97, 232, 1);
00276 if (ret < 0) return 0;
00277 c += ret;
00278 }
00279 I_p2 = c;
00280 return 1;
00281 }
00282
00283 int Xapian::InternalStemDutch::r_postlude() {
00284 int among_var;
00285 while(1) {
00286 int c1 = c;
00287 bra = c;
00288 if (c >= l || (p[c + 0] != 73 && p[c + 0] != 89)) among_var = 3; else
00289 among_var = find_among(s_pool, a_1, 3, 0, 0);
00290 if (!(among_var)) goto lab0;
00291 ket = c;
00292 switch(among_var) {
00293 case 0: goto lab0;
00294 case 1:
00295 { int ret = slice_from_s(1, s_8);
00296 if (ret < 0) return ret;
00297 }
00298 break;
00299 case 2:
00300 { int ret = slice_from_s(1, s_9);
00301 if (ret < 0) return ret;
00302 }
00303 break;
00304 case 3:
00305 { int ret = skip_utf8(p, c, 0, l, 1);
00306 if (ret < 0) goto lab0;
00307 c = ret;
00308 }
00309 break;
00310 }
00311 continue;
00312 lab0:
00313 c = c1;
00314 break;
00315 }
00316 return 1;
00317 }
00318
00319 int Xapian::InternalStemDutch::r_R1() {
00320 if (!(I_p1 <= c)) return 0;
00321 return 1;
00322 }
00323
00324 int Xapian::InternalStemDutch::r_R2() {
00325 if (!(I_p2 <= c)) return 0;
00326 return 1;
00327 }
00328
00329 int Xapian::InternalStemDutch::r_undouble() {
00330 { int m_test1 = l - c;
00331 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((1050640 >> (p[c - 1] & 0x1f)) & 1)) return 0;
00332 if (!(find_among_b(s_pool, a_2, 3, 0, 0))) return 0;
00333 c = l - m_test1;
00334 }
00335 ket = c;
00336 { int ret = skip_utf8(p, c, lb, 0, -1);
00337 if (ret < 0) return 0;
00338 c = ret;
00339 }
00340 bra = c;
00341 if (slice_del() == -1) return -1;
00342 return 1;
00343 }
00344
00345 int Xapian::InternalStemDutch::r_e_ending() {
00346 B_e_found = 0;
00347 ket = c;
00348 if (c <= lb || p[c - 1] != 'e') return 0;
00349 c--;
00350 bra = c;
00351 { int ret = r_R1();
00352 if (ret <= 0) return ret;
00353 }
00354 { int m_test1 = l - c;
00355 if (out_grouping_b_U(g_v, 97, 232, 0)) return 0;
00356 c = l - m_test1;
00357 }
00358 if (slice_del() == -1) return -1;
00359 B_e_found = 1;
00360 { int ret = r_undouble();
00361 if (ret <= 0) return ret;
00362 }
00363 return 1;
00364 }
00365
00366 int Xapian::InternalStemDutch::r_en_ending() {
00367 { int ret = r_R1();
00368 if (ret <= 0) return ret;
00369 }
00370 { int m1 = l - c; (void)m1;
00371 if (out_grouping_b_U(g_v, 97, 232, 0)) return 0;
00372 c = l - m1;
00373 { int m2 = l - c; (void)m2;
00374 if (!(eq_s_b(3, s_10))) goto lab0;
00375 return 0;
00376 lab0:
00377 c = l - m2;
00378 }
00379 }
00380 if (slice_del() == -1) return -1;
00381 { int ret = r_undouble();
00382 if (ret <= 0) return ret;
00383 }
00384 return 1;
00385 }
00386
00387 int Xapian::InternalStemDutch::r_standard_suffix() {
00388 int among_var;
00389 { int m1 = l - c; (void)m1;
00390 ket = c;
00391 if (c <= lb || p[c - 1] >> 5 != 3 || !((540704 >> (p[c - 1] & 0x1f)) & 1)) goto lab0;
00392 among_var = find_among_b(s_pool, a_3, 5, 0, 0);
00393 if (!(among_var)) goto lab0;
00394 bra = c;
00395 switch(among_var) {
00396 case 0: goto lab0;
00397 case 1:
00398 { int ret = r_R1();
00399 if (ret == 0) goto lab0;
00400 if (ret < 0) return ret;
00401 }
00402 { int ret = slice_from_s(4, s_11);
00403 if (ret < 0) return ret;
00404 }
00405 break;
00406 case 2:
00407 { int ret = r_en_ending();
00408 if (ret == 0) goto lab0;
00409 if (ret < 0) return ret;
00410 }
00411 break;
00412 case 3:
00413 { int ret = r_R1();
00414 if (ret == 0) goto lab0;
00415 if (ret < 0) return ret;
00416 }
00417 if (out_grouping_b_U(g_v_j, 97, 232, 0)) goto lab0;
00418 if (slice_del() == -1) return -1;
00419 break;
00420 }
00421 lab0:
00422 c = l - m1;
00423 }
00424 { int m2 = l - c; (void)m2;
00425 { int ret = r_e_ending();
00426 if (ret == 0) goto lab1;
00427 if (ret < 0) return ret;
00428 }
00429 lab1:
00430 c = l - m2;
00431 }
00432 { int m3 = l - c; (void)m3;
00433 ket = c;
00434 if (!(eq_s_b(4, s_12))) goto lab2;
00435 bra = c;
00436 { int ret = r_R2();
00437 if (ret == 0) goto lab2;
00438 if (ret < 0) return ret;
00439 }
00440 { int m4 = l - c; (void)m4;
00441 if (c <= lb || p[c - 1] != 'c') goto lab3;
00442 c--;
00443 goto lab2;
00444 lab3:
00445 c = l - m4;
00446 }
00447 if (slice_del() == -1) return -1;
00448 ket = c;
00449 if (!(eq_s_b(2, s_13))) goto lab2;
00450 bra = c;
00451 { int ret = r_en_ending();
00452 if (ret == 0) goto lab2;
00453 if (ret < 0) return ret;
00454 }
00455 lab2:
00456 c = l - m3;
00457 }
00458 { int m5 = l - c; (void)m5;
00459 ket = c;
00460 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((264336 >> (p[c - 1] & 0x1f)) & 1)) goto lab4;
00461 among_var = find_among_b(s_pool, a_4, 6, 0, 0);
00462 if (!(among_var)) goto lab4;
00463 bra = c;
00464 switch(among_var) {
00465 case 0: goto lab4;
00466 case 1:
00467 { int ret = r_R2();
00468 if (ret == 0) goto lab4;
00469 if (ret < 0) return ret;
00470 }
00471 if (slice_del() == -1) return -1;
00472 { int m6 = l - c; (void)m6;
00473 ket = c;
00474 if (!(eq_s_b(2, s_14))) goto lab6;
00475 bra = c;
00476 { int ret = r_R2();
00477 if (ret == 0) goto lab6;
00478 if (ret < 0) return ret;
00479 }
00480 { int m7 = l - c; (void)m7;
00481 if (c <= lb || p[c - 1] != 'e') goto lab7;
00482 c--;
00483 goto lab6;
00484 lab7:
00485 c = l - m7;
00486 }
00487 if (slice_del() == -1) return -1;
00488 goto lab5;
00489 lab6:
00490 c = l - m6;
00491 { int ret = r_undouble();
00492 if (ret == 0) goto lab4;
00493 if (ret < 0) return ret;
00494 }
00495 }
00496 lab5:
00497 break;
00498 case 2:
00499 { int ret = r_R2();
00500 if (ret == 0) goto lab4;
00501 if (ret < 0) return ret;
00502 }
00503 { int m8 = l - c; (void)m8;
00504 if (c <= lb || p[c - 1] != 'e') goto lab8;
00505 c--;
00506 goto lab4;
00507 lab8:
00508 c = l - m8;
00509 }
00510 if (slice_del() == -1) return -1;
00511 break;
00512 case 3:
00513 { int ret = r_R2();
00514 if (ret == 0) goto lab4;
00515 if (ret < 0) return ret;
00516 }
00517 if (slice_del() == -1) return -1;
00518 { int ret = r_e_ending();
00519 if (ret == 0) goto lab4;
00520 if (ret < 0) return ret;
00521 }
00522 break;
00523 case 4:
00524 { int ret = r_R2();
00525 if (ret == 0) goto lab4;
00526 if (ret < 0) return ret;
00527 }
00528 if (slice_del() == -1) return -1;
00529 break;
00530 case 5:
00531 { int ret = r_R2();
00532 if (ret == 0) goto lab4;
00533 if (ret < 0) return ret;
00534 }
00535 if (!(B_e_found)) goto lab4;
00536 if (slice_del() == -1) return -1;
00537 break;
00538 }
00539 lab4:
00540 c = l - m5;
00541 }
00542 { int m9 = l - c; (void)m9;
00543 if (out_grouping_b_U(g_v_I, 73, 232, 0)) goto lab9;
00544 { int m_test10 = l - c;
00545 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((2129954 >> (p[c - 1] & 0x1f)) & 1)) goto lab9;
00546 if (!(find_among_b(s_pool, a_5, 4, 0, 0))) goto lab9;
00547 if (out_grouping_b_U(g_v, 97, 232, 0)) goto lab9;
00548 c = l - m_test10;
00549 }
00550 ket = c;
00551 { int ret = skip_utf8(p, c, lb, 0, -1);
00552 if (ret < 0) goto lab9;
00553 c = ret;
00554 }
00555 bra = c;
00556 if (slice_del() == -1) return -1;
00557 lab9:
00558 c = l - m9;
00559 }
00560 return 1;
00561 }
00562
00563 int Xapian::InternalStemDutch::stem() {
00564 { int c1 = c;
00565 { int ret = r_prelude();
00566 if (ret == 0) goto lab0;
00567 if (ret < 0) return ret;
00568 }
00569 lab0:
00570 c = c1;
00571 }
00572 { int c2 = c;
00573 { int ret = r_mark_regions();
00574 if (ret == 0) goto lab1;
00575 if (ret < 0) return ret;
00576 }
00577 lab1:
00578 c = c2;
00579 }
00580 lb = c; c = l;
00581
00582 { int m3 = l - c; (void)m3;
00583 { int ret = r_standard_suffix();
00584 if (ret == 0) goto lab2;
00585 if (ret < 0) return ret;
00586 }
00587 lab2:
00588 c = l - m3;
00589 }
00590 c = lb;
00591 { int c4 = c;
00592 { int ret = r_postlude();
00593 if (ret == 0) goto lab3;
00594 if (ret < 0) return ret;
00595 }
00596 lab3:
00597 c = c4;
00598 }
00599 return 1;
00600 }
00601
00602 Xapian::InternalStemDutch::InternalStemDutch()
00603 : I_p2(0), I_p1(0), B_e_found(0)
00604 {
00605 }
00606
00607 Xapian::InternalStemDutch::~InternalStemDutch()
00608 {
00609 }
00610
00611 std::string
00612 Xapian::InternalStemDutch::get_description() const
00613 {
00614 return "dutch";
00615 }