00001
00002
00003 #include <limits.h>
00004 #include "swedish.h"
00005
00006 static const symbol s_pool[] = {
00007 #define s_0_0 0
00008 'a',
00009 #define s_0_1 1
00010 'a', 'r', 'n', 'a',
00011 #define s_0_2 5
00012 'e', 'r', 'n', 'a',
00013 #define s_0_3 9
00014 'h', 'e', 't', 'e', 'r', 'n', 'a',
00015 #define s_0_4 16
00016 'o', 'r', 'n', 'a',
00017 #define s_0_5 20
00018 'a', 'd',
00019 #define s_0_6 22
00020 'e',
00021 #define s_0_7 23
00022 'a', 'd', 'e',
00023 #define s_0_8 26
00024 'a', 'n', 'd', 'e',
00025 #define s_0_9 30
00026 'a', 'r', 'n', 'e',
00027 #define s_0_10 34
00028 'a', 'r', 'e',
00029 #define s_0_11 37
00030 'a', 's', 't', 'e',
00031 #define s_0_12 41
00032 'e', 'n',
00033 #define s_0_13 43
00034 'a', 'n', 'd', 'e', 'n',
00035 #define s_0_14 48
00036 'a', 'r', 'e', 'n',
00037 #define s_0_15 52
00038 'h', 'e', 't', 'e', 'n',
00039 #define s_0_16 57
00040 'e', 'r', 'n',
00041 #define s_0_17 60
00042 'a', 'r',
00043 #define s_0_18 62
00044 'e', 'r',
00045 #define s_0_19 64
00046 'h', 'e', 't', 'e', 'r',
00047 #define s_0_20 69
00048 'o', 'r',
00049 #define s_0_21 71
00050 's',
00051 #define s_0_22 72
00052 'a', 's',
00053 #define s_0_23 74
00054 'a', 'r', 'n', 'a', 's',
00055 #define s_0_24 79
00056 'e', 'r', 'n', 'a', 's',
00057 #define s_0_25 84
00058 'o', 'r', 'n', 'a', 's',
00059 #define s_0_26 89
00060 'e', 's',
00061 #define s_0_27 91
00062 'a', 'd', 'e', 's',
00063 #define s_0_28 95
00064 'a', 'n', 'd', 'e', 's',
00065 #define s_0_29 100
00066 'e', 'n', 's',
00067 #define s_0_30 103
00068 'a', 'r', 'e', 'n', 's',
00069 #define s_0_31 108
00070 'h', 'e', 't', 'e', 'n', 's',
00071 #define s_0_32 114
00072 'e', 'r', 'n', 's',
00073 #define s_0_33 118
00074 'a', 't',
00075 #define s_0_34 120
00076 'a', 'n', 'd', 'e', 't',
00077 #define s_0_35 125
00078 'h', 'e', 't',
00079 #define s_0_36 128
00080 'a', 's', 't',
00081 #define s_1_0 131
00082 'd', 'd',
00083 #define s_1_1 133
00084 'g', 'd',
00085 #define s_1_2 135
00086 'n', 'n',
00087 #define s_1_3 137
00088 'd', 't',
00089 #define s_1_4 139
00090 'g', 't',
00091 #define s_1_5 141
00092 'k', 't',
00093 #define s_1_6 143
00094 't', 't',
00095 #define s_2_0 145
00096 'i', 'g',
00097 #define s_2_1 147
00098 'l', 'i', 'g',
00099 #define s_2_2 150
00100 'e', 'l', 's',
00101 #define s_2_3 153
00102 'f', 'u', 'l', 'l', 't',
00103 #define s_2_4 158
00104 'l', 0xC3, 0xB6, 's', 't',
00105 };
00106
00107
00108 static const struct among a_0[37] =
00109 {
00110 { 1, s_0_0, -1, 1},
00111 { 4, s_0_1, 0, 1},
00112 { 4, s_0_2, 0, 1},
00113 { 7, s_0_3, 2, 1},
00114 { 4, s_0_4, 0, 1},
00115 { 2, s_0_5, -1, 1},
00116 { 1, s_0_6, -1, 1},
00117 { 3, s_0_7, 6, 1},
00118 { 4, s_0_8, 6, 1},
00119 { 4, s_0_9, 6, 1},
00120 { 3, s_0_10, 6, 1},
00121 { 4, s_0_11, 6, 1},
00122 { 2, s_0_12, -1, 1},
00123 { 5, s_0_13, 12, 1},
00124 { 4, s_0_14, 12, 1},
00125 { 5, s_0_15, 12, 1},
00126 { 3, s_0_16, -1, 1},
00127 { 2, s_0_17, -1, 1},
00128 { 2, s_0_18, -1, 1},
00129 { 5, s_0_19, 18, 1},
00130 { 2, s_0_20, -1, 1},
00131 { 1, s_0_21, -1, 2},
00132 { 2, s_0_22, 21, 1},
00133 { 5, s_0_23, 22, 1},
00134 { 5, s_0_24, 22, 1},
00135 { 5, s_0_25, 22, 1},
00136 { 2, s_0_26, 21, 1},
00137 { 4, s_0_27, 26, 1},
00138 { 5, s_0_28, 26, 1},
00139 { 3, s_0_29, 21, 1},
00140 { 5, s_0_30, 29, 1},
00141 { 6, s_0_31, 29, 1},
00142 { 4, s_0_32, 21, 1},
00143 { 2, s_0_33, -1, 1},
00144 { 5, s_0_34, -1, 1},
00145 { 3, s_0_35, -1, 1},
00146 { 3, s_0_36, -1, 1}
00147 };
00148
00149
00150 static const struct among a_1[7] =
00151 {
00152 { 2, s_1_0, -1, -1},
00153 { 2, s_1_1, -1, -1},
00154 { 2, s_1_2, -1, -1},
00155 { 2, s_1_3, -1, -1},
00156 { 2, s_1_4, -1, -1},
00157 { 2, s_1_5, -1, -1},
00158 { 2, s_1_6, -1, -1}
00159 };
00160
00161
00162 static const struct among a_2[5] =
00163 {
00164 { 2, s_2_0, -1, 1},
00165 { 3, s_2_1, 0, 1},
00166 { 3, s_2_2, -1, 1},
00167 { 5, s_2_3, -1, 3},
00168 { 5, s_2_4, -1, 2}
00169 };
00170
/*
 * Character-class bitmaps handed to the *_grouping*_U helpers.
 *
 * NOTE(review): assuming the usual Snowball layout (bit k of the map stands
 * for code point min + k, least-significant bit first), g_v with range
 * 97..246 selects a e i o u y plus U+00E4, U+00E5, U+00F6, and g_s_ending
 * with range 98..121 selects b c d f g h j k l m n o p r t v y.  Confirm
 * against the grouping helpers, which are not defined in this file.
 */
static const unsigned char g_v[] = {
    17, 65, 16, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    24, 0, 32
};

static const unsigned char g_s_ending[] = {
    119, 127, 149
};
00174
00175 static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' };
00176 static const symbol s_1[] = { 'f', 'u', 'l', 'l' };
00177
00178 int Xapian::InternalStemSwedish::r_mark_regions() {
00179 I_p1 = l;
00180 { int c_test1 = c;
00181 { int ret = skip_utf8(p, c, 0, l, + 3);
00182 if (ret < 0) return 0;
00183 c = ret;
00184 }
00185 I_x = c;
00186 c = c_test1;
00187 }
00188 if (out_grouping_U(g_v, 97, 246, 1) < 0) return 0;
00189 { int ret = in_grouping_U(g_v, 97, 246, 1);
00190 if (ret < 0) return 0;
00191 c += ret;
00192 }
00193 I_p1 = c;
00194
00195 if (!(I_p1 < I_x)) goto lab0;
00196 I_p1 = I_x;
00197 lab0:
00198 return 1;
00199 }
00200
00201 int Xapian::InternalStemSwedish::r_main_suffix() {
00202 int among_var;
00203 { int m1 = l - c; (void)m1;
00204 int mlimit1;
00205 if (c < I_p1) return 0;
00206 c = I_p1;
00207 mlimit1 = lb; lb = c;
00208 c = l - m1;
00209 ket = c;
00210 if (c <= lb || p[c - 1] >> 5 != 3 || !((1851442 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00211 among_var = find_among_b(s_pool, a_0, 37, 0, 0);
00212 if (!(among_var)) { lb = mlimit1; return 0; }
00213 bra = c;
00214 lb = mlimit1;
00215 }
00216 switch(among_var) {
00217 case 0: return 0;
00218 case 1:
00219 if (slice_del() == -1) return -1;
00220 break;
00221 case 2:
00222 if (in_grouping_b_U(g_s_ending, 98, 121, 0)) return 0;
00223 if (slice_del() == -1) return -1;
00224 break;
00225 }
00226 return 1;
00227 }
00228
00229 int Xapian::InternalStemSwedish::r_consonant_pair() {
00230 { int m1 = l - c; (void)m1;
00231 int mlimit1;
00232 if (c < I_p1) return 0;
00233 c = I_p1;
00234 mlimit1 = lb; lb = c;
00235 c = l - m1;
00236 { int m2 = l - c; (void)m2;
00237 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((1064976 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00238 if (!(find_among_b(s_pool, a_1, 7, 0, 0))) { lb = mlimit1; return 0; }
00239 c = l - m2;
00240 ket = c;
00241 { int ret = skip_utf8(p, c, lb, 0, -1);
00242 if (ret < 0) { lb = mlimit1; return 0; }
00243 c = ret;
00244 }
00245 bra = c;
00246 if (slice_del() == -1) return -1;
00247 }
00248 lb = mlimit1;
00249 }
00250 return 1;
00251 }
00252
00253 int Xapian::InternalStemSwedish::r_other_suffix() {
00254 int among_var;
00255 { int m1 = l - c; (void)m1;
00256 int mlimit1;
00257 if (c < I_p1) return 0;
00258 c = I_p1;
00259 mlimit1 = lb; lb = c;
00260 c = l - m1;
00261 ket = c;
00262 if (c - 1 <= lb || p[c - 1] >> 5 != 3 || !((1572992 >> (p[c - 1] & 0x1f)) & 1)) { lb = mlimit1; return 0; }
00263 among_var = find_among_b(s_pool, a_2, 5, 0, 0);
00264 if (!(among_var)) { lb = mlimit1; return 0; }
00265 bra = c;
00266 switch(among_var) {
00267 case 0: { lb = mlimit1; return 0; }
00268 case 1:
00269 if (slice_del() == -1) return -1;
00270 break;
00271 case 2:
00272 { int ret = slice_from_s(4, s_0);
00273 if (ret < 0) return ret;
00274 }
00275 break;
00276 case 3:
00277 { int ret = slice_from_s(4, s_1);
00278 if (ret < 0) return ret;
00279 }
00280 break;
00281 }
00282 lb = mlimit1;
00283 }
00284 return 1;
00285 }
00286
00287 int Xapian::InternalStemSwedish::stem() {
00288 { int c1 = c;
00289 { int ret = r_mark_regions();
00290 if (ret == 0) goto lab0;
00291 if (ret < 0) return ret;
00292 }
00293 lab0:
00294 c = c1;
00295 }
00296 lb = c; c = l;
00297
00298 { int m2 = l - c; (void)m2;
00299 { int ret = r_main_suffix();
00300 if (ret == 0) goto lab1;
00301 if (ret < 0) return ret;
00302 }
00303 lab1:
00304 c = l - m2;
00305 }
00306 { int m3 = l - c; (void)m3;
00307 { int ret = r_consonant_pair();
00308 if (ret == 0) goto lab2;
00309 if (ret < 0) return ret;
00310 }
00311 lab2:
00312 c = l - m3;
00313 }
00314 { int m4 = l - c; (void)m4;
00315 { int ret = r_other_suffix();
00316 if (ret == 0) goto lab3;
00317 if (ret < 0) return ret;
00318 }
00319 lab3:
00320 c = l - m4;
00321 }
00322 c = lb;
00323 return 1;
00324 }
00325
00326 Xapian::InternalStemSwedish::InternalStemSwedish()
00327 : I_x(0), I_p1(0)
00328 {
00329 }
00330
00331 Xapian::InternalStemSwedish::~InternalStemSwedish()
00332 {
00333 }
00334
00335 const char *
00336 Xapian::InternalStemSwedish::get_description() const
00337 {
00338 return "swedish";
00339 }