00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
00023 #define XAPIAN_INCLUDED_STEMINTERNAL_H
00024
00025 #include <xapian/stem.h>
00026
00027 #include <cstdlib>
00028 #include <string>
00029
/// Unit of storage for the strings handled by the stemmer runtime.
typedef unsigned char symbol;

/// Number of bytes occupied by the two int slots which sit immediately
/// before a symbol buffer (read and written by CAPACITY()/SIZE() and
/// their setters).
#define HEAD (sizeof(int) * 2)
00033
00034
00035
00036
00037 inline int
00038 SIZE(const symbol* p)
00039 {
00040 const void * void_p = reinterpret_cast<const void *>(p);
00041 return reinterpret_cast<const int *>(void_p)[-1];
00042 }
00043
00044 inline void
00045 SET_SIZE(symbol* p, int n)
00046 {
00047 void * void_p = reinterpret_cast<void *>(p);
00048 reinterpret_cast<int *>(void_p)[-1] = n;
00049 }
00050
00051 inline int
00052 CAPACITY(const symbol* p)
00053 {
00054 const void * void_p = reinterpret_cast<const void *>(p);
00055 return reinterpret_cast<const int *>(void_p)[-2];
00056 }
00057
00058 inline void
00059 SET_CAPACITY(symbol* p, int n)
00060 {
00061 void * void_p = reinterpret_cast<void *>(p);
00062 reinterpret_cast<int *>(void_p)[-2] = n;
00063 }
00064
00065 typedef int (*among_function)(Xapian::StemImplementation *);
00066
/// One entry of a table searched by find_among()/find_among_b().
///
/// NOTE(review): the field meanings below are inferred from the names and
/// from the "pool" parameter of find_among() - confirm against the
/// implementation.
struct among {
    /// Presumably the length of the string to match.
    int s_size;

    /// Presumably the offset of the string within the pool.
    unsigned s;

    /// Presumably the index of a related (substring) entry.
    int substring_i;

    /// Presumably the value reported when this entry matches.
    int result;
};
00073
00074 extern symbol * create_s();
00075
00076 inline void lose_s(symbol * p) {
00077 if (p) std::free(reinterpret_cast<char *>(p) - HEAD);
00078 }
00079
00080 extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
00081
00082 namespace Xapian {
00083
00084 class SnowballStemImplementation : public StemImplementation {
00085 int slice_check();
00086
00087 protected:
00088 symbol * p;
00089 int c, l, lb, bra, ket;
00090
00091 int get_utf8(int * slot);
00092 int get_b_utf8(int * slot);
00093
00094 int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
00095 int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00096 int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
00097 int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00098
00099 int eq_s(int s_size, const symbol * s);
00100 int eq_s_b(int s_size, const symbol * s);
00101 int eq_v(const symbol * v) { return eq_s(SIZE(v), v); }
00102 int eq_v_b(const symbol * v) { return eq_s_b(SIZE(v), v); }
00103
00104 int find_among(const symbol *pool, const struct among * v, int v_size,
00105 const unsigned char * fnum, const among_function * f);
00106 int find_among_b(const symbol *pool, const struct among * v, int v_size,
00107 const unsigned char * fnum, const among_function * f);
00108
00109 int replace_s(int c_bra, int c_ket, int s_size, const symbol * s);
00110 int slice_from_s(int s_size, const symbol * s);
00111 int slice_from_v(const symbol * v) { return slice_from_s(SIZE(v), v); }
00112
00113 int slice_del() { return slice_from_s(0, 0); }
00114
00115 void insert_s(int c_bra, int c_ket, int s_size, const symbol * s);
00116 void insert_v(int c_bra, int c_ket, const symbol * v) {
00117 insert_s(c_bra, c_ket, SIZE(v), v);
00118 }
00119
00120 symbol * slice_to(symbol * v);
00121 symbol * assign_to(symbol * v);
00122
00123 #if 0
00124 void debug(int number, int line_count);
00125 #endif
00126
00127 public:
00129 SnowballStemImplementation()
00130 : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
00131
00133 virtual ~SnowballStemImplementation();
00134
00136 virtual std::string operator()(const std::string & word);
00137
00139 virtual int stem() = 0;
00140 };
00141
00142 }
00143
00144 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H