00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
00022 #define XAPIAN_INCLUDED_STEMINTERNAL_H
00023
00024 #include <xapian/base.h>
00025 #include <xapian/stem.h>
00026
00027 #include <stdlib.h>
00028 #include <string>
00029
00030
00031
00032
/// Element type of the stemmer's working buffers: one unsigned byte each.
typedef unsigned char symbol;

/** Number of bytes of bookkeeping stored in front of a symbol buffer.
 *
 *  Two ints precede the data; CAPACITY() reads the int at index -2 and
 *  SIZE() the int at index -1, and lose_s() steps back by HEAD bytes before
 *  calling free().
 */
#define HEAD (2 * sizeof(int))
00036
00037
00038
00039
00040 inline int
00041 SIZE(const symbol* p)
00042 {
00043 const void * void_p = reinterpret_cast<const void *>(p);
00044 return reinterpret_cast<const int *>(void_p)[-1];
00045 }
00046
00047 inline void
00048 SET_SIZE(symbol* p, int n)
00049 {
00050 void * void_p = reinterpret_cast<void *>(p);
00051 reinterpret_cast<int *>(void_p)[-1] = n;
00052 }
00053
00054 inline int
00055 CAPACITY(const symbol* p)
00056 {
00057 const void * void_p = reinterpret_cast<const void *>(p);
00058 return reinterpret_cast<const int *>(void_p)[-2];
00059 }
00060
00061 inline void
00062 SET_CAPACITY(symbol* p, int n)
00063 {
00064 void * void_p = reinterpret_cast<void *>(p);
00065 reinterpret_cast<int *>(void_p)[-2] = n;
00066 }
00067
00068 typedef int (*among_function)(Xapian::Stem::Internal *);
00069
/** One entry of a table passed to find_among() / find_among_b().
 *
 *  NOTE(review): the field descriptions are inferred from the names and from
 *  the find_among() signature (which also takes a `pool` of symbols); confirm
 *  against the implementation.
 */
struct among {
    /// Presumably the length of this entry's search string, in symbols.
    int s_size;

    /// Presumably where this entry's search string starts within the pool.
    unsigned int s;

    /// Presumably the index of another table entry (a substring match).
    int substring_i;

    /// Presumably the value reported when this entry matches.
    int result;
};
00076
00077 extern symbol * create_s();
00078
00079 inline void lose_s(symbol * p) {
00080 if (p) free(reinterpret_cast<char *>(p) - HEAD);
00081 }
00082
00083 extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
00084
00085 namespace Xapian {
00086
00087 class Stem::Internal : public Xapian::Internal::RefCntBase {
00088 int slice_check();
00089
00090 protected:
00091 symbol * p;
00092 int c, l, lb, bra, ket;
00093
00094 int get_utf8(int * slot);
00095 int get_b_utf8(int * slot);
00096
00097 int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
00098 int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00099 int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
00100 int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00101
00102 int eq_s(int s_size, const symbol * s);
00103 int eq_s_b(int s_size, const symbol * s);
00104 int eq_v(const symbol * v) { return eq_s(SIZE(v), v); }
00105 int eq_v_b(const symbol * v) { return eq_s_b(SIZE(v), v); }
00106
00107 int find_among(const symbol *pool, const struct among * v, int v_size,
00108 const unsigned char * fnum, const among_function * f);
00109 int find_among_b(const symbol *pool, const struct among * v, int v_size,
00110 const unsigned char * fnum, const among_function * f);
00111
00112 int replace_s(int c_bra, int c_ket, int s_size, const symbol * s);
00113 int slice_from_s(int s_size, const symbol * s);
00114 int slice_from_v(const symbol * v) { return slice_from_s(SIZE(v), v); }
00115
00116 int slice_del() { return slice_from_s(0, 0); }
00117
00118 void insert_s(int c_bra, int c_ket, int s_size, const symbol * s);
00119 void insert_v(int c_bra, int c_ket, const symbol * v) {
00120 insert_s(c_bra, c_ket, SIZE(v), v);
00121 }
00122
00123 symbol * slice_to(symbol * v);
00124 symbol * assign_to(symbol * v);
00125
00126 #if 0
00127 void debug(int number, int line_count);
00128 #endif
00129
00130 public:
00132 Internal() : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
00133
00135 virtual ~Internal();
00136
00138 std::string operator()(const std::string & word);
00139
00141 virtual int stem() = 0;
00142
00144 virtual const char * get_description() const = 0;
00145 };
00146
00147 }
00148
00149 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H