languages/steminternal.h

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007,2009 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
00022 #define XAPIAN_INCLUDED_STEMINTERNAL_H
00023 
00024 #include <xapian/base.h>
00025 #include <xapian/stem.h>
00026 
00027 #include <stdlib.h>
00028 #include <string>
00029 
00030 // FIXME: we might want to make Stem::Internal a virtual base class and have
00031 // Stem::Internal::Snowball to allow for non-Snowball stemmers...
00032 
// The unit of text the stemming helpers operate on: a single unsigned byte.
typedef unsigned char symbol;

// Number of bytes occupied by the two ints stored in front of a symbol buffer
// (read back by SIZE() at int index -1 and by CAPACITY() at int index -2).
#define HEAD (sizeof(int) * 2)
00036 
00037 // Cast via (void*) to avoid warnings about alignment (the pointers *are*
00038 // appropriately aligned).
00039 
00040 inline int
00041 SIZE(const symbol* p)
00042 {
00043     const void * void_p = reinterpret_cast<const void *>(p);
00044     return reinterpret_cast<const int *>(void_p)[-1];
00045 }
00046 
00047 inline void
00048 SET_SIZE(symbol* p, int n)
00049 {
00050     void * void_p = reinterpret_cast<void *>(p);
00051     reinterpret_cast<int *>(void_p)[-1] = n;
00052 }
00053 
00054 inline int
00055 CAPACITY(const symbol* p)
00056 {
00057     const void * void_p = reinterpret_cast<const void *>(p);
00058     return reinterpret_cast<const int *>(void_p)[-2];
00059 }
00060 
00061 inline void
00062 SET_CAPACITY(symbol* p, int n)
00063 {
00064     void * void_p = reinterpret_cast<void *>(p);
00065     reinterpret_cast<int *>(void_p)[-2] = n;
00066 }
00067 
00068 typedef int (*among_function)(Xapian::Stem::Internal *);
00069 
// One entry of the tables handed to find_among() / find_among_b().
struct among {
    // Length of the search string, counted in symbols.
    int s_size;

    // Offset within the pool at which the search string starts.
    unsigned s;

    // Index to the longest matching substring.
    int substring_i;

    // Result of the lookup.
    int result;
};
00076 
00077 extern symbol * create_s();
00078 
00079 inline void lose_s(symbol * p) {
00080     if (p) free(reinterpret_cast<char *>(p) - HEAD);
00081 }
00082 
00083 extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
00084 
00085 namespace Xapian {
00086 
00087 class Stem::Internal : public Xapian::Internal::RefCntBase {
00088     int slice_check();
00089 
00090   protected:
00091     symbol * p;
00092     int c, l, lb, bra, ket;
00093 
00094     int get_utf8(int * slot);
00095     int get_b_utf8(int * slot);
00096 
00097     int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
00098     int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00099     int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
00100     int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
00101 
00102     int eq_s(int s_size, const symbol * s);
00103     int eq_s_b(int s_size, const symbol * s);
00104     int eq_v(const symbol * v) { return eq_s(SIZE(v), v); }
00105     int eq_v_b(const symbol * v) { return eq_s_b(SIZE(v), v); }
00106 
00107     int find_among(const symbol *pool, const struct among * v, int v_size,
00108                    const unsigned char * fnum, const among_function * f);
00109     int find_among_b(const symbol *pool, const struct among * v, int v_size,
00110                      const unsigned char * fnum, const among_function * f);
00111 
00112     int replace_s(int c_bra, int c_ket, int s_size, const symbol * s);
00113     int slice_from_s(int s_size, const symbol * s);
00114     int slice_from_v(const symbol * v) { return slice_from_s(SIZE(v), v); }
00115 
00116     int slice_del() { return slice_from_s(0, 0); }
00117 
00118     void insert_s(int c_bra, int c_ket, int s_size, const symbol * s);
00119     void insert_v(int c_bra, int c_ket, const symbol * v) {
00120         insert_s(c_bra, c_ket, SIZE(v), v);
00121     }
00122 
00123     symbol * slice_to(symbol * v);
00124     symbol * assign_to(symbol * v);
00125 
00126 #if 0
00127     void debug(int number, int line_count);
00128 #endif
00129 
00130   public:
00132     Internal() : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
00133 
00135     virtual ~Internal();
00136 
00138     std::string operator()(const std::string & word);
00139 
00141     virtual int stem() = 0;
00142 
00144     virtual const char * get_description() const = 0;
00145 };
00146 
00147 }
00148 
00149 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.