xapian-core  1.4.25
steminternal.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2010,2016 Olly Betts
5  * Copyright (C) 2010 Evgeny Sizikov
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
23 #define XAPIAN_INCLUDED_STEMINTERNAL_H
24 
25 #include <xapian/stem.h>
26 
27 #include "alignment_cast.h"
28 
29 #include <cstdlib>
30 #include <string>
31 
/* Element type of the buffers handled by the stemmer support code: one
 * unsigned byte per symbol. */
typedef unsigned char symbol;

/* Size in bytes of the header that sits in front of a symbol buffer: room
 * for two ints.  SIZE() and CAPACITY() read the ints at offsets [-1] and
 * [-2] from the buffer pointer, and lose_s() steps back by HEAD bytes to
 * find the address to pass to free(). */
#define HEAD (sizeof(int) * 2)
35 
37 
/* One entry of an "among" lookup table.
 *
 * The member order is part of the layout and of any positional
 * initialisers, so it must not be changed.
 */
struct among {
    /* Number of symbols in the search string. */
    int s_size;
    /* Where the search string starts, as an offset into the pool. */
    unsigned s;
    /* Index of the longest matching substring. */
    int substring_i;
    /* Value the lookup produces for this entry. */
    int result;
};
44 
45 inline void lose_s(symbol * p) {
46  if (p) std::free(reinterpret_cast<char *>(p) - HEAD);
47 }
48 
49 namespace Xapian {
50 
52  int slice_check();
53 
54  protected:
55  symbol * p;
56  int c, l, lb, bra, ket;
57 
58  static int
59  SIZE(const symbol* p)
60  {
61  return alignment_cast<const int *>(p)[-1];
62  }
63 
64  static void
65  SET_SIZE(symbol* p, int n)
66  {
67  alignment_cast<int *>(p)[-1] = n;
68  }
69 
70  static int
71  CAPACITY(const symbol* p)
72  {
73  return alignment_cast<const int *>(p)[-2];
74  }
75 
76  static void
77  SET_CAPACITY(symbol* p, int n)
78  {
79  alignment_cast<int *>(p)[-2] = n;
80  }
81 
82  static int skip_utf8(const symbol * p, int c, int lb, int l, int n);
83 
84  static symbol * increase_size(symbol * p, int n);
85 
86  static symbol * create_s();
87 
88  int get_utf8(int * slot);
89  int get_b_utf8(int * slot);
90 
91  int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
92  int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
93  int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
94  int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
95 
96  int eq_s(int s_size, const symbol * s);
97  int eq_s_b(int s_size, const symbol * s);
98  int eq_v(const symbol * v) { return eq_s(SIZE(v), v); }
99  int eq_v_b(const symbol * v) { return eq_s_b(SIZE(v), v); }
100 
101  int find_among(const symbol *pool, const struct among * v, int v_size,
102  const unsigned char * fnum, const among_function * f);
103  int find_among_b(const symbol *pool, const struct among * v, int v_size,
104  const unsigned char * fnum, const among_function * f);
105 
106  int replace_s(int c_bra, int c_ket, int s_size, const symbol * s);
107  int slice_from_s(int s_size, const symbol * s);
108  int slice_from_v(const symbol * v) { return slice_from_s(SIZE(v), v); }
109 
110  int slice_del() { return slice_from_s(0, 0); }
111 
112  void insert_s(int c_bra, int c_ket, int s_size, const symbol * s);
113  void insert_v(int c_bra, int c_ket, const symbol * v) {
114  insert_s(c_bra, c_ket, SIZE(v), v);
115  }
116 
117  symbol * slice_to(symbol * v);
118  symbol * assign_to(symbol * v);
119 
120  int len_utf8(const symbol * v);
121 
122 #if 0
123  void debug(int number, int line_count);
124 #endif
125 
126  public:
129  : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
130 
132  virtual ~SnowballStemImplementation();
133 
135  virtual std::string operator()(const std::string & word);
136 
138  virtual int stem() = 0;
139 };
140 
141 }
142 
143 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
int s_size
Definition: steminternal.h:39
static void SET_CAPACITY(symbol *p, int n)
Definition: steminternal.h:77
int substring_i
Definition: steminternal.h:41
unsigned char symbol
Definition: steminternal.h:32
Cast a pointer we know is suitably aligned.
int number
Definition: header.h:195
std::enable_if< std::is_const< typename std::remove_pointer< U >::type >::value, T >::type alignment_cast(U ptr)
Cast a pointer we know is suitably aligned.
#define HEAD
Definition: steminternal.h:34
static int CAPACITY(const symbol *p)
Definition: steminternal.h:71
void insert_v(int c_bra, int c_ket, const symbol *v)
Definition: steminternal.h:113
unsigned short symbol
Definition: header.h:6
static int SIZE(const symbol *p)
Definition: steminternal.h:59
Definition: header.h:86
int(* among_function)(Xapian::StemImplementation *)
Definition: steminternal.h:36
Definition: header.h:90
Class representing a stemming algorithm implementation.
Definition: stem.h:38
Definition: header.h:191
int slice_from_v(const symbol *v)
Definition: steminternal.h:108
int result
Definition: steminternal.h:42
int get_utf8(const symbol *p, int *slot)
#define SIZE(p)
Definition: header.h:17
static void SET_SIZE(symbol *p, int n)
Definition: steminternal.h:65
SnowballStemImplementation()
Perform initialisation common to all Snowball stemmers.
Definition: steminternal.h:128
unsigned s
Definition: steminternal.h:40
stemming algorithms
void lose_s(symbol *p)
Definition: steminternal.h:45