xapian-core  1.4.27
stem.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2010,2011,2012,2015,2018,2019 Olly Betts
5  * Copyright (C) 2010 Evgeny Sizikov
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include <xapian/stem.h>
25 
26 #include <xapian/error.h>
27 
28 #include "steminternal.h"
29 
30 #include "allsnowballheaders.h"
31 #include "keyword.h"
32 #include "sbl-dispatch.h"
33 
34 #include <string>
35 
36 using namespace std;
37 
38 namespace Xapian {
39 
40 Stem::Stem(const Stem & o) : internal(o.internal) { }
41 
42 Stem &
44 {
45  internal = o.internal;
46  return *this;
47 }
48 
49 Stem::Stem(Stem &&) = default;
50 
51 Stem &
52 Stem::operator=(Stem &&) = default;
53 
55 
56 static StemImplementation*
57 stem_internal_factory(const std::string& language, bool fallback)
58 {
59  int l = keyword2(tab, language.data(), language.size());
60  if (l >= 0) {
61  switch (static_cast<sbl_code>(l)) {
62  case ARABIC:
63  return new InternalStemArabic;
64  case ARMENIAN:
65  return new InternalStemArmenian;
66  case BASQUE:
67  return new InternalStemBasque;
68  case CATALAN:
69  return new InternalStemCatalan;
70  case DANISH:
71  return new InternalStemDanish;
72  case DUTCH:
73  return new InternalStemDutch;
74  case EARLYENGLISH:
75  return new InternalStemEarlyenglish;
76  case ENGLISH:
77  return new InternalStemEnglish;
78  case FINNISH:
79  return new InternalStemFinnish;
80  case FRENCH:
81  return new InternalStemFrench;
82  case GERMAN:
83  return new InternalStemGerman;
84  case GERMAN2:
85  return new InternalStemGerman2;
86  case HUNGARIAN:
87  return new InternalStemHungarian;
88  case INDONESIAN:
89  return new InternalStemIndonesian;
90  case IRISH:
91  return new InternalStemIrish;
92  case ITALIAN:
93  return new InternalStemItalian;
94  case KRAAIJ_POHLMANN:
95  return new InternalStemKraaij_pohlmann;
96  case LITHUANIAN:
97  return new InternalStemLithuanian;
98  case LOVINS:
99  return new InternalStemLovins;
100  case NEPALI:
101  return new InternalStemNepali;
102  case NORWEGIAN:
103  return new InternalStemNorwegian;
104  case NONE:
105  return NULL;
106  case PORTUGUESE:
107  return new InternalStemPortuguese;
108  case PORTER:
109  return new InternalStemPorter;
110  case RUSSIAN:
111  return new InternalStemRussian;
112  case ROMANIAN:
113  return new InternalStemRomanian;
114  case SPANISH:
115  return new InternalStemSpanish;
116  case SWEDISH:
117  return new InternalStemSwedish;
118  case TAMIL:
119  return new InternalStemTamil;
120  case TURKISH:
121  return new InternalStemTurkish;
122  }
123  }
124  if (fallback || language.empty())
125  return NULL;
126  throw Xapian::InvalidArgumentError("Language code " + language + " unknown");
127 }
128 
129 Stem::Stem(const std::string& language)
130  : internal(stem_internal_factory(language, false)) { }
131 
132 Stem::Stem(const std::string& language, bool fallback)
133  : internal(stem_internal_factory(language, fallback)) { }
134 
136 
138 
139 string
140 Stem::operator()(const std::string &word) const
141 {
142  if (!internal.get() || word.empty()) return word;
143  return internal->operator()(word);
144 }
145 
146 string
148 {
149  string desc = "Xapian::Stem(";
150  if (internal.get()) {
151  desc += internal->get_description();
152  desc += ')';
153  } else {
154  desc += "none)";
155  }
156  return desc;
157 }
158 
159 }
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
static const unsigned char tab[]
Definition: sbl-dispatch.h:58
Class representing a stemming algorithm.
Definition: stem.h:62
Xapian::Internal::intrusive_ptr< StemImplementation > internal
Definition: stem.h:65
Stem()
Construct a Xapian::Stem object which doesn't change terms.
Definition: stem.cc:54
Include headers for all Snowball stemmers.
STL namespace.
int keyword2(const unsigned char *p, const char *s, size_t len)
2 byte offset variant.
Definition: keyword.cc:56
#define false
Definition: header.h:9
std::string operator()(const std::string &word) const
Stem a word.
Definition: stem.cc:140
Hierarchy of classes which Xapian can throw as exceptions.
Stem & operator=(const Stem &o)
Assignment.
Definition: stem.cc:43
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Base class for implementations of stemming algorithms.
std::string get_description() const
Return a string describing this object.
Definition: stem.cc:147
Efficient keyword to enum lookup.
Map string to language code.
Class representing a stemming algorithm implementation.
Definition: stem.h:38
static StemImplementation * stem_internal_factory(const std::string &language, bool fallback)
Definition: stem.cc:57
static string language
Definition: stemtest.cc:39
~Stem()
Destructor.
Definition: stem.cc:137
stemming algorithms