28 #ifndef XAPIAN_INCLUDED_CJK_TOKENIZER_H 29 #define XAPIAN_INCLUDED_CJK_TOKENIZER_H 32 # error config.h must be included first in each C++ source file 88 bool unigram()
const {
return offset == 0; }
99 return !(*
this == other);
103 #endif // XAPIAN_INCLUDED_CJK_TOKENIZER_H Unicode and UTF-8 related classes and functions.
const std::string & operator*() const
bool is_cjk_enabled()
Should we use the CJK n-gram code?
bool unigram() const
Is this a unigram?
CJKTokenIterator(const Xapian::Utf8Iterator &it_)
bool operator!=(const CJKTokenIterator &other) const
bool codepoint_is_cjk(unsigned codepoint)
Iterator returning unigrams and bigrams.
CJKTokenIterator(const std::string &s)
An iterator which returns Unicode character values from a UTF-8 encoded string.
bool operator==(const CJKTokenIterator &other) const
const Xapian::Utf8Iterator & get_utf8iterator() const
void get_cjk(Xapian::Utf8Iterator &it)
std::string current_token