28 #ifndef XAPIAN_INCLUDED_WORD_BREAKER_H
29 #define XAPIAN_INCLUDED_WORD_BREAKER_H
32 # error config.h must be included first in each C++ source file
84 bool unigram() const { return offset == 0; }
95 return !(*this == other);
99 #endif // XAPIAN_INCLUDED_WORD_BREAKER_H
Unicode and UTF-8 related classes and functions.
const Xapian::Utf8Iterator & get_utf8iterator() const
NgramIterator(const std::string &s)
Iterator returning unigrams and bigrams.
NgramIterator & operator++()
void get_unbroken(Xapian::Utf8Iterator &it)
std::string current_token
bool operator!=(const NgramIterator &other) const
const std::string & operator*() const
An iterator which returns Unicode character values from a UTF-8 encoded string.
void init()
Call to set current_token at the start.
bool is_unbroken_script(unsigned codepoint)
unsigned offset
Offset to penultimate Unicode character in current_token.
bool unigram() const
Is this a unigram?
bool operator==(const NgramIterator &other) const
bool is_ngram_enabled()
Should we use the n-gram code?
NgramIterator(const Xapian::Utf8Iterator &it_)