22 #ifndef XAPIAN_INCLUDED_EDITDISTANCE_H
23 #define XAPIAN_INCLUDED_EDITDISTANCE_H
/// Current candidate in UTF-32.
///
/// Declared mutable so that const member functions are able to modify it.
56 mutable std::vector<unsigned>
utf32;
/** Calculate edit distance.
 *
 *  Const member function taking a pointer to unsigned values, a length and
 *  a maximum distance, and returning an int.
 *
 *  NOTE(review): ptr/len presumably describe the candidate sequence as
 *  UTF-32 code points, and max_distance presumably bounds the distance the
 *  caller is interested in - the definition is not visible here, so confirm
 *  against it before relying on either.
 */
107 int calc(
const unsigned* ptr,
int len,
int max_distance)
const;
118 for (Utf8Iterator it(target_); it != Utf8Iterator(); ++it) {
145 int operator()(
const std::string& candidate,
int max_distance)
const {
147 size_t target_utf32_len =
target.size();
158 if (target_utf32_len > candidate.size() + max_distance) {
167 if (
target_bytes > candidate.size() + 4 * max_distance) {
171 if (
target_bytes + 4 * max_distance < candidate.size()) {
180 int lb = std::abs(
int(
utf32.size()) -
int(target_utf32_len));
181 if (lb > max_distance) {
Calculate edit distances to a target string.
~EditDistanceCalculator()
std::vector< unsigned > target
Target in UTF-32.
EditDistanceCalculator & operator=(const EditDistanceCalculator &)=delete
Don't allow assignment.
int operator()(const std::string &candidate, int max_distance) const
Calculate edit distance for a sequence.
freqs_bitmap target_freqs
Occurrence bitmap for target sequence.
std::vector< unsigned > utf32
Current candidate in UTF-32.
int calc(const unsigned *ptr, int len, int max_distance) const
Calculate edit distance.
unsigned long long freqs_bitmap
The type to use for the occurrence bitmaps.
EditDistanceCalculator(std::string_view target_)
Constructor.
static constexpr unsigned FREQS_MASK
freqs_bitmap target_freqs2
Second occurrence bitmap for target sequence.
EditDistanceCalculator(const EditDistanceCalculator &)=delete
Don't allow copying.
An iterator which returns Unicode character values from a UTF-8 encoded string.
Various assertion macros.
Unicode and UTF-8 related classes and functions.