46 edist_seq(
const Char* ptr_,
int len_) : ptr(ptr_), len(len_) { }
75 return k + maxdist + fkp_rows * (
p + 1);
79 edist_state(
const Char* ptr1,
int len1,
const Char* ptr2,
int len2,
81 : seq1(ptr1, len1), seq2(ptr2, len2), fkp(fkp_), maxdist(len2) {
86 fkp_rows = 2 * maxdist + 1;
91 memset(fkp,
unsigned(INT_MIN) >> (8 * (
sizeof(
int) - 1)),
92 sizeof(
int) * (calc_index(maxdist, maxdist - 2) + 1));
94 for (
int k = 1; k <= maxdist; ++k) {
95 set_f_kp(k, k - 1, -1);
96 set_f_kp(-k, k - 1, k - 1);
101 return fkp[calc_index(k,
p)];
105 fkp[calc_index(k,
p)] = val;
109 if (pos1 <= 0 || pos2 <= 0 || pos1 >= seq1.
len || pos2 >= seq2.
len)
111 return (seq1.
ptr[pos1 - 1] == seq2.
ptr[pos2] &&
112 seq1.
ptr[pos1] == seq2.
ptr[pos2 - 1]);
115 void edist_calc_f_kp(
int k,
int p);
121 int maxlen = get_f_kp(k,
p - 1) + 1;
122 int maxlen2 = get_f_kp(k - 1,
p - 1);
123 int maxlen3 = get_f_kp(k + 1,
p - 1) + 1;
125 if (is_transposed(maxlen, maxlen + k)) {
130 if (maxlen >= maxlen2) {
131 if (maxlen >= maxlen3) {
138 if (maxlen2 >= maxlen3) {
149 while (maxlen < seq1.len &&
150 maxlen + k < seq2.len &&
151 seq1.ptr[maxlen] == seq2.ptr[maxlen + k]) {
154 set_f_kp(k,
p, maxlen);
160 int* fkp_,
int max_distance)
162 int lendiff = len2 - len1;
171 if (len1 == 0)
return len2;
176 while (
p <= max_distance) {
177 for (
int temp_p = 0; temp_p !=
p; ++temp_p) {
178 int inc =
p - temp_p;
179 if (abs(lendiff - inc) <= temp_p) {
182 if (abs(lendiff + inc) <= temp_p) {
188 if (state.
get_f_kp(lendiff,
p) == len1)
break;
197 int max_distance)
const
203 for (
int i = 0; i != len; ++i) {
204 unsigned ch = ptr[i];
206 freqs2 |= (freqs & bit);
216 int ed_lower_bound = bits / 2;
217 if (ed_lower_bound > max_distance) {
220 return ed_lower_bound;
228 int maxdist = target.size() + max_distance;
229 int max_cols = maxdist * 2;
230 int max_rows = maxdist * 2 + 1;
231 array =
new int[max_rows * max_cols];
234 return seqcmp_editdist<unsigned>(ptr, len, &target[0], target.size(),
235 array, max_distance);
int calc(const unsigned *ptr, int len, int max_distance) const
Calculate edit distance.
unsigned long long freqs_bitmap
The type to use for the occurrence bitmaps.
edist_state & operator=(const edist_state &)=delete
Don't allow assignment.
bool is_transposed(int pos1, int pos2) const
void edist_calc_f_kp(int k, int p)
int get_f_kp(int k, int p) const
int calc_index(int k, int p) const
void set_f_kp(int k, int p, int val)
edist_state(const edist_state &)=delete
Don't allow copying.
edist_state(const Char *ptr1, int len1, const Char *ptr2, int len2, int *fkp_)
static int seqcmp_editdist(const Char *ptr1, int len1, const Char *ptr2, int len2, int *fkp_, int max_distance)
Edit distance calculation algorithm.
Various assertion macros.
Count the number of set bits in an integer type.
static void add_popcount(A &accumulator, V value)
Add the number of set bits in value to accumulator.
edist_seq(const Char *ptr_, int len_)