00001
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include <config.h>
00031
00032 #include "editdistance.h"
00033
00034 #include "omassert.h"
00035
00036 #include <algorithm>
00037 #include <cstdlib>
00038
00039 using namespace std;
00040
00041 template<class CHR>
00042 struct edist_seq {
00043 edist_seq(const CHR * ptr_, int len_) : ptr(ptr_), len(len_) { }
00044 const CHR * ptr;
00045 int len;
00046 };
00047
00048 template<class CHR>
00049 class edist_state {
00051 void operator=(const edist_state &);
00052
00054 edist_state(const edist_state &);
00055
00056 edist_seq<CHR> seq1;
00057 edist_seq<CHR> seq2;
00058
00059
00060
00061
00062
00063
00064 int * fkp;
00065 int fkp_cols;
00066
00067
00068
00069 int maxdist;
00070
00071 int calc_index(int k, int p) const {
00072 return (k + maxdist) * fkp_cols + p + 1;
00073 }
00074
00075 public:
00076
00077 edist_state(const CHR * ptr1, int len1, const CHR * ptr2, int len2);
00078
00079 ~edist_state();
00080
00081 int get_f_kp(int k, int p) const {
00082 return fkp[calc_index(k, p)];
00083 }
00084
00085 void set_f_kp(int k, int p, int val) {
00086 fkp[calc_index(k, p)] = val;
00087 }
00088
00089 bool is_transposed(int pos1, int pos2) const {
00090 if (pos1 <= 0 || pos2 <= 0 || pos1 >= seq1.len || pos2 >= seq2.len) return false;
00091 return (seq1.ptr[pos1 - 1] == seq2.ptr[pos2] &&
00092 seq1.ptr[pos1] == seq2.ptr[pos2 - 1]);
00093 }
00094
00095 void edist_calc_f_kp(int k, int p);
00096 };
00097
00098 template<class CHR>
00099 void edist_state<CHR>::edist_calc_f_kp(int k, int p)
00100 {
00101 int maxlen = get_f_kp(k, p - 1) + 1;
00102 int maxlen2 = get_f_kp(k - 1, p - 1);
00103 int maxlen3 = get_f_kp(k + 1, p - 1) + 1;
00104
00105 if (is_transposed(maxlen, maxlen + k)) {
00106
00107 ++maxlen;
00108 }
00109
00110 if (maxlen >= maxlen2) {
00111 if (maxlen >= maxlen3) {
00112
00113 } else {
00114
00115 maxlen = maxlen3;
00116 }
00117 } else {
00118 if (maxlen2 >= maxlen3) {
00119
00120 maxlen = maxlen2;
00121 } else {
00122
00123 maxlen = maxlen3;
00124 }
00125 }
00126
00127
00128
00129 while (maxlen < seq1.len &&
00130 maxlen + k < seq2.len &&
00131 seq1.ptr[maxlen] == seq2.ptr[maxlen + k]) {
00132 ++maxlen;
00133 }
00134 set_f_kp(k, p, maxlen);
00135 }
00136
00137 #define INF 1000000
00138 template<class CHR>
00139 edist_state<CHR>::edist_state(const CHR * ptr1, int len1,
00140 const CHR * ptr2, int len2)
00141 : seq1(ptr1, len1), seq2(ptr2, len2), maxdist(len2)
00142 {
00143 Assert(len2 >= len1);
00144
00145 int fkp_rows = maxdist * 2 + 1;
00146
00147 fkp_cols = maxdist + 2;
00148
00149 fkp = new int[fkp_rows * fkp_cols];
00150
00151 for (int k = -maxdist; k <= maxdist; k++) {
00152 for (int p = -1; p <= maxdist; p++) {
00153 if (p == abs(k) - 1) {
00154 if (k < 0) {
00155 set_f_kp(k, p, abs(k) - 1);
00156 } else {
00157 set_f_kp(k, p, -1);
00158 }
00159 } else if (p < abs(k)) {
00160 set_f_kp(k, p, -INF);
00161 }
00162 }
00163 }
00164 }
00165
00166 template<class CHR>
00167 edist_state<CHR>::~edist_state() {
00168 delete [] fkp;
00169 }
00170
00171 template<class CHR>
00172 static int
00173 seqcmp_editdist(const CHR * ptr1, int len1, const CHR * ptr2, int len2,
00174 int max_distance)
00175 {
00176 int lendiff = len2 - len1;
00177
00178 if (lendiff < 0) {
00179 lendiff = -lendiff;
00180 swap(ptr1, ptr2);
00181 swap(len1, len2);
00182 }
00183
00184
00185 if (len1 == 0) return len2;
00186
00187 edist_state<CHR> state(ptr1, len1, ptr2, len2);
00188
00189 int p = lendiff;
00190 while (p <= max_distance) {
00191 for (int temp_p = 0; temp_p != p; ++temp_p) {
00192 int inc = p - temp_p;
00193 if (abs(lendiff - inc) <= temp_p) {
00194 state.edist_calc_f_kp(lendiff - inc, temp_p);
00195 }
00196 if (abs(lendiff + inc) <= temp_p) {
00197 state.edist_calc_f_kp(lendiff + inc, temp_p);
00198 }
00199 }
00200 state.edist_calc_f_kp(lendiff, p);
00201
00202 if (state.get_f_kp(lendiff, p) == len1) break;
00203 ++p;
00204 }
00205
00206 return p;
00207 }
00208
00209 int
00210 edit_distance_unsigned(const unsigned * ptr1, int len1,
00211 const unsigned * ptr2, int len2,
00212 int max_distance)
00213 {
00214 return seqcmp_editdist<unsigned>(ptr1, len1, ptr2, len2, max_distance);
00215 }