Edit distance is a number quantifying how different are two given strings. It is calculated by counting the minimum number of operations required to transform one string into the other.
In this implementation I assume, that only three operations are available:
- copy – cost 0
- insert – cost 1
- delete – cost 1
Read more on wikipedia.
Input
n a m b
n, m – length of string
a, b – strings (without spaces, new lines etc.)
Sample input
5 heart 4 head
Output
The algorithm returns a single number, which is the edit distance.
Complexity
This solution runs in Θ(n · m) time.
Source code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
#define INT_MAX 2147483647 #include <stdio.h> #include <vector> #include <list> #include <algorithm> #include <math.h> #include <iostream> #include <string> using namespace std; struct Field { int cost; }; string in, out; int getCost(Field **tab, int y, int x) { if (y < 0 || x < 0) return -1; return tab[y][x].cost; } void setCostFor(Field **tab, int y, int x, int cost, int operationCost) { if (cost != -1 && cost < tab[y][x].cost) { tab[y][x].cost = cost + operationCost; } } void setMinFor(Field **tab, int y, int x) { //copy - characters are the same if (in[x] == out[y]) { setCostFor(tab, y, x, getCost(tab, y - 1, x - 1), 0); } //add character setCostFor(tab, y, x, getCost(tab, y - 1, x), 1); //delete character setCostFor(tab, y, x, getCost(tab, y, x - 1), 1); } int main() { int in_len, out_len; cin >> in_len >> in; cin >> out_len >> out; Field **tab = new Field*[out_len]; for (int i = 0; i < out_len; i++) { tab[i] = new Field[in_len]; for (int j = 0; j < in_len; j++) { tab[i][j].cost = INT_MAX; } } //initialize first row tab[0][0].cost = 0; for (int i = 1; i < in_len; i++) { tab[0][i].cost = tab[0][i - 1].cost + 1; } //dynamic - calculate array for (int o = 1; o < out_len; o++) { for (int i = 0; i < in_len; i++) { setMinFor(tab, o, i); } } cout << tab[out_len - 1][in_len - 1].cost << endl; return 0; } |