-
-
Save sebj/9d269e7655c6be8b8bbf to your computer and use it in GitHub Desktop.
Levenshtein to get edit distance between two strings (ignores newlines, but counts spaces)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
- (float)compareString:(NSString *)originalString withString:(NSString *)comparisonString { | |
// Normalize strings | |
[originalString stringByTrimmingCharactersInSet:NSCharacterSet.newlineCharacterSet]; | |
[comparisonString stringByTrimmingCharactersInSet:NSCharacterSet.newlineCharacterSet]; | |
originalString = originalString.lowercaseString; | |
comparisonString = comparisonString.lowercaseString; | |
// Step 1 | |
NSInteger k, i, j, cost, * d, distance; | |
NSInteger n = originalString.length; | |
NSInteger m = comparisonString.length; | |
if (n == 0) { | |
//edit distance is the entire length of the new string | |
return m; | |
} | |
if( n++ != 0 && m++ != 0 ) { | |
d = malloc( sizeof(NSInteger) * m * n ); | |
// Step 2 | |
for( k = 0; k < n; k++) | |
d[k] = k; | |
for( k = 0; k < m; k++) | |
d[ k * n ] = k; | |
// Step 3 and 4 | |
for( i = 1; i < n; i++ ) | |
for( j = 1; j < m; j++ ) { | |
// Step 5 | |
if( [originalString characterAtIndex: i-1] == | |
[comparisonString characterAtIndex: j-1] ) | |
cost = 0; | |
else | |
cost = 1; | |
// Step 6 | |
d[ j * n + i ] = [self smallestOf: d [ (j - 1) * n + i ] + 1 | |
andOf: d[ j * n + i - 1 ] + 1 | |
andOf: d[ (j - 1) * n + i - 1 ] + cost ]; | |
// This conditional adds Damerau transposition to Levenshtein distance | |
if( i>1 && j>1 && [originalString characterAtIndex: i-1] == | |
[comparisonString characterAtIndex: j-2] && | |
[originalString characterAtIndex: i-2] == | |
[comparisonString characterAtIndex: j-1] ) | |
{ | |
d[ j * n + i] = MIN(d[ j * n + i ], d[ (j - 2) * n + i - 2 ] + cost); | |
} | |
} | |
distance = d[ n * m - 1 ]; | |
free(d); | |
return distance; | |
} | |
return 0.0; | |
} | |
// Return the minimum of a, b and c - used by compareString:withString: | |
- (NSInteger)smallestOf:(NSInteger)a andOf:(NSInteger)b andOf:(NSInteger)c { | |
return MIN(a, MIN(b, c)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment