Created
January 2, 2018 02:25
-
-
Save jtejido/cfd08769bd65c49472054425166fed6f to your computer and use it in GitHub Desktop.
LevenshteinDistance extension for php-nlp-tools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Utilities; | |
/**
 * Computes the Levenshtein (edit) distance between two sequences.
 *
 * A sequence is either a string (compared character by character) or an
 * array (compared element by element, in iteration order). Inputs may be of
 * arbitrary length; only two rows of the dynamic-programming table are kept
 * in memory at any time.
 */
class LevenshteinDistance
{
    /**
     * Compute the minimum number of single-element insertions, deletions and
     * substitutions needed to turn A into B.
     *
     * Strings that are valid UTF-8 are compared per code point (not per
     * grapheme cluster); any other string is compared per byte. Array keys
     * are ignored, only the order of the values matters. Elements are
     * compared strictly (===), so e.g. the tokens '1e3' and '1000' differ.
     *
     * @param string|array $A
     * @param string|array $B
     * @return int The Levenshtein distance of the two sequences A and B
     * @throws \InvalidArgumentException If A or B is neither a string nor an array
     */
    public function dist($A, $B)
    {
        $source = $this->toSequence($A);
        $target = $this->toSequence($B);
        $m = count($source);
        $n = count($target);

        // Row 0 of the table: an empty prefix of A becomes the first j
        // elements of B with j insertions.
        $previous = range(0, $n);

        for ($i = 1; $i <= $m; $i++) {
            // Column 0: the first i elements of A become an empty sequence
            // with i deletions.
            $current = [$i];
            for ($j = 1; $j <= $n; $j++) {
                $cost = ($source[$i - 1] === $target[$j - 1]) ? 0 : 1;
                $current[$j] = min(
                    $previous[$j] + 1,          // deletion
                    $current[$j - 1] + 1,       // insertion
                    $previous[$j - 1] + $cost   // substitution or match
                );
            }
            $previous = $current;
        }

        // When $m is 0 the loop never runs and row 0 already holds the answer.
        return $previous[$n];
    }

    /**
     * Normalise an input into a 0-indexed list of comparable elements.
     *
     * @param mixed $value
     * @return array
     * @throws \InvalidArgumentException If $value is neither a string nor an array
     */
    private function toSequence($value)
    {
        if (is_array($value)) {
            // Re-index so associative or sparse arrays can be walked by position.
            return array_values($value);
        }

        if (!is_string($value)) {
            throw new \InvalidArgumentException(
                "LevenshteinDistance accepts only strings or arrays"
            );
        }

        if ($value === '') {
            // Handled explicitly: str_split('') does not return an empty
            // array on every PHP version.
            return [];
        }

        // The /u modifier splits on code-point boundaries; preg_split()
        // returns false when the subject is not valid UTF-8.
        $chars = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);

        return ($chars === false) ? str_split($value) : $chars;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment