Created
January 2, 2018 02:27
-
-
Save jtejido/1dc2684f63cd10480e02ea16eb962746 to your computer and use it in GitHub Desktop.
OverlapCoefficientSimilarity extension for php-nlp-tools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Utilities; | |
/**
 * Overlap coefficient, also known as the Szymkiewicz-Simpson coefficient.
 *
 * Defined for two sets A and B as |A intersect B| / min(|A|, |B|).
 *
 * @see https://en.wikipedia.org/wiki/Overlap_coefficient
 */
class OverlapCoefficient
{
    /**
     * Computes the overlap coefficient of two lists of tokens.
     *
     * Each list is treated as a set: duplicate values are collapsed before
     * counting. The result is a number between 0 and 1, where 1 means the
     * smaller set is fully contained in the larger one.
     *
     * The coefficient is mathematically undefined when either set is empty
     * (the denominator is zero). In that case 0 is returned instead of
     * dividing by zero.
     *
     * @param array $A First list of tokens (values must be usable as array keys,
     *                 i.e. ints or strings). Taken by reference; not modified.
     * @param array $B Second list of tokens, same constraints as $A.
     *
     * @return int|float Similarity in the range [0, 1].
     */
    public function similarity(&$A, &$B)
    {
        // Using the values as keys de-duplicates each list (set semantics).
        $a = array_fill_keys($A, 1);
        $b = array_fill_keys($B, 1);

        $smallest = min(count($a), count($b));

        // Guard the denominator: an empty set shares nothing with any other set.
        if ($smallest === 0) {
            return 0;
        }

        return count(array_intersect_key($a, $b)) / $smallest;
    }

    /**
     * Computes the overlap distance, defined as 1 - similarity($A, $B).
     *
     * @param array $A First list of tokens. Taken by reference; not modified.
     * @param array $B Second list of tokens. Taken by reference; not modified.
     *
     * @return int|float Distance in the range [0, 1]; 1 when either list is empty.
     */
    public function dist(&$A, &$B)
    {
        return 1 - $this->similarity($A, $B);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment