Skip to content

Instantly share code, notes, and snippets.

@jtejido
Created January 2, 2018 02:27
Show Gist options
  • Save jtejido/1dc2684f63cd10480e02ea16eb962746 to your computer and use it in GitHub Desktop.
Save jtejido/1dc2684f63cd10480e02ea16eb962746 to your computer and use it in GitHub Desktop.
OverlapCoefficientSimilarity extension for php-nlp-tools
<?php
namespace App\Utilities;
/**
 * Overlap coefficient (also known as the Szymkiewicz-Simpson coefficient).
 *
 * Measures the overlap between two sets as
 *     |A ∩ B| / min(|A|, |B|)
 *
 * @see https://en.wikipedia.org/wiki/Overlap_coefficient
 */
class OverlapCoefficient
{
    /**
     * Compute the overlap coefficient of two collections of tokens.
     *
     * Both inputs are treated as sets: duplicate values are collapsed before
     * counting. The result lies in the inclusive range [0, 1]; it is 1 when
     * the smaller set is fully contained in the larger one.
     *
     * When either input is empty the coefficient is mathematically undefined
     * (division by zero); this implementation returns 0.0 in that case
     * instead of raising an error.
     *
     * The parameters are taken by reference but are never modified.
     *
     * @param array $A Values usable as array keys (strings or integers).
     * @param array $B Values usable as array keys (strings or integers).
     *
     * @return int|float Similarity between 0 and 1.
     */
    public function similarity(&$A, &$B)
    {
        // Turn the value lists into key sets so duplicates collapse and the
        // intersection can be computed with key lookups.
        $a = array_fill_keys($A, 1);
        $b = array_fill_keys($B, 1);

        $smaller = min(count($a), count($b));

        // Guard against division by zero when one (or both) sets are empty.
        if ($smaller === 0) {
            return 0.0;
        }

        $intersect = count(array_intersect_key($a, $b));

        return $intersect / $smaller;
    }

    /**
     * Compute the overlap distance, defined as 1 - similarity.
     *
     * Consequently the distance is 1 when either input is empty.
     *
     * @param array $A Values usable as array keys (strings or integers).
     * @param array $B Values usable as array keys (strings or integers).
     *
     * @return int|float Distance between 0 and 1.
     */
    public function dist(&$A, &$B)
    {
        return 1 - $this->similarity($A, $B);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment