Skip to content

Instantly share code, notes, and snippets.

@jtejido
Last active January 11, 2018 14:46
Show Gist options
  • Save jtejido/c5a83bbb830b02fc69a8b7e40a9549f3 to your computer and use it in GitHub Desktop.
Save jtejido/c5a83bbb830b02fc69a8b7e40a9549f3 to your computer and use it in GitHub Desktop.
TermFrequency extension for php-nlp-tools
<?php
namespace App\Library;
use NlpTools\Analysis\FreqDist;
/**
 * Term-frequency (TF) calculator layered on top of NlpTools' FreqDist.
 *
 * The weighting scheme is selected once, at construction time, through one of
 * the *_MODE constants and is then applied by getTf().
 */
class TermFrequency extends FreqDist
{
    /** Term count divided by the total number of tokens. */
    const FREQUENCY_MODE = 1;

    /** 1 when the term occurs at all. */
    const BOOLEAN_MODE = 2;

    /** 1 + natural logarithm of the term count. */
    const LOGARITHMIC_MODE = 3;

    /** 0.5 + 0.5 * (term count / highest count of any term). */
    const AUGMENTED_MODE = 4;

    /** Raw term count. */
    const COUNT_MODE = 5;

    /**
     * Selected weighting scheme (one of the *_MODE constants).
     *
     * Declared explicitly because creating it dynamically in the constructor
     * is deprecated as of PHP 8.2. It stays public, which is the visibility a
     * dynamically created property had, so existing readers keep working.
     *
     * @var int
     */
    public $mode;

    /**
     * @param array $tokens Tokens of the document; handed to FreqDist unchanged.
     * @param int   $mode   One of the *_MODE constants. Any unrecognised value
     *                      behaves like FREQUENCY_MODE.
     */
    public function __construct(array $tokens, $mode = self::FREQUENCY_MODE)
    {
        parent::__construct($tokens);
        $this->mode = $mode;
    }

    /**
     * Computes the term frequency of $term under the configured mode.
     *
     * @param string $term Token to look up.
     *
     * @return int|float 0 when the parent reports no (or a falsy) count for
     *                   the term, otherwise the weight for the active mode.
     */
    public function getTf($term)
    {
        $count = $this->getTotalByToken($term);

        // Unknown term: every scheme yields 0, and bailing out here also keeps
        // log() and the divisions below away from a zero/false count.
        if (!$count) {
            return 0;
        }

        switch ($this->mode) {
            case self::BOOLEAN_MODE:
                return 1;

            case self::LOGARITHMIC_MODE:
                return 1 + log($count);

            case self::AUGMENTED_MODE:
                // Normalise by the most frequent term. max() states that
                // intent directly instead of relying on the parent's map
                // being sorted in descending order.
                return 0.5 + (0.5 * ($count / max($this->getKeyValues())));

            case self::COUNT_MODE:
                // The count was already fetched above; no second lookup.
                return $count;

            case self::FREQUENCY_MODE:
            default:
                return $count / $this->getTotalTokens();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment