Last active
January 11, 2018 14:46
-
-
Save jtejido/c5a83bbb830b02fc69a8b7e40a9549f3 to your computer and use it in GitHub Desktop.
TermFrequency extension for php-nlp-tools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Library; | |
use NlpTools\Analysis\FreqDist; | |
/**
 * Term-frequency weighting built on top of FreqDist.
 *
 * The weighting scheme is selected once, at construction time, through one of
 * the *_MODE constants and is applied by getTf().
 */
class TermFrequency extends FreqDist
{
    /** Count of the term divided by the total number of tokens. */
    const FREQUENCY_MODE = 1;

    /** 1 for any term that occurs at least once. */
    const BOOLEAN_MODE = 2;

    /** 1 + natural logarithm of the term's count. */
    const LOGARITHMIC_MODE = 3;

    /** 0.5 + 0.5 * (term's count / largest count in the distribution). */
    const AUGMENTED_MODE = 4;

    /** Count of the term, unmodified. */
    const COUNT_MODE = 5;

    /**
     * Active weighting scheme (one of the *_MODE constants).
     *
     * Declared explicitly so no dynamic property is created at runtime
     * (deprecated since PHP 8.2). Kept public because the previously
     * undeclared property was publicly readable.
     *
     * @var int
     */
    public $mode = self::FREQUENCY_MODE;

    /**
     * @param array $tokens Tokens handed to the parent FreqDist constructor.
     * @param int   $mode   One of the *_MODE constants; any unrecognised value
     *                      makes getTf() behave as FREQUENCY_MODE.
     */
    public function __construct(array $tokens, $mode = self::FREQUENCY_MODE)
    {
        parent::__construct($tokens);
        $this->mode = $mode;
    }

    /**
     * Returns the term-frequency weight of $term under the configured mode.
     *
     * @param string $term Token to look up.
     *
     * @return int|float 0 when the parent reports no count for the term,
     *                   otherwise the weight defined by the active mode.
     */
    public function getTf($term)
    {
        $count = $this->getTotalByToken($term);

        // A falsy count (0 or a "not found" marker from the parent) means the
        // term contributes nothing, whichever mode is active.
        if (!$count) {
            return 0;
        }

        switch ($this->mode) {
            case self::BOOLEAN_MODE:
                return 1;

            case self::LOGARITHMIC_MODE:
                return 1 + log($count);

            case self::AUGMENTED_MODE:
                // max() instead of the first element, so the result does not
                // depend on the order in which the parent keeps its counts.
                // $count is non-zero here, so the distribution is not empty.
                return 0.5 + (0.5 * ($count / max($this->getKeyValues())));

            case self::COUNT_MODE:
                return $count;

            case self::FREQUENCY_MODE:
            default:
                return $count / $this->getTotalTokens();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment