Created
October 11, 2010 11:09
-
-
Save datayja/620367 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Gemstone::framework app/lib/utils/inflector.lib.php
 *
 * Library to provide a framework for various inflections.
 *
 * @author Pavel Lisa <[email protected]>
 * @copyright 2010 Gemstone Webdesign
 * @package framework
 * @subpackage library
 */
namespace Gemstone\Utils; | |
use Gemstone; | |
/**
 * Static registry of inflection rules plus helpers built on top of them.
 *
 * Provides singular <-> plural conversion driven by an ordered list of
 * regular-expression rules, and conversion between underscore-delimited
 * ("decamelized") and CamelCase ("camelized") identifiers. Every conversion
 * is memoized per input string; the memo tables are dropped whenever the
 * rules they were computed from change.
 */
final class Inflector
{
    /**
     * Set to true once initialize() has run.
     * @var bool
     */
    private static $initialized = false;

    /**
     * Ordered list of singularization rules, newest first.
     * Each entry is array( plural pattern, singular replacement ).
     * @var array
     */
    private static $singulars = array();

    /**
     * Memo of already singularized strings (word => singular).
     * @var array
     */
    private static $singularized_cache = array();

    /**
     * Ordered list of pluralization rules, newest first.
     * Each entry is array( singular pattern, plural replacement ).
     * @var array
     */
    private static $plurals = array();

    /**
     * Memo of already pluralized strings (word => plural).
     * @var array
     */
    private static $pluralized_cache = array();

    /**
     * List of uncountable words (returned unchanged by both conversions).
     * @var array
     */
    private static $uncountable = array();

    /**
     * Map of lowercase chunk => replacement used by camelize()
     * (e.g. to force an abbreviation to uppercase).
     * @var array
     */
    private static $shortcuts = array();

    /**
     * Memo of already camelized strings (generic mode).
     * @var array
     */
    private static $camelized_generic_cache = array();

    /**
     * Memo of already camelized strings (className mode).
     * @var array
     */
    private static $camelized_className_cache = array();

    /**
     * Memo of already decamelized strings.
     * @var array
     */
    private static $decamelized_cache = array();

    /**
     * Initialize inflections for a particular language, or the default set.
     *
     * Clears the current rules, uncountables and the singular/plural memo
     * tables, then includes the inflections file, which is expected to
     * register its rules through the public methods of this class.
     *
     * NOTE(review): $inflections is concatenated into the include path
     * unchecked; callers must never pass untrusted input here.
     *
     * @param string $inflections The inflections set
     */
    public static function initialize( $inflections = 'default' )
    {
        self::$singulars   = array();
        self::$plurals     = array();
        self::$uncountable = array();
        # results computed from the previous rule set are no longer valid
        self::resetInflectionCaches();

        include Gemstone\Core::$config_libs_dir.'inflector.lib/'.$inflections.'.inflections.php';

        self::$initialized = true;
    }

    /**
     * Set a singularization rule. Newer rules take precedence over older ones.
     * @param string $pattern The plural to match (regex body, without delimiters)
     * @param string $replacement The singular to replace plural
     */
    public static function singular( $pattern, $replacement )
    {
        array_unshift(self::$singulars, array($pattern, $replacement));
        self::resetInflectionCaches();
    }

    /**
     * Set a pluralization rule. Newer rules take precedence over older ones.
     * @param string $pattern The singular to match (regex body, without delimiters)
     * @param string $replacement The plural to replace singular
     */
    public static function plural( $pattern, $replacement )
    {
        array_unshift(self::$plurals, array($pattern, $replacement));
        self::resetInflectionCaches();
    }

    /**
     * Add one or more uncountable words.
     * @param string $word Uncountable word
     * @param string $...
     */
    public static function uncountable()
    {
        foreach ( func_get_args() as $word )
        {
            self::$uncountable[] = $word;
        }
    }

    /**
     * Cross add irregular word (registers both directions at once).
     * @param string $singular_in Singular form of the irregular word
     * @param string $plural_in Plural form of the irregular word
     */
    public static function irregular( $singular_in, $plural_in )
    {
        array_unshift(self::$plurals, array($singular_in, $plural_in));
        array_unshift(self::$singulars, array($plural_in, $singular_in));
        self::resetInflectionCaches();
    }

    /**
     * Add shortcuts for camelize.
     * @param array $input Map of lowercase chunk => replacement
     */
    public static function shortcuts( array $input )
    {
        self::$shortcuts = array_merge(self::$shortcuts, $input);
        # camelized results depend on the shortcuts table
        self::$camelized_generic_cache   = array();
        self::$camelized_className_cache = array();
    }

    /**
     * Get the singular of a plural word.
     * @param string $word Plural
     * @return string|null Singular form; the word itself when it is uncountable
     *                     or no rule matches; null for non-string or empty input
     */
    public static function singularize( $word )
    {
        return self::inflect($word, self::$singulars, self::$singularized_cache);
    }

    /**
     * Get the plural of a singular word.
     * @param string $word Singular
     * @return string|null Plural form; the word itself when it is uncountable
     *                     or no rule matches; null for non-string or empty input
     */
    public static function pluralize( $word )
    {
        return self::inflect($word, self::$plurals, self::$pluralized_cache);
    }

    /**
     * Convert decamelized expression to camelized.
     *
     * Mode 0 (generic) splits on '_' and drops the underscores; a chunk found
     * in the shortcuts table is replaced by its shortcut followed by '_'.
     * Mode 1 (className) splits on '_', '\' and '/', keeps the delimiters and
     * turns '/' into '\' for namespace support.
     *
     * @param string|array $input Decamelized string, or an array of them (may be nested)
     * @param int $mode 0 = generic, 1 = className
     * @return string|array|null Camelized string, or a re-indexed array of results;
     *                           null for a string input with an unknown mode
     * @throws InflectorException When the input is neither a string nor an array
     */
    public static function camelize( $input, $mode = 0 )
    {
        try
        {
            if ( is_string($input) )
            {
                if ( $mode === 0 )
                {
                    return self::camelizeGeneric($input);
                }
                if ( $mode === 1 )
                {
                    return self::camelizeClassName($input);
                }
                # unknown mode: nothing to do (kept for backward compatibility)
                return null;
            }

            if ( is_array($input) )
            {
                # camelize each entry, recursing into nested arrays;
                # the requested mode is carried over to every entry
                $reply = array();
                foreach ( $input as $decamelized )
                {
                    $reply[] = self::camelize($decamelized, $mode);
                }
                return $reply;
            }

            throw new \InvalidArgumentException( 'Argument must be either string or an array of strings' );
        }
        catch ( \InvalidArgumentException $e )
        {
            throw new InflectorException(300, $e);
        }
    }

    /**
     * Convert camelized expression to decamelized.
     *
     * The input is split into chunks of one or more uppercase letters followed
     * by one or more lowercase letters; each chunk is lowercased, stripped of
     * '_' and the chunks are joined by '_'.
     *
     * @param string|array $input Camelized string, or an array of them (may be nested)
     * @return string|array Decamelized string, or a re-indexed array of results
     * @throws InflectorException When the input is neither a string nor an array
     */
    public static function decamelize( $input )
    {
        try
        {
            if ( is_string($input) )
            {
                # cache hit
                if ( isset( self::$decamelized_cache[ $input ] ) )
                {
                    return self::$decamelized_cache[ $input ];
                }

                # cache miss
                # -1 means "no limit"; passing null is deprecated since PHP 8.1
                $chunks = preg_split('/([A-Z]+[a-z]+)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                foreach ( $chunks as $i => $chunk )
                {
                    # lowercase the chunk and drop the '_' marker camelize() adds after shortcuts
                    $chunks[$i] = strtr(strtolower($chunk), array('_' => ''));
                }

                # glue the chunks together by '_'
                return self::$decamelized_cache[ $input ] = implode('_', $chunks);
            }

            if ( is_array($input) )
            {
                # decamelize each entry, recursing into nested arrays
                $reply = array();
                foreach ( $input as $camelized )
                {
                    $reply[] = self::decamelize($camelized);
                }
                return $reply;
            }

            throw new \InvalidArgumentException( 'Argument must be either string or an array of strings' );
        }
        catch ( \InvalidArgumentException $e )
        {
            throw new InflectorException(300, $e);
        }
    }

    /**
     * Drop the memoized singular/plural results after a rule change.
     */
    private static function resetInflectionCaches()
    {
        self::$singularized_cache = array();
        self::$pluralized_cache   = array();
    }

    /**
     * Apply the first matching rule of a rule list to a word, memoizing the result.
     * @param mixed $word Word to convert
     * @param array $rules List of array( pattern, replacement ), highest priority first
     * @param array $cache Memo table for this direction (updated in place)
     * @return string|null Converted word, or null for non-string or empty input
     */
    private static function inflect( $word, array $rules, array &$cache )
    {
        if ( ! is_string($word) || $word === '' )
        {
            return null;
        }
        if ( in_array($word, self::$uncountable, true) )
        {
            return $word;
        }
        if ( isset( $cache[ $word ] ) )
        {
            return $cache[ $word ];
        }

        foreach ( $rules as $rule )
        {
            list($pattern, $replacement) = $rule;
            # add a '$' for irregular patterns at the end of regex
            $regexp_end = ( strpos($pattern, '$') === false ) ? '$' : '';
            # match and replace in one pass; $hits tells whether the rule applied
            $hits = 0;
            $converted = preg_replace('/'.$pattern.$regexp_end.'/i', $replacement, $word, -1, $hits);
            if ( $hits > 0 && $converted !== null )
            {
                return $cache[ $word ] = $converted;
            }
        }

        return $cache[ $word ] = $word; # not detected
    }

    /**
     * Camelize a single string in generic mode (see camelize()).
     * @param string $input Decamelized
     * @return string
     */
    private static function camelizeGeneric( $input )
    {
        # cache hit
        if ( isset( self::$camelized_generic_cache[ $input ] ) )
        {
            return self::$camelized_generic_cache[ $input ];
        }

        # cache miss: split input string into chunks by delimiter '_'
        $chunks = explode('_', $input);
        foreach ( $chunks as $i => $chunk )
        {
            $chunk = strtolower($chunk);
            if ( isset( self::$shortcuts[ $chunk ] ) )
            {
                # trailing '_' keeps the shortcut separable from the next chunk in decamelize()
                $chunks[$i] = self::$shortcuts[ $chunk ] . '_';
            }
            else
            {
                # first letter of the chunk uppercase (no-op for an empty chunk)
                $chunks[$i] = ucfirst($chunk);
            }
        }

        return self::$camelized_generic_cache[ $input ] = implode('', $chunks);
    }

    /**
     * Camelize a single string in className mode (see camelize()).
     * @param string $input Decamelized
     * @return string
     */
    private static function camelizeClassName( $input )
    {
        # cache hit
        if ( isset( self::$camelized_className_cache[ $input ] ) )
        {
            return self::$camelized_className_cache[ $input ];
        }

        # cache miss: split by '_', '\' and '/', keeping the delimiters as chunks
        # -1 means "no limit"; passing null is deprecated since PHP 8.1
        $chunks = preg_split( '~(_|\\\\|/)~', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        foreach ( $chunks as $i => $chunk )
        {
            $chunk = strtolower($chunk);
            if ( isset( self::$shortcuts[ $chunk ] ) )
            {
                $chunk = self::$shortcuts[ $chunk ];
            }
            else
            {
                # first letter of the chunk uppercase
                $chunk = ucfirst($chunk);
            }
            $chunks[$i] = strtr($chunk, array( '/' => '\\' )); // for namespaces support
        }

        return self::$camelized_className_cache[ $input ] = implode('', $chunks);
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment