Skip to content

Instantly share code, notes, and snippets.

@datayja
Created October 11, 2010 11:09
Show Gist options
  • Save datayja/620367 to your computer and use it in GitHub Desktop.
Save datayja/620367 to your computer and use it in GitHub Desktop.
<?php
/**
* Gemstone::framework app/lib/utils/inflector.lib.php
*
* Library to provide a framework for various inflections.
*
* @author Pavel Lisa <[email protected]>
* @copyright 2010 Gemstone Webdesign
* @package framework
* @subpackage library
*/
namespace Gemstone\Utils;
use Gemstone;
final class Inflector
{
    /**
     * Set to true once initialize() has run to completion
     * @var bool
     */
    private static $initialized = false;
    /**
     * Singularization rules, most recently added first; each entry is array(pattern, replacement)
     * @var array
     */
    private static $singulars = array();
    /**
     * Caching hash of already singularized strings (word => singular)
     * @var hash
     */
    private static $singularized_cache = array();
    /**
     * Pluralization rules, most recently added first; each entry is array(pattern, replacement)
     * @var array
     */
    private static $plurals = array();
    /**
     * Caching hash of already pluralized strings (word => plural)
     * @var hash
     */
    private static $pluralized_cache = array();
    /**
     * Array of uncountable words
     * @var array
     */
    private static $uncountable = array();
    /**
     * Hash of shortcuts: lowercase chunk => replacement used by camelize()
     * @var hash
     */
    private static $shortcuts = array();
    /**
     * Hash of already camelized strings (generic mode)
     * @var hash
     */
    private static $camelized_generic_cache = array();
    /**
     * Hash of already camelized strings (className mode)
     * @var hash
     */
    private static $camelized_className_cache = array();
    /**
     * Hash of already decamelized strings
     * @var hash
     */
    private static $decamelized_cache = array();

    /**
     * Initialize inflections for a particular language, or the default set.
     *
     * Drops all singular/plural rules, uncountable words and the cached
     * singularize()/pluralize() results, then includes the inflections file
     * for the requested set. Shortcuts are left untouched.
     *
     * NOTE(review): the included file presumably registers rules through
     * singular()/plural()/irregular()/uncountable() - it is not visible here.
     * NOTE(review): $inflections is interpolated into the include path, so it
     * must never come from untrusted input.
     *
     * @param string $inflections The inflections set
     */
    public static function initialize( $inflections = 'default' )
    {
        self::$singulars = array();
        self::$plurals = array();
        self::$uncountable = array();
        # results computed with the previous rule set must not survive a re-initialization
        self::$singularized_cache = array();
        self::$pluralized_cache = array();
        include Gemstone\Core::$config_libs_dir.'inflector.lib/'.$inflections.'.inflections.php';
        self::$initialized = true;
    }

    /**
     * Set a singularization rule.
     *
     * The rule is put in front of the existing ones, so it is tried first.
     * The pattern is embedded between '/' delimiters and matched case-insensitively.
     *
     * @param string $pattern The plural to match
     * @param string $replacement The singular to replace plural
     */
    public static function singular($pattern, $replacement )
    {
        array_unshift(self::$singulars, array($pattern, $replacement));
        # a new rule may change the outcome for words that were already cached
        self::$singularized_cache = array();
    }

    /**
     * Set a pluralization rule.
     *
     * The rule is put in front of the existing ones, so it is tried first.
     * The pattern is embedded between '/' delimiters and matched case-insensitively.
     *
     * @param string $pattern The singular to match
     * @param string $replacement The plural to replace singular
     */
    public static function plural($pattern, $replacement)
    {
        array_unshift(self::$plurals, array($pattern, $replacement));
        # a new rule may change the outcome for words that were already cached
        self::$pluralized_cache = array();
    }

    /**
     * Add one or more uncountable words.
     *
     * Uncountable words are returned unchanged by singularize() and
     * pluralize(). The lookup is an exact, case-sensitive comparison.
     *
     * @param string $word Uncountable word
     * @param string $...
     */
    public static function uncountable()
    {
        foreach ( func_get_args() as $word )
        {
            self::$uncountable[] = $word;
        }
    }

    /**
     * Cross add irregular word: registers singular => plural as a
     * pluralization rule and plural => singular as a singularization rule.
     *
     * @param string $singular_in Singular form of the irregular word
     * @param string $plural_in Plural form of the irregular word
     */
    public static function irregular($singular_in, $plural_in)
    {
        array_unshift(self::$plurals, array($singular_in, $plural_in));
        array_unshift(self::$singulars, array($plural_in, $singular_in));
        # both rule sets changed, so neither cache can be trusted any more
        self::$singularized_cache = array();
        self::$pluralized_cache = array();
    }

    /**
     * Add shortcuts for camelize.
     *
     * Keys are the lowercase chunks to look for, values are what camelize()
     * emits instead of the chunk. Entries are merged into the existing ones.
     *
     * @param array $input
     */
    public static function shortcuts( array $input )
    {
        self::$shortcuts = array_merge( self::$shortcuts, $input );
        # camelize() results depend on the shortcuts, so cached values are now stale
        self::$camelized_generic_cache = array();
        self::$camelized_className_cache = array();
    }

    /**
     * Get the singular of a plural word.
     *
     * @param string $word Plural
     * @return string|null The singular form; the word itself when it is
     *                     uncountable or no rule matches; null when $word is
     *                     not a non-empty string
     */
    public static function singularize( $word )
    {
        if ( ! is_string($word) || $word === '' )
        {
            return null;
        }
        # strict comparison: avoids numeric-string juggling such as "1e3" == "1000"
        if ( in_array($word, self::$uncountable, true) )
        {
            return $word;
        }
        if ( ! isset( self::$singularized_cache[ $word ] ) )
        {
            self::$singularized_cache[ $word ] = self::applyRules($word, self::$singulars);
        }
        return self::$singularized_cache[ $word ];
    }

    /**
     * Get the plural of a singular word.
     *
     * @param string $word Singular
     * @return string|null The plural form; the word itself when it is
     *                     uncountable or no rule matches; null when $word is
     *                     not a non-empty string
     */
    public static function pluralize( $word )
    {
        if ( ! is_string($word) || $word === '' )
        {
            return null;
        }
        # strict comparison: avoids numeric-string juggling such as "1e3" == "1000"
        if ( in_array($word, self::$uncountable, true) )
        {
            return $word;
        }
        if ( ! isset( self::$pluralized_cache[ $word ] ) )
        {
            self::$pluralized_cache[ $word ] = self::applyRules($word, self::$plurals);
        }
        return self::$pluralized_cache[ $word ];
    }

    /**
     * Convert decamelized expression to camelized.
     *
     * Generic mode (0): the input is split on '_', every chunk is lowercased
     * and its first letter uppercased; a chunk found in the shortcuts is
     * replaced by the shortcut followed by '_'.
     *
     * className mode (1): the input is split on '_', '\' and '/', keeping the
     * delimiters; chunks are treated as above (shortcuts get no trailing '_')
     * and every '/' is turned into '\' for namespaces support.
     *
     * Arrays are camelized element by element, recursively, using the same
     * mode; the result is a re-indexed list (keys are not preserved).
     *
     * @param string|array $input Decamelized
     * @param int $mode 0 for generic mode, 1 for className mode
     * @return string|array|null Camelized input; null for a string input
     *                           combined with any other $mode
     * @throws InflectorException when $input is neither a string nor an array
     */
    public static function camelize( $input, $mode = 0 )
    {
        try
        {
            if ( is_string($input) )
            {
                if ( $mode === 0 ) // generic mode
                {
                    if ( ! isset( self::$camelized_generic_cache[ $input ] ) )
                    {
                        self::$camelized_generic_cache[ $input ] = self::camelizeTokens(
                            explode('_', $input),
                            '_',
                            false
                        );
                    }
                    return self::$camelized_generic_cache[ $input ];
                }
                if ( $mode === 1 ) // className mode
                {
                    if ( ! isset( self::$camelized_className_cache[ $input ] ) )
                    {
                        # -1 means "no limit"; passing null here is deprecated since PHP 8.1
                        $tokens = preg_split( '~(_|\\\\|/)~', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                        self::$camelized_className_cache[ $input ] = self::camelizeTokens($tokens, '', true);
                    }
                    return self::$camelized_className_cache[ $input ];
                }
                # unknown mode: nothing is produced
                return null;
            }
            if ( is_array($input) )
            {
                $reply = array();
                foreach ( $input as $decamelized )
                {
                    # hand the mode down, otherwise nested input silently falls back to generic mode
                    # note that this throws if a single entry is neither a string nor an array
                    $reply[] = self::camelize($decamelized, $mode);
                }
                return $reply;
            }
            throw new \InvalidArgumentException( 'Argument must be either string or an array of strings' );
        }
        catch ( \InvalidArgumentException $e )
        {
            throw new InflectorException(300, $e);
        }
    }

    /**
     * Convert camelized expression to decamelized.
     *
     * The input is cut into chunks made of one or more uppercase letters
     * followed by one or more lowercase letters; whatever lies between such
     * chunks becomes a chunk of its own. Every chunk is lowercased, stripped
     * of underscores, and the chunks are glued together by '_'.
     *
     * Arrays are decamelized element by element, recursively; the result is a
     * re-indexed list (keys are not preserved).
     *
     * @param string|array $input Camelized
     * @return string|array Decamelized input
     * @throws InflectorException when $input is neither a string nor an array
     */
    public static function decamelize( $input )
    {
        try
        {
            if ( is_string($input) )
            {
                if ( ! isset( self::$decamelized_cache[ $input ] ) )
                {
                    # -1 means "no limit"; passing null here is deprecated since PHP 8.1
                    $tokens = preg_split('/([A-Z]+[a-z]+)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                    foreach ( $tokens as $index => $token )
                    {
                        $tokens[$index] = strtr(strtolower($token), array('_' => ''));
                    }
                    self::$decamelized_cache[ $input ] = implode('_', $tokens);
                }
                return self::$decamelized_cache[ $input ];
            }
            if ( is_array($input) )
            {
                $reply = array();
                foreach ( $input as $camelized )
                {
                    # note that this throws if a single entry is neither a string nor an array
                    $reply[] = self::decamelize($camelized);
                }
                return $reply;
            }
            throw new \InvalidArgumentException( 'Argument must be either string or an array of strings' );
        }
        catch ( \InvalidArgumentException $e )
        {
            throw new InflectorException(300, $e);
        }
    }

    /**
     * Run a word through a rule list and return the result of the first rule that matches.
     *
     * @param string $word The word to inflect
     * @param array $rules List of array(pattern, replacement), tried in order
     * @return string The inflected word, or $word unchanged when no rule matches
     */
    private static function applyRules( $word, array $rules )
    {
        foreach ( $rules as $rule )
        {
            list($pattern, $replacement) = $rule;
            # patterns without a '$' of their own (irregular words) are pinned to the end of the word
            $anchor = ( strpos($pattern, '$') === false ) ? '$' : '';
            # a single call both tests and replaces; $hits tells whether the rule matched at all
            $inflected = preg_replace('/'.$pattern.$anchor.'/i', $replacement, $word, -1, $hits);
            if ( $inflected !== null && $hits > 0 )
            {
                return $inflected;
            }
        }
        return $word; # not detected
    }

    /**
     * Camelize a list of chunks and glue them together.
     *
     * @param array $tokens Chunks of the decamelized input
     * @param string $shortcut_suffix Text appended after a chunk replaced by a shortcut
     * @param bool $namespaced Whether '/' is converted to '\' in every chunk
     * @return string The camelized expression
     */
    private static function camelizeTokens( array $tokens, $shortcut_suffix, $namespaced )
    {
        $camelized = '';
        foreach ( $tokens as $token )
        {
            $token = strtolower($token);
            if ( isset( self::$shortcuts[ $token ] ) )
            {
                $token = self::$shortcuts[ $token ] . $shortcut_suffix;
            }
            else
            {
                # first letter of the chunk uppercase
                $token = ucfirst($token);
            }
            if ( $namespaced )
            {
                $token = strtr($token, array( '/' => '\\' )); // for namespaces support
            }
            $camelized .= $token;
        }
        return $camelized;
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment