Last active
February 27, 2025 14:13
-
-
Save voku/a0a2ca4f23975b9112d1872d1a7053e3 to your computer and use it in GitHub Desktop.
PHP-CS-FIXER: Ensures regex function calls include a Regex101 reference
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
declare(strict_types=1); | |
require_once __DIR__ . '/AbstractFixerHelper.php'; | |
use PhpCsFixer\Tokenizer\Tokens; | |
use PhpCsFixer\Tokenizer\Token; | |
use PhpCsFixer\Tokenizer\Analyzer\FunctionsAnalyzer; | |
/**
 * Ensures regex function calls include a Regex101 reference as a comment.
 *
 * For every global call to one of the preg_* functions whose pattern can be
 * resolved to a string literal, a block comment of the form
 * "INFO: https://regex101.com/?regex=...&flavor=pcre" is placed on its own
 * line directly above the statement containing the call.
 */
final class Regex101CommentFixer extends AbstractFixerHelper
{
    /** Lower-cased names of the PCRE functions whose calls get annotated. */
    private const REGEX_FUNCTIONS = [
        'preg_filter',
        'preg_grep',
        'preg_match',
        'preg_match_all',
        'preg_replace',
        'preg_replace_callback',
        'preg_replace_callback_array',
        'preg_split',
    ];

    /** Closing delimiter for each bracket-style opening delimiter PCRE accepts. */
    private const BRACKET_DELIMITERS = [
        '(' => ')',
        '[' => ']',
        '{' => '}',
        '<' => '>',
    ];

    private FunctionsAnalyzer $functionsAnalyzer;

    public function __construct()
    {
        parent::__construct();
        $this->functionsAnalyzer = new FunctionsAnalyzer();
    }

    /**
     * A file can only need fixing when it contains at least one identifier
     * and at least one constant string literal.
     */
    public function isCandidate(Tokens $tokens): bool
    {
        return $tokens->isTokenKindFound(T_STRING) && $tokens->isTokenKindFound(T_CONSTANT_ENCAPSED_STRING);
    }

    /**
     * Walks the token stream and inserts a Regex101 comment above every
     * statement that calls a regex function with a resolvable, valid pattern.
     *
     * @param Token[]|Tokens $tokens
     */
    public function applyFix($file, Tokens $tokens): void
    {
        // Index-based loop on purpose: tokens are inserted while scanning, so
        // the size is re-read on every pass and the cursor is advanced past
        // whatever was inserted instead of relying on iterator behaviour.
        for ($index = 0; $index < $tokens->count(); ++$index) {
            $token = $tokens[$index];
            if (!$token->isGivenKind(T_STRING)) {
                continue;
            }

            if (!in_array(strtolower($token->getContent()), self::REGEX_FUNCTIONS, true)) {
                continue;
            }

            if (!$this->functionsAnalyzer->isGlobalFunctionCall($tokens, $index)) {
                continue;
            }

            $regexIndex = $this->findRegexPattern($tokens, $index);
            if ($regexIndex === null) {
                continue;
            }

            $regexPattern = $this->extractRegexPattern($tokens[$regexIndex]->getContent());
            if (!$this->isValidRegex($regexPattern)) {
                continue;
            }

            $statementStartIndex = $this->findStatementStart($tokens, $index);
            if ($statementStartIndex === null) {
                continue;
            }

            if ($this->hasExistingComment($tokens, $statementStartIndex)) {
                continue;
            }

            $sizeBefore = $tokens->count();
            $this->insertCommentAboveStatement($tokens, $statementStartIndex, $regexPattern);

            // The insertion happened at or before $index, so the call token
            // moved right by the number of inserted tokens; follow it so the
            // same call is not examined a second time.
            $index += $tokens->count() - $sizeBefore;
        }
    }

    /**
     * Locates the string literal that provides the pattern of the call whose
     * function-name token sits at $index.
     *
     * Only the first argument is inspected. It may be a string literal, the
     * first element of an array literal, or a variable; a variable is resolved
     * through findAssignedStringLiteral().
     *
     * @return int|null index of the T_CONSTANT_ENCAPSED_STRING token, or null
     *                  when the pattern cannot be resolved to a single literal
     */
    private function findRegexPattern(Tokens $tokens, int $index): ?int
    {
        $openParenIndex = $tokens->getNextMeaningfulToken($index);
        if ($openParenIndex === null || !$tokens[$openParenIndex]->equals('(')) {
            return null;
        }

        $argumentIndex = $tokens->getNextMeaningfulToken($openParenIndex);
        if ($argumentIndex === null) {
            return null;
        }

        // Pattern lists (`['/a/', ...]` or `array('/a/', ...)`): use the first
        // element. For the long syntax, first step from `array` onto its "(".
        $isLongArray = $tokens[$argumentIndex]->isGivenKind(T_ARRAY);
        if ($isLongArray) {
            $argumentIndex = $tokens->getNextMeaningfulToken($argumentIndex);
        }
        if ($argumentIndex !== null && ($isLongArray || $tokens[$argumentIndex]->getContent() === '[')) {
            $argumentIndex = $tokens->getNextMeaningfulToken($argumentIndex);
        }
        if ($argumentIndex === null) {
            return null;
        }

        // A literal or variable that is concatenated with something else is
        // only a fragment of the pattern, not the pattern itself.
        $followingIndex = $tokens->getNextMeaningfulToken($argumentIndex);
        if ($followingIndex !== null && $tokens[$followingIndex]->equals('.')) {
            return null;
        }

        $argument = $tokens[$argumentIndex];
        if ($argument->isGivenKind(T_CONSTANT_ENCAPSED_STRING)) {
            return $argumentIndex;
        }

        if ($argument->isGivenKind(T_VARIABLE)) {
            return $this->findAssignedStringLiteral($tokens, $argument->getContent(), $index);
        }

        return null;
    }

    /**
     * Searches backwards from $beforeIndex for the nearest `$name = 'literal';`
     * assignment of the given variable.
     *
     * This is a purely textual heuristic: it does not track scopes or control
     * flow, it only looks at the closest preceding assignment in the file.
     *
     * @param string $variableName variable name including the leading "$"
     *
     * @return int|null index of the assigned string literal, or null when the
     *                  nearest assignment is missing or is not a plain literal
     */
    private function findAssignedStringLiteral(Tokens $tokens, string $variableName, int $beforeIndex): ?int
    {
        for ($index = $beforeIndex - 1; $index >= 0; --$index) {
            $token = $tokens[$index];
            if (!$token->isGivenKind(T_VARIABLE) || $token->getContent() !== $variableName) {
                continue;
            }

            $assignmentIndex = $tokens->getNextMeaningfulToken($index);
            if ($assignmentIndex === null || !$tokens[$assignmentIndex]->equals('=')) {
                // Merely a read of the variable; keep looking further back.
                continue;
            }

            $valueIndex = $tokens->getNextMeaningfulToken($assignmentIndex);
            if ($valueIndex === null || !$tokens[$valueIndex]->isGivenKind(T_CONSTANT_ENCAPSED_STRING)) {
                return null;
            }

            $terminatorIndex = $tokens->getNextMeaningfulToken($valueIndex);
            if ($terminatorIndex === null || !$tokens[$terminatorIndex]->equals(';')) {
                return null;
            }

            return $valueIndex;
        }

        return null;
    }

    /**
     * Converts the source text of a constant string literal into the pattern
     * string it denotes.
     *
     * Exactly one pair of enclosing quotes is removed (plus an optional binary
     * prefix "b"), then the escapes that would change the regex are undone:
     * `\\` and the escaped quote, and `\$` for double-quoted literals. Other
     * backslash sequences are left untouched.
     */
    private function extractRegexPattern(string $content): string
    {
        if ($content !== '' && ($content[0] === 'b' || $content[0] === 'B')) {
            $content = substr($content, 1);
        }

        if (strlen($content) < 2) {
            return '';
        }

        $quote = $content[0];
        $inner = substr($content, 1, -1);

        // strtr() scans left to right without rescanning its own output, so an
        // escaped backslash followed by a quote is decoded correctly.
        if ($quote === "'") {
            return strtr($inner, ['\\\\' => '\\', "\\'" => "'"]);
        }

        return strtr($inner, ['\\\\' => '\\', '\\"' => '"', '\\$' => '$']);
    }

    /**
     * Tells whether $pattern is accepted by PCRE.
     *
     * preg_match() returns false for any failure, including patterns that do
     * not compile, so its return value is checked rather than
     * preg_last_error() alone.
     */
    private function isValidRegex(string $pattern): bool
    {
        /* @noinspection PhpUsageOfSilenceOperatorInspection | ok here: an invalid pattern emits a warning */
        return @preg_match($pattern, '') !== false;
    }

    /**
     * Inserts the Regex101 comment on its own line directly above the
     * statement starting at $index, keeping the statement's indentation.
     *
     * @param Token[]|Tokens $tokens
     * @param int            $index   index of the first token of the statement
     * @param string         $pattern complete regex including delimiters and modifiers
     */
    private function insertCommentAboveStatement(Tokens $tokens, int $index, string $pattern): void
    {
        if (!isset($tokens[$index])) {
            return;
        }

        // The indentation lives in the whitespace token in front of the
        // statement, never in the statement's own first token.
        $indentation = '';
        if ($index > 0 && $tokens[$index - 1]->isWhitespace()) {
            $indentation = $this->getIndentationLevel($tokens[$index - 1]->getContent());
        }

        $infoComment = sprintf('/* INFO: %s */', $this->generateRegex101Link($pattern));

        // The comment takes over the statement's position (and thereby its
        // existing indentation); the statement moves to a new line that
        // repeats the same indentation.
        $tokens->insertAt($index, [
            new Token([T_COMMENT, $infoComment]),
            new Token([T_WHITESPACE, "\n" . $indentation]),
        ]);
    }

    /**
     * Walks backwards from $index to the first token of the enclosing statement.
     *
     * @param Token[]|Tokens $tokens
     *
     * @return int|null index of the first meaningful token of the statement, or
     *                  null when a `case`, `default` or `match` keyword is met
     *                  on the way, in which case no comment is inserted
     */
    private function findStatementStart(Tokens $tokens, int $index): ?int
    {
        while ($index > 0) {
            $prevIndex = $tokens->getPrevMeaningfulToken($index);
            if ($prevIndex === null) {
                return $index;
            }

            $prevToken = $tokens[$prevIndex];

            // Statement boundaries: end of the previous statement, a block
            // brace, or the PHP open tag (so nothing is ever placed before it).
            if (
                $prevToken->equalsAny([';', '{', '}'])
                || $prevToken->isGivenKind([T_OPEN_TAG, T_OPEN_TAG_WITH_ECHO])
            ) {
                return $index;
            }

            // switch/match labels
            if ($prevToken->isGivenKind([T_CASE, T_DEFAULT, T_MATCH])) {
                return null;
            }

            $index = $prevIndex;
        }

        return $index;
    }

    /**
     * Tells whether the statement is already preceded by a Regex101 comment.
     *
     * @param Token[]|Tokens $tokens
     */
    private function hasExistingComment(Tokens $tokens, int $statementStartIndex): bool
    {
        $prevIndex = $tokens->getPrevNonWhitespace($statementStartIndex);
        if ($prevIndex === null || !isset($tokens[$prevIndex])) {
            return false;
        }

        return $tokens[$prevIndex]->isGivenKind(T_COMMENT)
            && str_contains($tokens[$prevIndex]->getContent(), 'https://regex101.com');
    }

    /**
     * Extracts the indentation from the content of a whitespace token.
     *
     * @param string $content whitespace preceding a statement
     *
     * @return string the text after the last line break, or the whole content
     *                when it contains no line break
     */
    private function getIndentationLevel(string $content): string
    {
        $lastLineBreak = strrpos($content, "\n");

        return $lastLineBreak === false ? $content : substr($content, $lastLineBreak + 1);
    }

    /**
     * Builds the Regex101 URL for a complete PCRE pattern.
     *
     * The pattern is split into body and modifiers at its closing delimiter
     * (the matching bracket for bracket-style delimiters). The body goes into
     * the `regex` parameter; modifiers, when present, are added as a separate
     * `flags` parameter. Without modifiers the URL has the form
     * `?regex=<body>&flavor=pcre`.
     */
    private function generateRegex101Link(string $regex): string
    {
        // PCRE tolerates whitespace in front of the opening delimiter.
        $regex = ltrim($regex);

        $body = mb_substr($regex, 1, -1);
        $modifiers = '';

        if (strlen($regex) >= 2) {
            $closingDelimiter = self::BRACKET_DELIMITERS[$regex[0]] ?? $regex[0];

            // Modifiers can never contain the delimiter, so its last
            // occurrence is the end of the pattern body.
            $closingIndex = strrpos($regex, $closingDelimiter);
            if ($closingIndex !== false && $closingIndex > 0) {
                $body = substr($regex, 1, $closingIndex - 1);
                $modifiers = trim(substr($regex, $closingIndex + 1));
            }
        }

        $link = 'https://regex101.com/?regex=' . urlencode($body);
        if ($modifiers !== '') {
            $link .= '&flags=' . urlencode($modifiers);
        }

        return $link . '&flavor=pcre';
    }

    public function getDocumentation(): string
    {
        return 'Ensures regex function calls include a Regex101 reference.';
    }

    /**
     * Sample showing a call together with the comment this fixer produces.
     */
    public function getSampleCode(): string
    {
        return <<<'PHP'
            <?php
            /* INFO: https://regex101.com/?regex=pattern&flavor=pcre */
            preg_match("/pattern/", $string);
            PHP;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment