Skip to content

Instantly share code, notes, and snippets.

@voku
Last active February 27, 2025 14:13
Show Gist options
  • Save voku/a0a2ca4f23975b9112d1872d1a7053e3 to your computer and use it in GitHub Desktop.
Save voku/a0a2ca4f23975b9112d1872d1a7053e3 to your computer and use it in GitHub Desktop.
PHP-CS-FIXER: Ensures regex function calls include a Regex101 reference
<?php
declare(strict_types=1);
require_once __DIR__ . '/AbstractFixerHelper.php';
use PhpCsFixer\Tokenizer\Tokens;
use PhpCsFixer\Tokenizer\Token;
use PhpCsFixer\Tokenizer\Analyzer\FunctionsAnalyzer;
/**
 * Ensures regex function calls include a Regex101 reference as a comment.
 *
 * For every global call to one of the functions in REGEX_FUNCTIONS whose
 * pattern can be resolved to a string literal, an "INFO" block comment holding
 * a regex101.com link is inserted on its own line above the statement that
 * contains the call. Statements that are already preceded by such a comment
 * are left untouched.
 */
final class Regex101CommentFixer extends AbstractFixerHelper
{
    /** Lower-case names of the PCRE functions whose calls get annotated. */
    private const REGEX_FUNCTIONS = [
        'preg_filter',
        'preg_grep',
        'preg_match',
        'preg_match_all',
        'preg_replace',
        'preg_replace_callback',
        'preg_replace_callback_array',
        'preg_split',
    ];

    /** Closing delimiter for each bracket-style opening delimiter. */
    private const BRACKET_DELIMITERS = [
        '(' => ')',
        '[' => ']',
        '{' => '}',
        '<' => '>',
    ];

    private FunctionsAnalyzer $functionsAnalyzer;

    public function __construct()
    {
        parent::__construct();
        $this->functionsAnalyzer = new FunctionsAnalyzer();
    }

    /**
     * Cheap pre-check: a file can only need fixing when it contains at least
     * one identifier token and at least one plain string literal.
     */
    public function isCandidate(Tokens $tokens): bool
    {
        return $tokens->isTokenKindFound(T_STRING) && $tokens->isTokenKindFound(T_CONSTANT_ENCAPSED_STRING);
    }

    /**
     * Scans the token stream for regex function calls and annotates them.
     *
     * @param mixed  $file   not used by this fixer
     * @param Tokens $tokens token collection, modified in place
     */
    public function applyFix($file, Tokens $tokens): void
    {
        // Index-based loop instead of foreach: inserting tokens shifts every
        // later index, so the cursor is moved forward by the number of inserted
        // tokens to stay on the call that was just handled.
        for ($index = 0; $index < $tokens->count(); ++$index) {
            $token = $tokens[$index];
            if (!$token->isGivenKind(T_STRING)) {
                continue;
            }

            if (!in_array(strtolower($token->getContent()), self::REGEX_FUNCTIONS, true)) {
                continue;
            }

            if (!$this->functionsAnalyzer->isGlobalFunctionCall($tokens, $index)) {
                continue;
            }

            $regexIndex = $this->findRegexPattern($tokens, $index);
            if ($regexIndex === null) {
                continue;
            }

            $regexPattern = $this->extractRegexPattern($tokens[$regexIndex]->getContent());
            if (!$this->isValidRegex($regexPattern)) {
                continue;
            }

            $statementStartIndex = $this->findStatementStart($tokens, $index);
            if ($statementStartIndex === null) {
                continue;
            }

            if ($this->hasExistingComment($tokens, $statementStartIndex)) {
                continue;
            }

            $sizeBefore = $tokens->count();
            $this->insertCommentAboveStatement($tokens, $statementStartIndex, $regexPattern);
            $index += $tokens->count() - $sizeBefore;
        }
    }

    /**
     * Locates the string literal that holds the pattern of the call at $index.
     *
     * Only the call's own argument list is inspected: the first string literal
     * that appears before the first comma is taken as the pattern. When there
     * is none, the first argument is checked for being a plain variable that
     * was assigned a string literal earlier (see findAssignedPattern()).
     *
     * @param int $index index of the function name token
     *
     * @return int|null index of the T_CONSTANT_ENCAPSED_STRING token, or null
     *                  when no literal pattern could be resolved
     */
    private function findRegexPattern(Tokens $tokens, int $index): ?int
    {
        $openIndex = $tokens->getNextMeaningfulToken($index);
        if ($openIndex === null || !$tokens[$openIndex]->equals('(')) {
            return null;
        }

        $closeIndex = $tokens->findBlockEnd(Tokens::BLOCK_TYPE_PARENTHESIS_BRACE, $openIndex);

        for ($cursor = $openIndex + 1; $cursor < $closeIndex; ++$cursor) {
            if ($tokens[$cursor]->isGivenKind(T_CONSTANT_ENCAPSED_STRING)) {
                return $cursor;
            }

            // Past the first comma we would be looking at the subject or
            // replacement, not the pattern.
            if ($tokens[$cursor]->equals(',')) {
                break;
            }
        }

        return $this->findAssignedPattern($tokens, $index, $openIndex);
    }

    /**
     * Resolves a pattern passed as a plain variable (`preg_match($pattern, ...)`).
     *
     * Walks backwards from the call to the nearest earlier occurrence of the
     * same variable that is directly followed by `=`. The pattern is only
     * accepted when that assignment has the exact form `$var = 'literal';`.
     * No scope analysis is performed; this is a textual heuristic.
     *
     * @param int $callIndex index of the function name token
     * @param int $openIndex index of the call's opening parenthesis
     *
     * @return int|null index of the assigned string literal, or null
     */
    private function findAssignedPattern(Tokens $tokens, int $callIndex, int $openIndex): ?int
    {
        $argumentIndex = $tokens->getNextMeaningfulToken($openIndex);
        if ($argumentIndex === null || !$tokens[$argumentIndex]->isGivenKind(T_VARIABLE)) {
            return null;
        }

        // The variable must be the whole first argument, not part of an expression.
        $afterArgument = $tokens->getNextMeaningfulToken($argumentIndex);
        if ($afterArgument === null || !$tokens[$afterArgument]->equals(',')) {
            return null;
        }

        $variableName = $tokens[$argumentIndex]->getContent();
        $candidate = $callIndex;

        while (null !== ($candidate = $tokens->getPrevTokenOfKind($candidate, [[T_VARIABLE, $variableName]]))) {
            $assignIndex = $tokens->getNextMeaningfulToken($candidate);
            if ($assignIndex === null || !$tokens[$assignIndex]->equals('=')) {
                continue;
            }

            $valueIndex = $tokens->getNextMeaningfulToken($assignIndex);
            if ($valueIndex === null || !$tokens[$valueIndex]->isGivenKind(T_CONSTANT_ENCAPSED_STRING)) {
                // The most recent assignment is not a literal; anything older is stale.
                return null;
            }

            $terminatorIndex = $tokens->getNextMeaningfulToken($valueIndex);
            if ($terminatorIndex === null || !$tokens[$terminatorIndex]->equals(';')) {
                return null;
            }

            return $valueIndex;
        }

        return null;
    }

    /**
     * Converts the source text of a string literal into the string value PHP
     * would see for the escapes that matter in regex patterns.
     *
     * Exactly one pair of matching surrounding quotes is removed. In
     * single-quoted literals `\\` and `\'` are unescaped; in double-quoted
     * literals `\\`, `\"` and `\$` are. Other escape sequences are left as
     * written. Content that is not wrapped in matching quotes is returned
     * unchanged.
     */
    private function extractRegexPattern(string $content): string
    {
        $length = strlen($content);
        if ($length < 2) {
            return $content;
        }

        $quote = $content[0];
        if (($quote !== "'" && $quote !== '"') || $content[$length - 1] !== $quote) {
            return $content;
        }

        $inner = substr($content, 1, -1);

        if ($quote === "'") {
            return strtr($inner, ['\\\\' => '\\', "\\'" => "'"]);
        }

        return strtr($inner, ['\\\\' => '\\', '\\"' => '"', '\\$' => '$']);
    }

    /**
     * Tells whether $pattern is accepted by PCRE.
     *
     * preg_match() returns false both when the pattern fails to compile and
     * when matching hits a PCRE runtime error, so the return value is the
     * check. Warnings raised by an invalid pattern are swallowed by a
     * temporary error handler instead of the `@` operator.
     */
    private function isValidRegex(string $pattern): bool
    {
        set_error_handler(static fn (): bool => true);

        try {
            return preg_match($pattern, 'test') !== false;
        } finally {
            restore_error_handler();
        }
    }

    /**
     * Inserts the INFO comment, followed by a line break, in front of the
     * token at $index.
     *
     * The whitespace token that already precedes the statement positions the
     * comment; the newly inserted whitespace repeats that indentation so the
     * statement itself keeps its original column.
     *
     * @param int    $index   index of the first token of the statement
     * @param string $pattern full pattern including delimiters and modifiers
     */
    private function insertCommentAboveStatement(Tokens $tokens, int $index, string $pattern): void
    {
        $indentation = $this->detectIndentation($tokens, $index);
        $infoComment = sprintf('/* INFO: %s */', $this->generateRegex101Link($pattern));

        $tokens->insertAt($index, [
            new Token([T_COMMENT, $infoComment]),
            new Token([T_WHITESPACE, "\n" . $indentation]),
        ]);
    }

    /**
     * Returns the indentation of the line on which the token at $index starts.
     *
     * That is the part after the last line break of the whitespace token
     * directly before $index, or an empty string when there is no such
     * whitespace token or it contains no line break.
     */
    private function detectIndentation(Tokens $tokens, int $index): string
    {
        if ($index < 1 || !$tokens[$index - 1]->isGivenKind(T_WHITESPACE)) {
            return '';
        }

        $whitespace = $tokens[$index - 1]->getContent();
        $lineBreak = strrpos($whitespace, "\n");

        return $lineBreak === false ? '' : substr($whitespace, $lineBreak + 1);
    }

    /**
     * Walks backwards from $index to the first token of the enclosing statement.
     *
     * The walk stops when the previous meaningful token is `;`, `{`, `}` or a
     * PHP open tag, so a comment is never placed in front of the open tag.
     *
     * @return int|null index of the statement's first token, or null when the
     *                  walk hits `case`, `default`, `match` or an echo open
     *                  tag, where no comment is inserted
     */
    private function findStatementStart(Tokens $tokens, int $index): ?int
    {
        while ($index > 0) {
            $prevIndex = $tokens->getPrevMeaningfulToken($index);
            if ($prevIndex === null) {
                return $index;
            }

            $prevToken = $tokens[$prevIndex];

            if ($prevToken->equalsAny([';', '{', '}']) || $prevToken->isGivenKind(T_OPEN_TAG)) {
                return $index;
            }

            if ($prevToken->isGivenKind([T_CASE, T_DEFAULT, T_MATCH, T_OPEN_TAG_WITH_ECHO])) {
                return null;
            }

            $index = $prevIndex;
        }

        return $index;
    }

    /**
     * Tells whether the statement starting at $statementStartIndex is already
     * directly preceded (ignoring whitespace) by a comment containing a
     * regex101 link.
     */
    private function hasExistingComment(Tokens $tokens, int $statementStartIndex): bool
    {
        $prevIndex = $tokens->getPrevNonWhitespace($statementStartIndex);
        if ($prevIndex === null) {
            return false;
        }

        return $tokens[$prevIndex]->isGivenKind(T_COMMENT)
            && str_contains($tokens[$prevIndex]->getContent(), 'https://regex101.com');
    }

    /**
     * Builds the regex101 link for a delimited pattern.
     *
     * The `regex` parameter carries the pattern body without delimiters. When
     * the pattern has modifiers they are added as a `flags` parameter.
     * NOTE(review): `flags` is assumed to be honoured by regex101's query
     * string handling; confirm against regex101 before relying on it.
     *
     * @param string $regex full pattern including delimiters and modifiers
     */
    private function generateRegex101Link(string $regex): string
    {
        [$body, $flags] = $this->splitPattern($regex);

        $link = 'https://regex101.com/?regex=' . urlencode($body);
        if ($flags !== '') {
            $link .= '&flags=' . urlencode($flags);
        }

        return $link . '&flavor=pcre';
    }

    /**
     * Splits a delimited pattern into its body and its modifiers.
     *
     * Handles bracket-style delimiters (see BRACKET_DELIMITERS) and skips
     * leading whitespace in front of the opening delimiter. Spaces and line
     * breaks inside the modifier part are dropped.
     *
     * @return array{0: string, 1: string} pattern body and modifier letters
     */
    private function splitPattern(string $regex): array
    {
        $regex = ltrim($regex);
        if (strlen($regex) < 2) {
            return ['', ''];
        }

        $closingDelimiter = self::BRACKET_DELIMITERS[$regex[0]] ?? $regex[0];

        // Modifiers are letters and delimiters are not, so the last occurrence
        // of the closing delimiter marks the end of the pattern body.
        $end = strrpos($regex, $closingDelimiter, 1);
        if ($end === false) {
            return [mb_substr($regex, 1, -1), ''];
        }

        $flags = str_replace([' ', "\n", "\r"], '', substr($regex, $end + 1));

        return [substr($regex, 1, $end - 1), $flags];
    }

    public function getDocumentation(): string
    {
        return 'Ensures regex function calls include a Regex101 reference.';
    }

    /**
     * NOTE(review): this sample already contains the INFO comment and its link
     * keeps the delimiters (%2F), which differs from what
     * generateRegex101Link() produces; confirm whether the sample is meant to
     * be the input or the expected output of the fixer.
     */
    public function getSampleCode(): string
    {
        return <<<'PHP'
        <?php
        /* INFO: https://regex101.com/?regex=%2Fpattern%2F&flavor=pcre */
        preg_match("/pattern/", $string);
        PHP;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment