Last active
September 13, 2024 20:50
-
-
Save ghostwriter/8ecaeb85fbdedff474e02bab2880ee9d to your computer and use it in GitHub Desktop.
abnf-to-regex generator output (https://github.com/ghostwriter/abnf)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PCRE grammar for ABNF (RFC 5234), written for extended mode: compile with the
# x modifier so that whitespace and these hash comments are ignored. Because
# comments start with the hash character, use a different pattern delimiter
# when embedding this text in a delimited pattern string.
(?(DEFINE)
# Basic components (RFC 5234 core rules)
(?<ALPHA>[A-Za-z])
(?<BIT>[01])
(?<DIGIT>[0-9])
(?<HEXDIG>[0-9A-Fa-f])
(?<CHAR>[\x01-\x7F])
(?<VCHAR>[\x21-\x7E])
(?<SP>[\x20])
(?<WSP>[\x20\x09])
(?<CR>\x0D)
(?<LF>\x0A)
(?<HTAB>\x09)
(?<DQUOTE>\x22)
(?<CTL>[\x00-\x1F\x7F])
(?<OCTET>[\x00-\xFF])
# Lenient line ending: the CR is optional, so a bare LF is accepted as well.
(?<CRLF>(?:\x0D?\x0A))
# Whitespace and Comment handling
(?<C_WSP>(?:(?&WSP)|(?:(?&C_NL)(?&WSP))))
(?<C_NL>(?:(?&Comment)|(?&CRLF)))
(?<Comment>[\x3B](?:(?&WSP)|(?&VCHAR))*(?&CRLF))
# LWSP = *(WSP / CRLF WSP). The line ending is referenced through CRLF: a
# question mark inside a character class is a literal, not a quantifier.
(?<LWSP>(?:(?&WSP)|(?&CRLF)(?&WSP))*)
# Numerical and Character value definitions
(?<BinVal>[\x62](?&BIT)+(?:(?:[\x2E](?&BIT)+)+|(?:[\x2D](?&BIT)+))?)
(?<DecVal>[\x64](?&DIGIT)+(?:(?:[\x2E](?&DIGIT)+)+|(?:[\x2D](?&DIGIT)+))?)
(?<HexVal>[\x58\x78](?&HEXDIG)+(?:(?:[\x2E](?&HEXDIG)+)+|(?:[\x2D](?&HEXDIG)+))?)
(?<NumVal>[\x25](?:(?&BinVal)|(?&DecVal)|(?&HexVal)))
(?<CharVal>[\x22](?:[\x20-\x21]|[\x23-\x7E])*[\x22])
(?<ProseVal>[\x3C](?:[\x20-\x3D]|[\x3F-\x7E])*[\x3E])
# Rule identifiers and structure components
(?<Rulename>(?&ALPHA)(?:(?&ALPHA)|(?&DIGIT)|[\x2D])*)
# "=" optionally followed by "/", so the incremental-alternative operator "=/"
# is consumed as one unit instead of being reached only through backtracking.
(?<DefinedAs>(?&C_WSP)*[\x3D][\x2F]?(?&C_WSP)*)
# High-level elements
(?<Element>(?:(?&Rulename)|(?&Group)|(?&Option)|(?&CharVal)|(?&NumVal)|(?&ProseVal)))
(?<Group>[\x28](?&C_WSP)*(?&Alternation)(?&C_WSP)*[\x29])
(?<Option>[\x5B](?&C_WSP)*(?&Alternation)(?&C_WSP)*[\x5D])
# Repetition, Concatenation, and Alternation
# repeat = 1*DIGIT / (*DIGIT "*" *DIGIT). The starred form is tried first so
# that an input such as 2*4 is not cut short after its leading digits.
(?<Repeat>(?:(?&DIGIT)*[\x2A](?&DIGIT)*|(?&DIGIT)+))
(?<Repetition>(?:(?&Repeat)?(?&Element)))
(?<Concatenation>(?&Repetition)(?:(?&C_WSP)+(?&Repetition))*)
(?<Alternation>(?&Concatenation)(?:(?&C_WSP)*[\x2F](?&C_WSP)*(?&Concatenation))*)
# Full rule and rule list
(?<Rule>(?&Rulename)(?&DefinedAs)(?&Elements)(?&C_NL))
(?<Elements>(?&Alternation)(?&C_WSP)*)
(?<RuleList>(?:(?&Rule)|(?:(?&C_WSP)*(?&C_NL)))+)
)
# Entry point for full rule list
^(?&RuleList)$
<?php
#BLM
/**
 * Matches the start of an input string against one named rule of a small
 * ABNF-derived PCRE grammar.
 */
final class ABNFParser
{
    /**
     * PCRE (?(DEFINE)...) block holding the named sub-patterns that parse()
     * can call. It is compiled in extended mode, so whitespace inside it is
     * ignored. It must not contain the "#" character, because parse() uses
     * "#" as the pattern delimiter.
     *
     * WSP lists its two code points directly: a subroutine call such as
     * (?&SP) is not interpreted inside a character class. Repeat implements
     * ABNF "1*DIGIT / (*DIGIT "*" *DIGIT)" with the starred form tried first,
     * so "2*4" is matched in full while a plain "3" or a bare "*" still match.
     */
    private const ABNF_REGEX = <<<'REGEX'
        (?(DEFINE)
            (?<ALPHA>[A-Za-z])
            (?<DIGIT>[0-9])
            (?<SP>[\x20])
            (?<WSP>[\x20\x09])
            (?<CR>\x0D)
            (?<LF>\x0A)
            (?<Repeat>(?:(?&DIGIT)*[\x2A](?&DIGIT)*|(?&DIGIT)+))
        )
REGEX;

    /**
     * Matches the beginning of $input against the named grammar rule.
     *
     * The match is anchored at the start of $input (modifier A) but not at
     * its end, so trailing text after the matched prefix is allowed.
     *
     * @param string $input Text to match.
     * @param string $rule  Name of a sub-pattern defined in ABNF_REGEX.
     *
     * @return string|null The matched prefix, or null when the rule does not
     *                     match, the rule name is not a plain identifier, or
     *                     the pattern cannot be evaluated.
     */
    public function parse(string $input, string $rule): ?string
    {
        // The rule name is spliced into the pattern, so accept only a plain
        // group identifier; anything else could rewrite the expression.
        if (preg_match('/\A[A-Za-z_][A-Za-z0-9_]*\z/', $rule) !== 1) {
            return null;
        }

        // Build the query pattern dynamically based on the provided rule.
        $pattern = sprintf('#%s(?&%s)#Aux', self::ABNF_REGEX, $rule);

        if (preg_match($pattern, $input, $matches) === 1) {
            return $matches[0];
        }

        return null;
    }
}
# Usage
// Run the sample input "2*4" through the Repeat rule and dump what matched.
$abnf = new ABNFParser();
$matched = $abnf->parse('2*4', 'Repeat');
var_dump($matched); // Outputs: string(3) "2*4"
https://3v4l.org/Agmbt Outputs: string(3) "2*4"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compile the pattern with the 'ux' modifiers: 'u' for Unicode (UTF-8) matching and 'x' to enable extended mode.