Skip to content

Instantly share code, notes, and snippets.

@ghostwriter
Last active September 13, 2024 20:50
Show Gist options
  • Save ghostwriter/8ecaeb85fbdedff474e02bab2880ee9d to your computer and use it in GitHub Desktop.
Save ghostwriter/8ecaeb85fbdedff474e02bab2880ee9d to your computer and use it in GitHub Desktop.
abnf-to-regex generator output (https://github.com/ghostwriter/abnf)
# ABNF grammar of RFC 5234 written as PCRE named subroutines.
# Requires extended mode (x modifier) so that layout and these comments are ignored.
# Groups inside the DEFINE block never match by themselves, they are only called via (?&Name).
(?(DEFINE)
# Basic components (core rules)
(?<ALPHA>[A-Za-z])
(?<BIT>[01])
(?<DIGIT>[0-9])
(?<HEXDIG>[0-9A-Fa-f])
(?<CHAR>[\x01-\x7F])
(?<VCHAR>[\x21-\x7E])
(?<SP>[\x20])
(?<WSP>[\x20\x09])
(?<CR>\x0D)
(?<LF>\x0A)
(?<HTAB>\x09)
(?<DQUOTE>\x22)
(?<CTL>[\x00-\x1F\x7F])
# NOTE: with the u modifier this is the code point range U+0000 to U+00FF, not raw bytes.
(?<OCTET>[\x00-\xFF])
# Lenient line ending: the CR is optional, so a bare LF is accepted as well.
(?<CRLF>(?:\x0D?\x0A))
# Whitespace and Comment handling
(?<C_WSP>(?:(?&WSP)|(?:(?&C_NL)(?&WSP))))
(?<C_NL>(?:(?&Comment)|(?&CRLF)))
(?<Comment>[\x3B](?:(?&WSP)|(?&VCHAR))*(?&CRLF))
# Linear whitespace: any run of WSP, or of a line ending followed by WSP.
# The line ending is a subroutine call because a question mark inside a
# character class would be a literal character, not a quantifier.
(?<LWSP>(?:(?&WSP)|(?&CRLF)(?&WSP))*)
# Numerical and Character value definitions
# The radix markers b, d and x are ABNF string literals and therefore case insensitive.
# After the first number comes either a dot separated series or a single dash range.
(?<BinVal>[\x42\x62](?&BIT)+(?:(?:[\x2E](?&BIT)+)+|(?:[\x2D](?&BIT)+))?)
(?<DecVal>[\x44\x64](?&DIGIT)+(?:(?:[\x2E](?&DIGIT)+)+|(?:[\x2D](?&DIGIT)+))?)
(?<HexVal>[\x58\x78](?&HEXDIG)+(?:(?:[\x2E](?&HEXDIG)+)+|(?:[\x2D](?&HEXDIG)+))?)
(?<NumVal>[\x25](?:(?&BinVal)|(?&DecVal)|(?&HexVal)))
# Quoted string: any printable character except the double quote itself.
(?<CharVal>[\x22](?:[\x20-\x21]|[\x23-\x7E])*[\x22])
# Prose in angle brackets: any printable character except the closing bracket.
(?<ProseVal>[\x3C](?:[\x20-\x3D]|[\x3F-\x7E])*[\x3E])
# Rule identifiers and structure components
(?<Rulename>(?&ALPHA)(?:(?&ALPHA)|(?&DIGIT)|[\x2D])*)
# Equals sign with an optional slash for incremental alternatives.
# Written as one greedy sequence so the longer operator is found without
# having to backtrack into this subroutine, which older PCRE versions treat as atomic.
(?<DefinedAs>(?:(?&C_WSP)*[\x3D][\x2F]?(?&C_WSP)*))
# High-level elements
(?<Element>(?:(?&Rulename)|(?&Group)|(?&Option)|(?&CharVal)|(?&NumVal)|(?&ProseVal)))
(?<Group>[\x28](?&C_WSP)*(?&Alternation)(?&C_WSP)*[\x29])
(?<Option>[\x5B](?&C_WSP)*(?&Alternation)(?&C_WSP)*[\x5D])
# Repetition, Concatenation, and Alternation
# The star form is tried first so that a count such as 2*4 is consumed whole
# even when the engine does not backtrack into subroutine calls.
(?<Repeat>(?:(?&DIGIT)*[\x2A](?&DIGIT)*|(?&DIGIT)+))
(?<Repetition>(?:(?&Repeat)?(?&Element)))
(?<Concatenation>(?&Repetition)(?:(?&C_WSP)+(?&Repetition))*)
(?<Alternation>(?&Concatenation)(?:(?&C_WSP)*[\x2F](?&C_WSP)*(?&Concatenation))*)
# Full rule and rule list
(?<Rule>(?&Rulename)(?&DefinedAs)(?&Elements)(?&C_NL))
(?<Elements>(?&Alternation)(?&C_WSP)*)
# One or more rules, interleaved with blank or comment-only lines.
(?<RuleList>(?:(?&Rule)|(?:(?&C_WSP)*(?&C_NL)))+)
)
# Entry point for full rule list
^(?&RuleList)$
@ghostwriter
Copy link
Author

Use this pattern with the `u` (Unicode) and `x` (extended mode) modifiers.

@ghostwriter
Copy link
Author

<?php

#BLM

/**
 * Minimal ABNF matcher built on PCRE named subroutines.
 *
 * Holds a DEFINE-only grammar fragment and matches a single named rule of that
 * fragment against the beginning of an input string.
 */
final class ABNFParser
{
    /**
     * Grammar fragment: a DEFINE block whose named groups can be invoked as
     * subroutines via (?&Name). It is compiled with the "x" modifier, so the
     * layout whitespace below is not part of the pattern.
     *
     * - WSP spells out its two code points; a subroutine call placed inside a
     *   character class would be read as literal characters.
     * - Repeat is an alternation: either the "*" form with optional bounds on
     *   both sides, or a plain digit count. The "*" form comes first so that
     *   "2*4" is consumed whole.
     */
    private const ABNF_REGEX = <<<'REGEX'
    (?(DEFINE)
        (?<ALPHA>[A-Za-z])
        (?<DIGIT>[0-9])
        (?<SP>[\x20])
        (?<WSP>[\x20\x09])
        (?<CR>\x0D)
        (?<LF>\x0A)
        (?<Repeat>(?:(?&DIGIT)*[\x2A](?&DIGIT)*|(?&DIGIT)+))
    )
    REGEX;

    /** Shape a rule name must have before it is spliced into the pattern. */
    private const RULE_NAME_REGEX = '/\A[A-Za-z_][A-Za-z0-9_]*\z/';

    /**
     * Matches $rule against the start of $input.
     *
     * The match is anchored at offset 0 ("A" modifier) but not at the end, so
     * the longest prefix accepted by the rule is returned and trailing input
     * is ignored.
     *
     * A well-formed rule name that the grammar does not define makes the
     * pattern fail to compile; preg_match() then raises a warning and this
     * method returns null.
     *
     * @param string $input Text to match; must be valid UTF-8 ("u" modifier).
     * @param string $rule  Name of a group defined in the grammar, e.g. "Repeat".
     *
     * @return string|null The matched prefix, or null when the rule does not match.
     *
     * @throws \InvalidArgumentException When $rule is not a plain identifier.
     */
    public function parse(string $input, string $rule): ?string
    {
        // The rule name becomes part of the regex source, so only a bare
        // identifier is accepted; anything else could inject pattern syntax.
        if (preg_match(self::RULE_NAME_REGEX, $rule) !== 1) {
            throw new \InvalidArgumentException(sprintf('Invalid ABNF rule name "%s".', $rule));
        }

        // "~" is used as the delimiter because "#" starts a comment in
        // extended mode and would terminate the pattern if the grammar ever
        // gained inline comments.
        $pattern = sprintf('~%s(?&%s)~Aux', self::ABNF_REGEX, $rule);

        if (preg_match($pattern, $input, $matches) === 1) {
            return $matches[0];
        }

        return null;
    }
}

# Usage: match the "Repeat" rule against a sample repetition prefix.
$rule = 'Repeat';
$input = '2*4';

$parser = new ABNFParser();
$parsedResult = $parser->parse($input, $rule);

// Dump the matched text. Outputs: string(3) "2*4"
var_dump($parsedResult);

https://3v4l.org/Agmbt Outputs: string(3) "2*4"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment