Last active
May 5, 2022 01:54
-
-
Save supposedly/4b20bdb4051e81a6b482da9d548cde77 to your computer and use it in GitHub Desktop.
u have become the very thing u swore to destroy (a regex DSL guy)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class _Group:
    """Base node of the regex-builder tree.

    A group holds an ordered list of patterns.  Each pattern is either
    another ``_Group`` (rendered recursively) or any other object, which is
    rendered with ``str()``.  Subclasses provide ``SEPARATOR``, the string
    placed between the rendered patterns.
    """

    # Supplied by subclasses; read by ``_to_group`` when no separator is given.
    SEPARATOR: str

    def __init__(self, *patterns, name=None):
        """Store *patterns* and the group *name*.

        ``name=None`` renders as a non-capturing group, ``''`` as an unnamed
        capturing group, and any other value as a named capturing group
        (see the ``name`` property).
        """
        self._patterns = list(patterns)
        self._name = name
        # When True the parentheses are emitted even for a single child.
        self._force_group = False

    @property
    def name(self):
        """Return the text that follows the opening parenthesis."""
        if self._name is None:
            return '?:'
        if not self._name:
            return ''
        return f'?P<{self._name!s}>'

    def join(self, other):
        """Return a new group of the same class with *other* appended."""
        return self.__class__(*self._patterns, other, name=self._name)

    def num_children(self):
        """Count the leaf (non-group) patterns reachable from this group."""
        return sum(
            pattern.num_children() if isinstance(pattern, _Group) else 1
            for pattern in self._patterns
        )

    def _to_group(self, separator=None):
        """Render this group as a member of an enclosing expression.

        The parentheses are omitted when the group has at most one leaf and
        was not explicitly forced with ``Group()``.
        """
        if separator is None:
            separator = self.SEPARATOR
        # Render once and reuse the result: rendering a second time for the
        # wrapped form doubled the work at every level of nesting.
        members = self.to_string(separator)
        if self.num_children() <= 1 and not self._force_group:
            return members
        return f'({self.name}{members})'

    def to_string(self, separator):
        """Join the rendered patterns with *separator*, without wrapping."""
        return separator.join(
            [p._to_group() if isinstance(p, _Group) else str(p) for p in self._patterns]
        )

    def _join(self, *patterns, cls):
        """Fold *patterns* into this expression using group class *cls*.

        Patterns that are not groups are wrapped in ``cls`` first.  A pattern
        is appended to the accumulator when the accumulator already is a
        ``cls`` and neither side is a forced group; otherwise both are nested
        inside a new, unnamed ``cls``.  With no patterns, ``self`` is returned.
        """
        accumulator = self
        for pat in patterns:
            if not isinstance(pat, _Group):
                pat = cls(pat)
            if isinstance(accumulator, cls) and not (
                accumulator._force_group or pat._force_group
            ):
                accumulator = accumulator.join(pat)
            else:
                accumulator = cls(accumulator, pat, name=None)
        return accumulator

    def Or(self, *patterns):
        """Return this expression alternated with *patterns*."""
        # Inside a method body ``Or`` resolves to the module-level class,
        # not to this method.
        return self._join(*patterns, cls=Or)

    def And(self, *patterns):
        """Return this expression followed by *patterns*."""
        # Inside a method body ``And`` resolves to the module-level class,
        # not to this method.
        return self._join(*patterns, cls=And)

    def Group(self, suffix=None):
        """Force this expression to render as a parenthesised group.

        Mutates and returns ``self``.  When *suffix* is given (for example
        ``'?'``), the result is a new expression made of the forced group
        followed by *suffix*.
        """
        if suffix is not None:
            suffixed = self.Group().And(suffix)
            # HACK: shadow ``Named`` on the wrapper so that naming the result
            # names the inner group (``self``) rather than the wrapper, while
            # still returning the wrapper for further chaining.
            suffixed.Named = lambda name: setattr(self, '_name', name) or suffixed
            return suffixed
        self._force_group = True
        # Switch a non-capturing group to a capturing one, but keep a name
        # that was already assigned with ``Named()``.
        if self._name is None:
            self._name = ''
        return self

    def Named(self, name):
        """Set the group name in place and return ``self``."""
        self._name = name
        return self
class Or(_Group):
    """Alternation: member patterns are rendered joined by ``|``."""

    SEPARATOR = '|'

    def __init__(self, *patterns, name=None):
        """Create an alternation of *patterns* with an optional group name."""
        super().__init__(*patterns, name=name)

    def __str__(self):
        """Render the whole expression as a regex string.

        The outermost expression is normally emitted without parentheses.
        If ``Group()`` was called on it, the parentheses (and any name) are
        kept, so an explicitly requested capture group is not dropped.
        """
        if self._force_group:
            return self._to_group()
        return self.to_string(self.SEPARATOR)
class And(_Group):
    """Concatenation: member patterns are rendered back to back."""

    SEPARATOR = ''

    def __init__(self, *patterns, name=None):
        """Create a concatenation of *patterns* with an optional group name."""
        super().__init__(*patterns, name=name)

    def __str__(self):
        """Render the whole expression as a regex string.

        The outermost expression is normally emitted without parentheses.
        If ``Group()`` was called on it, the parentheses (and any name) are
        kept, so an explicitly requested capture group is not dropped.
        """
        if self._force_group:
            return self._to_group()
        return self.to_string(self.SEPARATOR)
def Regex(*patterns):
    """Start a new expression consisting of *patterns* in sequence.

    A fresh ``Or()`` root is created on every call.  ``And`` with no patterns
    returns the root itself, and ``Group()`` / ``Named()`` mutate in place, so
    a root shared between calls could be altered by ``Regex().Group()`` and
    then leak into every later expression.
    """
    return Or().And(*patterns)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> str(Regex('foo').Or('bar').And('baz'))
(?:foo|bar)baz
>>> str(Regex('foo').Or('bar').Group().And('baz'))
(foo|bar)baz
>>> str(Regex('foo').And('bar').Or('baz'))  # superfluous group is fixable but lazy
(?:foobar)|baz
>>> str(Regex('foo').And('bar').Or('baz').Or('whatevers next'))
(?:foobar)|baz|whatevers next
>>> str(Regex('foo').And('bar').Or('baz').Group().Named('aaa').Or('whatevers next'))
(?P<aaa>(?:foobar)|baz)|whatevers next
>>> you can also do .Group('whatever suffix') to suffix the group with something (eg '?' or '{1,2}')
Syntax error
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment