Created
May 15, 2026 15:14
-
-
Save ptmcg/b636425d646b8e6d9bf5acf7c9d53401 to your computer and use it in GitHub Desktop.
Simplified use of re.sub by making it a decorator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from typing import Callable, Union | |
def make_old_style_acronym_sub(match):
    """Return the replacement text for one acronym match.

    ``match`` is the match object handed over by ``re.sub``; the whole
    matched text (group 0) is rewritten with a period after every
    character, e.g. ``IBM`` becomes ``I.B.M.``.
    """
    letters = match[0]
    return ".".join(letters) + "."
def make_old_style_acronyms(text: str) -> str:
    """Rewrite every acronym in ``text`` in dotted form.

    An acronym here is a run of two or more capital letters delimited by
    word boundaries; each one is replaced by the value returned from
    ``make_old_style_acronym_sub``.
    """
    acronym_pattern = r"\b[A-Z]{2,}\b"
    return re.sub(acronym_pattern, make_old_style_acronym_sub, text)
# Demo of the plain re.sub version; GE and IBM are the only words made of
# two or more capital letters, so only they are rewritten.
print(
    make_old_style_acronyms(
        "I used to work at GE but now I work at IBM"
    )
)
def simpler_re_sub(regex: str):
    """Build a decorator that turns a string transform into a regex substitution.

    The decorated function is called with the full text of every match of
    ``regex`` (group 0) and must return the replacement for that match.
    The decorator returns a new function that takes the string to process
    and returns it with all matches replaced.
    """
    # when a decorator takes arguments, it must become a decorator factory
    def _inner(func: Callable[[str], str]) -> Callable[[str], str]:
        def wrapper(s: str) -> str:
            return re.sub(regex, lambda m: func(m[0]), s)

        # don't use functools.wraps, since we change the signature (wraps
        # would also set __wrapped__); copy the identifying metadata by
        # hand, skipping attributes the callable lacks (a functools.partial
        # has no __name__) instead of failing at decoration time
        for attr in ("__module__", "__name__", "__qualname__", "__doc__"):
            if hasattr(func, attr):
                setattr(wrapper, attr, getattr(func, attr))
        return wrapper

    return _inner
def show_args_and_return(fn):
    """Decorator that prints the arguments and return value of every call.

    Each call prints one line of the form ``args=... kwargs=... ret=...``
    and then returns whatever ``fn`` returned. The call signature is left
    untouched, so the wrapped function's metadata is preserved with
    ``functools.wraps``; decorators stacked on top then see the real
    function name instead of ``_inner``.
    """
    # function-scope import so this block does not depend on the file header
    from functools import wraps

    @wraps(fn)
    def _inner(*args, **kwargs):
        # the arguments are printed before the call, so they are still
        # shown when fn raises
        print(f"{args=} {kwargs=}", end=" ")
        ret = fn(*args, **kwargs)
        print(f"{ret=}")
        return ret

    return _inner
@simpler_re_sub(r"\b[A-Z]{2,}\b")
@show_args_and_return
def make_old_style_acronyms(s: str) -> str:
    """Return the characters of ``s`` each followed by a period.

    As decorated, the resulting function takes a whole text, applies this
    transform to every word of two or more capital letters, and prints
    the argument and result of each individual transform call.
    """
    return ".".join(s) + "."
# Demo of the decorator version; the per-match trace lines from
# show_args_and_return are printed before the final converted sentence.
print(
    make_old_style_acronyms(
        "I used to work at GE but now I work at IBM"
    )
)
| # full implementation, supporting groups and named groups | |
| type UserTransform = Callable | |
| def re_sub( | |
| pattern: str, *, flags: int | re.RegexFlag = 0, count: int = 0 | |
| ) -> Callable[[UserTransform], Callable[[str], str]]: | |
| """ | |
| Return a function that substitutes the pattern in a string. | |
| The decorated function should have one of these signatures: | |
| - a single Python string | |
| - strings for capture groups 0 through n passed as args | |
| - named arguments corresponding to the pattern's named capture group | |
| strings | |
| and return the replacement string for that particular match group. | |
| re.sub will call this function for every match found in the string. | |
| """ | |
| pattern_re = re.compile(pattern, flags=flags) | |
| # determine whether this is an ungrouped, grouped, or named group pattern | |
| has_groups = bool(pattern_re.groups) | |
| has_names = bool(pattern_re.groupindex) | |
| def _inner(func: UserTransform) -> Callable[[str], str]: | |
| """ | |
| Decorator that applies the substitution using the function to | |
| transform every match found in an input string. | |
| """ | |
| if has_names: | |
| sub_fn = lambda m: func(**m.groupdict()) | |
| elif has_groups: | |
| sub_fn = lambda m: func(*m.groups()) | |
| else: | |
| sub_fn = lambda m: func(m[0]) | |
| def wrapper(s: str) -> str: | |
| return pattern_re.sub(sub_fn, s, count=count) | |
| # don't use functools.wraps here because we are changing the signature | |
| wrapper.__name__ = func.__name__ | |
| wrapper.__doc__ = func.__doc__ | |
| return wrapper | |
| return _inner | |
@re_sub(r"[a-z][A-Z]")
def convert_camel_to_snake1(lower_upper: str) -> str:
    """
    Convert a camelCase string to snake_case. The pattern has no capture
    groups, so each lowercase/uppercase pair arrives as one two-character
    string.
    """
    # split the pair into its lowercase lead and uppercase follower
    lead, follower = lower_upper
    return "_".join((lead, follower.lower()))
| # Using the re_sub decorator to convert camelCase to snake_case, | |
| # taking a regex with capture groups. | |
@re_sub(r"([a-z])([A-Z])")
def convert_camel_to_snake2(lowercase: str, uppercase: str) -> str:
    """
    Convert a camelCase string to snake_case. The two unnamed capture
    groups of the pattern arrive as the positional arguments lowercase
    and uppercase.
    """
    lowered = uppercase.lower()
    return "_".join((lowercase, lowered))
| # Using the re_sub decorator to convert camelCase to snake_case, | |
| # taking a regex with named capture groups. | |
@re_sub(r"(?P<lc_char>[a-z])(?P<uc_char>[A-Z])")
def convert_camel_to_snake3(lc_char: str, uc_char: str) -> str:
    """
    Convert a camelCase string to snake_case. The named capture groups of
    the pattern arrive as keyword arguments, so the parameter names must
    match the group names.
    """
    return lc_char + "_" + uc_char.lower()
# Demo: print the sample identifier, then the result of each converter,
# one item per line.
name = "camelCaseVar"
print(
    name,
    f"{convert_camel_to_snake1(name)=}",
    f"{convert_camel_to_snake2(name)=}",
    f"{convert_camel_to_snake3(name)=}",
    sep="\n",
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment