Created
March 22, 2024 10:45
-
-
Save promto-c/e67409bf6ec7b78654292d26afadea88 to your computer and use it in GitHub Desktop.
A Python function to sanitize input strings by replacing non-alphanumeric characters with underscores, stripping leading/trailing underscores or spaces, and prepending an underscore if the string starts with a digit.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
def sanitize_string(input_string: str, prepend_char: str = '_') -> str:
    """Turn an arbitrary string into one made only of ``[a-zA-Z0-9_]``.

    Every character outside ``[a-zA-Z0-9_]`` is replaced with an underscore,
    leading and trailing underscores are stripped, and ``prepend_char`` is
    added in front when the result would otherwise start with a digit.

    Runs of replaced characters are not collapsed: each invalid character
    becomes its own underscore. If nothing but underscores remains after
    replacement, the empty string is returned and nothing is prepended.

    Args:
        input_string (str): The string to be sanitized.
        prepend_char (str): The text to prepend if the first character is a
            digit. Defaults to '_'. It is used as given, without validation.

    Returns:
        str: The sanitized, stripped, and possibly prepended string.

    Examples:
        >>> sanitize_string(" 1Example!String with various+chars&to#sanitize. ")
        '_1Example_String_with_various_chars_to_sanitize'
        >>> sanitize_string("Example with spaces and_underscores.")
        'Example_with_spaces_and_underscores'
        >>> sanitize_string("123numbers start")
        '_123numbers_start'
        >>> sanitize_string("no$pecial*Characters!")
        'no_pecial_Characters'
        >>> sanitize_string("__Already__Clean__")
        'Already__Clean'
        >>> sanitize_string("!!!")
        ''
    """
    # Replace every character outside the allowed set with an underscore.
    replaced = re.sub(r'[^a-zA-Z0-9_]', '_', input_string)

    # Whitespace has already been turned into underscores above, so stripping
    # underscores alone trims everything the original padding produced.
    sanitized = replaced.strip('_')

    # Guard against the empty string before inspecting the first character.
    if sanitized and sanitized[0].isdigit():
        sanitized = prepend_char + sanitized

    return sanitized
if __name__ == "__main__":
    # When executed as a script, verify the examples embedded in the
    # docstrings of this module.
    import doctest

    doctest.testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment