Last active
November 7, 2019 15:24
-
-
Save internaut/3ab8823856ff5d1f47bd24afe5eaac45 to your computer and use it in GitHub Desktop.
Split a string by multiple characters/strings. Test the function with pytest and hypothesis.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def str_multisplit(s, sep):
    """
    Split string `s` by all characters/strings in `sep`.

    The separators are applied one after another: `s` is split by the first
    separator, every resulting part is split by the second one, and so on.
    When separators overlap (e.g. `'a'` and `'ab'`) the result therefore
    depends on the iteration order of `sep`, which is arbitrary for a set.

    :param s: a string (`str` or `bytes`) to split
    :param sep: list, tuple or set of characters/strings to use for splitting;
                the items must be of the same type as `s` and must not be empty
    :return: list of split string parts; `[s]` if `sep` is empty
    :raises ValueError: if `s` is neither `str` nor `bytes`, if `sep` is not a
                        `list`, `tuple` or `set`, or if `sep` contains an empty
                        separator
    """
    if not isinstance(s, (str, bytes)):
        raise ValueError('`s` must be of type `str` or `bytes`')

    if not isinstance(sep, (list, tuple, set)):
        raise ValueError('`sep` must be of type `list`, `tuple` or `set`')

    # `s[:0]` is the empty value of the same type as `s` (`''` or `b''`), so an
    # empty bytes separator is rejected here with the documented message
    # instead of failing later inside `bytes.split`.
    if '' in sep or s[:0] in sep:
        raise ValueError('`sep` must not contain an empty string')

    parts = [s]
    for c in sep:
        # split every part produced so far by the current separator
        parts = [piece for p in parts for piece in p.split(c)]

    return parts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
import pytest | |
from hypothesis import given | |
from hypothesis import strategies as st | |
from multisplit import str_multisplit | |
# every ASCII punctuation character as its own single-character separator
punct = [*string.punctuation]
@pytest.mark.parametrize(
    ['s', 'sep', 'res'],
    [
        ('Te;s,t', {';', ','}, ['Te', 's', 't']),
        ('US-Student', punct, ['US', 'Student']),
        ('-main_file.exe,', punct, ['', 'main', 'file', 'exe', '']),
    ],
)
def test_str_multisplit(s, sep, res):
    """Check `str_multisplit` against a few hand-written examples."""
    parts = str_multisplit(s, sep)
    assert parts == res
# Alternative strategy with multi-character separators -- try it and see how it fails
# (overlapping separators such as 'a' and 'ab' break the counting in property 5):
#@given(s=st.text(), sep=st.lists(st.text(min_size=1, max_size=10)))
@given(s=st.text(), sep=st.lists(st.characters()))
def test_str_multisplit_hypothesis(s, sep):
    """Property-based test of `str_multisplit` with single-character separators."""
    res = str_multisplit(s, sep)

    # 1. the result is always a plain list
    assert type(res) is list

    # 2. an empty input string yields exactly one empty part
    if not s:
        assert res == ['']

    # 3. without any separators the input comes back unchanged as the only part
    if not sep:
        assert res == [s]

    # 4. every part ...
    for part in res:
        assert part in s  # ... is itself a substring of s
        assert not any(c in part for c in sep)  # ... and is free of all separators

    # 5. the number of parts is one more than the total number of occurrences
    #    of each *unique* separator in s
    n_separator_hits = sum(s.count(c) for c in set(sep))
    assert len(res) == n_separator_hits + 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment