Skip to content

Instantly share code, notes, and snippets.

@dp-quant
Last active January 27, 2021 16:46
Show Gist options
  • Save dp-quant/9d4898cecb9448a209b440d35abdf4fa to your computer and use it in GitHub Desktop.
Save dp-quant/9d4898cecb9448a209b440d35abdf4fa to your computer and use it in GitHub Desktop.
import sys
import re
from collections import defaultdict
# Compiled once at import time. Matches runs of characters that are not
# letters, digits or underscore, so punctuation can be stripped from a token.
_NON_WORD = re.compile(r'\W+')


def tokenize(text):
    """Split *text* into whitespace-separated tokens.

    ``str.split()`` is called without a separator so that tabs, newlines and
    runs of spaces all act as delimiters and no empty tokens are produced.
    """
    return text.split()


def count_anagrams(tokens):
    """Return the number of "extra" anagrams found among *tokens*.

    Each token is first stripped of non-word characters. Words with fewer
    than two distinct characters (including empty ones) are ignored, and a
    word that repeats exactly is only considered once. The remaining words
    are grouped by their sorted characters; a group of ``k`` words
    contributes ``k - 1`` to the result. Comparison is case-sensitive.
    """
    seq_counter = defaultdict(int)
    unique_tokens = set()
    for token in tokens:
        # cleanup: drop punctuation and other non-word characters
        word = _NON_WORD.sub('', token)
        # skip empty words and words made of one repeated character
        if len(set(word)) <= 1:
            continue
        # skip exact duplicates so a repeated word is not its own anagram
        if word in unique_tokens:
            continue
        unique_tokens.add(word)
        # sorted characters are identical for every anagram of the word
        seq_counter[tuple(sorted(word))] += 1
    return sum(count - 1 for count in seq_counter.values())


def main(argv=None):
    """Command-line entry point.

    Reads the text to analyse from ``argv[1]`` (``sys.argv`` when *argv* is
    ``None``), prints the input, its tokens and the anagram count. Exits with
    status 1 and a message on stderr when the argument is missing.
    """
    if argv is None:
        argv = sys.argv
    try:
        testing_string = argv[1]
    except IndexError:
        print('error: expected the text to analyse as the first argument',
              file=sys.stderr)
        sys.exit(1)
    print('Initial String: ', testing_string)
    # tokenize
    tokens = tokenize(testing_string)
    print('Received tokens: ', tokens)
    print('Result:', count_anagrams(tokens))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment