Last active
October 29, 2017 20:43
-
-
Save pstoll/ae73582763540051d321a4eb15304226 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python ./test-seqs.py | |
timing 5 iterations with 1000 items: | |
first 5 ids: ['5aalphabetR84', '2aalphabet3aY', 'BmalphabetACg', 'JMalphabetunQ', 'Gqalphabet7le'] | |
func dedup_orig: 0.00107002258301 | |
func dedup_a1: 0.000247001647949 | |
func dedup_a2: 0.000427961349487 | |
timing 5 iterations with 10000 items: | |
first 5 ids: ['UPalphabetGuF', 'gqalphabet1rI', 'DGalphabetxbx', 'cHalphabetOYB', 'JYalphabetzW2'] | |
func dedup_orig: 0.0133740901947 | |
func dedup_a1: 0.00292015075684 | |
func dedup_a2: 0.00455904006958 | |
timing 5 iterations with 100000 items: | |
first 5 ids: ['NmalphabetLQj', 'uoalphabetd8l', 'ncalphabetkTm', 'tWalphabet77K', 'v6alphabetUBX'] | |
func dedup_orig: 0.16601395607 | |
func dedup_a1: 0.0943830013275 | |
func dedup_a2: 0.103300094604 | |
timing 5 iterations with 1000000 items: | |
first 5 ids: ['RCalphabetApz', 'ymalphabetxqn', '8OalphabetXmI', 'dlalphabetHma', 'YbalphabetCE8'] | |
func dedup_orig: 2.21989107132 | |
func dedup_a1: 1.22671294212 | |
func dedup_a2: 1.61492204666 | |
timing 5 iterations with 10000000 items: | |
first 5 ids: ['YPalphabetHQQ', 'LfalphabetUWA', 'S0alphabet9kW', 'HSalphabet8fM', 'lqalphabetzTV'] | |
func dedup_orig: 22.2884390354 | |
func dedup_a1: 13.5447628498 | |
func dedup_a2: 17.1326370239 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Test the sequence dedeup function in the article at | |
https://blog.algorithmia.com/deep-dive-into-object-detection-with-open-images-using-tensorflow/ | |
vs some other basic implementations | |
This code is placed in the public domain. | |
Author: Perry A Stoll | |
""" | |
import string | |
import random | |
import timeit | |
def id_generator(size=6, chars=string.ascii_letters + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    Each character is picked independently with ``random.choice``, so
    characters may repeat. A ``size`` of 0 yields the empty string.
    """
    picks = [random.choice(chars) for _ in range(size)]
    return ''.join(picks)
def make_ids(n, s='alphabet'):
    """Build a list of ``n`` random ids that all share the middle part ``s``.

    Every id is a 2-character random prefix, then ``s``, then a
    3-character random suffix. The random characters at both ends add
    some entropy and avoid possible odd effects of lots of strings
    sharing a similar prefix.
    """
    ids = []
    for _ in range(n):
        # Prefix is drawn before suffix, matching left-to-right evaluation.
        prefix = id_generator(2)
        suffix = id_generator(3)
        ids.append(prefix + s + suffix)
    return ids
def wrapper(func, *args, **kwargs):
    """Bind ``func`` to its arguments and return a zero-argument callable.

    The returned callable invokes ``func(*args, **kwargs)`` each time it
    is called and discards the result (it always returns ``None``), which
    is the shape ``timeit.timeit`` expects for a callable statement.
    """
    def invoke():
        func(*args, **kwargs)

    return invoke
def dedup_orig(seq):
    """Order-preserving dedup: the article's single list comprehension.

    Keeps the first occurrence of every item in ``seq`` and returns them
    as a list. Items must be hashable because a set tracks what has
    already been emitted.
    """
    emitted = set()
    remember = emitted.add  # bound once to skip the attribute lookup per item
    # ``remember`` returns None, so ``not remember(item)`` is always True and
    # only runs (recording the item) when the membership test lets it through.
    return [item for item in seq if item not in emitted and not remember(item)]
def dedup_a1(seq):
    """Dedup by building a set; use when any collection of unique items will do.

    Returns a ``set``, so the original ordering of ``seq`` is not kept.
    """
    unique = set(seq)
    return unique
def dedup_a2(seq):
    """Dedup through a set, then convert back for callers that need a list.

    The list is produced from a set, so the original ordering of ``seq``
    is not kept.
    """
    unique = set(seq)
    return list(unique)
# Implementations to benchmark, timed in this order.
funcs = [
    dedup_orig,
    dedup_a1,
    dedup_a2,
]
def fname(f):
    """Return a printable name for the callable ``f``.

    Looks for the Python 2 ``func_name`` attribute first, then falls back
    to ``__name__``. If the object has neither, the empty string is
    returned instead of raising ``AttributeError``.
    """
    # getattr with a default only absorbs the missing-attribute case, unlike
    # the bare ``except:`` this replaces, which hid every kind of error.
    name = getattr(f, 'func_name', None)
    if name is None:
        name = getattr(f, '__name__', '')
    return name
def test(item_counts=(10**3, 10**4, 10**5, 10**6, 10**7), iters=5):
    """Time every function in ``funcs`` on random id lists and print the results.

    Args:
        item_counts: Input sizes to benchmark, one round per size. The
            default reproduces the original run (1e3 through 1e7 items);
            pass a shorter sequence for a quick check.
        iters: Number of calls ``timeit`` makes per function; the printed
            figure is the total time for all of them.

    A fresh list of ids is generated for each size and shared by all
    functions in that round, so they are timed on identical input.
    """
    for item_count in item_counts:
        ids = make_ids(item_count)
        print("timing {} iterations with {} items:".format(iters, item_count))
        print("first 5 ids: {}".format(ids[:5]))
        for func in funcs:
            # timeit needs a zero-argument callable, hence the wrapper.
            timing = timeit.timeit(wrapper(func, ids), number=iters)
            print("\tfunc {}: {}".format(fname(func), timing))
# Run the benchmark only when executed as a script, not when imported.
if __name__ == '__main__':
    test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment