Last active
October 24, 2015 22:55
-
-
Save jjmalina/3c122c3a21a4f8e23f67 to your computer and use it in GitHub Desktop.
Transducers in python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
transducers
~~~~~~~~~~~

Lazy evaluation of transformations over a stream of (key, value) pairs.
In this case we do a word count.
"""
import types | |
import operator | |
from collections import defaultdict | |
def identity(function=lambda x: x):
    """Build a pass-through pipeline stage.

    The ``function`` argument is accepted but never used by the stage.

    Returns a generator function that takes an iterable of two-item
    entries and re-emits each one as a ``(key, value)`` tuple.
    """
    def passthrough(pairs):
        for pair in pairs:
            # Unpacking enforces the two-item shape of every entry.
            key, value = pair
            yield key, value
    return passthrough
def map_(fn):
    """Build a pipeline stage that transforms every (key, value) pair.

    ``fn`` is called as ``fn(key, value)``. When it returns a generator,
    every ``(key, value)`` pair that generator produces is emitted;
    otherwise the first two items of the returned object are emitted as a
    single pair.
    """
    def mapper(pairs):
        for key, value in pairs:
            outcome = fn(key, value)
            if not isinstance(outcome, types.GeneratorType):
                # Plain result: treat it as one (key, value) pair.
                yield outcome[0], outcome[1]
                continue
            # Generator result: flatten it into the output stream.
            for out_key, out_value in outcome:
                yield out_key, out_value
    return mapper
def filter_(fn):
    """Build a pipeline stage that keeps only the pairs accepted by ``fn``.

    ``fn`` is called as ``fn(key, value)``; a pair is re-emitted unchanged
    when the result is truthy and dropped otherwise.
    """
    def keep(pairs):
        for key, value in pairs:
            if not fn(key, value):
                continue
            yield key, value
    return keep
class Index(object):
    """Mutable holder for a mapping of keys to lists of collected values."""

    def __init__(self):
        # Missing keys start out as an empty list, so values can be
        # appended without checking for the key first.
        self.keys = defaultdict(list)
def reduce_(fn):
    """Build a pipeline stage that groups values by key and reduces each group.

    Args:
        fn: Callable invoked as ``fn(key, values)`` where ``values`` is the
            list of every value seen for ``key``, in arrival order. It must
            return a ``(key, value)`` pair.

    Returns:
        A generator function taking an iterable of ``(key, value)`` pairs.
        The whole input is consumed before anything is yielded; results are
        then produced in ascending key order.
    """
    def apply_(iterable):
        # Grouping state is local to each run, so partially consumed or
        # interleaved runs of the same stage cannot leak values into one
        # another.
        groups = defaultdict(list)
        for key, value in iterable:
            groups[key].append(value)
        # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``.
        grouped = sorted(groups.items(), key=operator.itemgetter(0))
        for key, values in grouped:
            k_, v_ = fn(key, values)
            yield k_, v_
    return apply_
def sentences():
    """Return the sample text for the demo as a new list of four strings."""
    first = (
        "This module implements a number of iterator building blocks "
        "inspired by constructs from APL, Haskell, and SML. Each has been "
        "recast in a form suitable for Python."
    )
    second = (
        "The module standardizes a core set of fast, memory efficient tools "
        "that are useful by themselves or in combination. Together, they "
        "form an “iterator algebra” making it possible to construct "
        "specialized tools succinctly and efficiently in pure Python."
    )
    third = (
        "For instance, SML provides a tabulation tool: tabulate(f) which "
        "produces a sequence f(0), f(1), .... The same effect can be "
        "achieved in Python by combining imap() and count() to form "
        "imap(f, count())."
    )
    fourth = (
        "These tools and their built-in counterparts also work well with "
        "the high-speed functions in the operator module. For example, the "
        "multiplication operator can be mapped across two vectors to form "
        "an efficient dot-product: sum(imap(operator.mul, vector1, vector2))."
    )
    return [first, second, third, fourth]
def main():
    """Run the demo pipeline: count the words that start with 't'.

    Feeds ``enumerate(sentences())`` through each stage in order, checks the
    result against the expected total and prints it.
    """
    pipeline = [
        identity(),
        # Fan every sentence out into one (word, word) pair per token.
        map_(lambda key, sentence: ((word, word) for word in sentence.split(' '))),
        # startswith() is safe for the empty tokens that split(' ') yields on
        # repeated spaces, where indexing [0] would raise IndexError.
        filter_(lambda word, value: word.lower().startswith('t')),
        # we could just set the key to zero and be done in the next reduce
        # step, but if you want a word frequency then just comment out the
        # last two steps
        map_(lambda key, value: (key, 1)),
        reduce_(lambda word, values: (word, sum(values))),
        map_(lambda word, count: (0, count)),
        reduce_(lambda key, values: (key, sum(values))),
    ]
    iterator = enumerate(sentences())
    for transformation in pipeline:
        iterator = transformation(iterator)
    results = list(iterator)
    # Self-check of the demo against the known total for the sample text.
    assert results == [(0, 22)]
    print(results)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment