jjmalina · October 24, 2015 22:55
diff --git a/transducers.py b/transducers.py
 # -*- coding: utf-8 -*-
 """
    transducers
    ~~~~~~~~~~~

    Lazy evaluation of map/filter/reduce steps over a stream of (key, value)
    pairs. In this case we count the words that start with 't'.
 """

 import types
 import operator
 from collections import defaultdict


 def identity(function=lambda x: x):
    """Build a pass-through pipeline stage.

    The ``function`` argument is accepted but is not used by the stage.
    Returns a generator function that re-yields every ``(key, value)``
    pair of the iterable it is given, unchanged and in order.
    """
    def apply_(iterable):
        for pair in iterable:
            # Unpack so that anything that is not a two-item pair fails here.
            key, value = pair
            yield key, value
    return apply_


 def map_(fn):
    """Build a mapping stage around ``fn(key, value)``.

    ``fn`` is called once per incoming pair. When it returns a generator,
    every ``(key, value)`` pair that generator produces is emitted (a
    one-to-many map); otherwise the first two items of the result are
    emitted as a single pair.
    """
    def apply_(iterable):
        for key, value in iterable:
            produced = fn(key, value)
            if not isinstance(produced, types.GeneratorType):
                # Plain result: emit its first two items as one pair.
                yield produced[0], produced[1]
                continue
            # Generator result: flatten it into the output stream.
            for out_key, out_value in produced:
                yield out_key, out_value
    return apply_


 def filter_(fn):
    """Build a filtering stage around the predicate ``fn(key, value)``.

    Returns a generator function that re-yields, in order, only those
    ``(key, value)`` pairs for which ``fn`` returns a truthy value.
    """
    def apply_(iterable):
        for key, value in iterable:
            if not fn(key, value):
                continue
            yield key, value
    return apply_


 class Index(object):
    """Holder for values grouped by key.

    ``keys`` maps each key to the list of values collected for it; looking
    up a key that has not been seen yet creates an empty list for it.
    """

    def __init__(self):
        grouped = defaultdict(list)
        self.keys = grouped


 def reduce_(fn):
    """Build a group-and-reduce stage around ``fn(key, values)``.

    The returned generator function consumes its whole input, groups the
    values by key, then calls ``fn`` once per key in ascending key order
    with the list of that key's values, and yields the ``(key, value)``
    pair that ``fn`` returns.
    """
    def apply_(iterable):
        # The groups belong to this run only, so an earlier run that was
        # abandoned before exhaustion cannot leak values into this one.
        groups = defaultdict(list)
        for key, value in iterable:
            groups[key].append(value)
        # items() exists on both Python 2 and 3; iteritems() is 2-only.
        for key, values in sorted(groups.items(), key=operator.itemgetter(0)):
            out_key, out_value = fn(key, values)
            yield out_key, out_value
    return apply_


 def sentences():
    """Return the sample text as a new list of four strings."""
    building_blocks = "This module implements a number of iterator building blocks inspired by constructs from APL, Haskell, and SML. Each has been recast in a form suitable for Python."
    iterator_algebra = "The module standardizes a core set of fast, memory efficient tools that are useful by themselves or in combination. Together, they form an “iterator algebra” making it possible to construct specialized tools succinctly and efficiently in pure Python."
    tabulation = "For instance, SML provides a tabulation tool: tabulate(f) which produces a sequence f(0), f(1), .... The same effect can be achieved in Python by combining imap() and count() to form imap(f, count())."
    operator_tools = "These tools and their built-in counterparts also work well with the high-speed functions in the operator module. For example, the multiplication operator can be mapped across two vectors to form an efficient dot-product: sum(imap(operator.mul, vector1, vector2))."
    return [building_blocks, iterator_algebra, tabulation, operator_tools]


 def main():
    """Count the words starting with 't' in the sample sentences.

    Chains the pipeline stages over ``enumerate(sentences())``, checks the
    result against the expected total and prints it.
    """
    pipeline = [
        identity(),
        # One (word, word) pair per space-separated token.
        map_(lambda key, sentence: ((word, word) for word in sentence.split(' '))),
        # startswith() is safe on the empty tokens that split(' ') yields
        # for consecutive spaces; indexing [0] would raise IndexError.
        filter_(lambda word, value: word.lower().startswith('t')),
        # We could set the key to zero here and finish in the next reduce
        # step; for a per-word frequency instead, comment out the last
        # two steps.
        map_(lambda key, value: (key, 1)),
        reduce_(lambda word, values: (word, sum(values))),
        map_(lambda word, count: (0, count)),
        reduce_(lambda key, values: (key, sum(values))),
    ]
    iterator = enumerate(sentences())
    for transformation in pipeline:
        iterator = transformation(iterator)
    results = list(iterator)
    assert results == [(0, 22)]
    print(results)


 # Run the demo pipeline only when this file is executed as a script.
 if __name__ == '__main__':
    main()
	# -*- coding: utf-8 -*-
	"""
	transducers
	~~~~~~~~~~~

	Lazy evaluation of map/filter/reduce steps over a stream of (key, value)
	pairs. In this case we count the words that start with 't'.
	"""

	import types
	import operator
	from collections import defaultdict


	def identity(function=lambda x: x):
	    """Build a pass-through pipeline stage.

	    The ``function`` argument is accepted but is not used by the stage.
	    Returns a generator function that re-yields every ``(key, value)``
	    pair of the iterable it is given, unchanged and in order.
	    """
	    def apply_(iterable):
	        for k, v in iterable:
	            yield k, v
	    return apply_


	def map_(fn):
	    """Build a mapping stage around ``fn(key, value)``.

	    ``fn`` is called once per incoming pair. When it returns a generator,
	    every ``(key, value)`` pair that generator produces is emitted (a
	    one-to-many map); otherwise the first two items of the result are
	    emitted as a single pair.
	    """
	    def apply_(iterable):
	        for key, value in iterable:
	            result = fn(key, value)
	            if isinstance(result, types.GeneratorType):
	                # Generator result: flatten it into the output stream.
	                for k, v in result:
	                    yield k, v
	            else:
	                yield result[0], result[1]
	    return apply_


	def filter_(fn):
	    """Build a filtering stage around the predicate ``fn(key, value)``.

	    Returns a generator function that re-yields, in order, only those
	    ``(key, value)`` pairs for which ``fn`` returns a truthy value.
	    """
	    def apply_(iterable):
	        for key, value in iterable:
	            if fn(key, value):
	                yield key, value
	    return apply_


	class Index(object):
	    """Holder for values grouped by key.

	    ``keys`` maps each key to the list of values collected for it; looking
	    up a key that has not been seen yet creates an empty list for it.
	    """

	    def __init__(self):
	        self.keys = defaultdict(list)


	def reduce_(fn):
	    """Build a group-and-reduce stage around ``fn(key, values)``.

	    The returned generator function consumes its whole input, groups the
	    values by key, then calls ``fn`` once per key in ascending key order
	    with the list of that key's values, and yields the ``(key, value)``
	    pair that ``fn`` returns.
	    """
	    def apply_(iterable):
	        # The groups belong to this run only, so an earlier run that was
	        # abandoned before exhaustion cannot leak values into this one.
	        groups = defaultdict(list)
	        for key, value in iterable:
	            groups[key].append(value)
	        # items() exists on both Python 2 and 3; iteritems() is 2-only.
	        for key, values in sorted(groups.items(), key=operator.itemgetter(0)):
	            k_, v_ = fn(key, values)
	            yield k_, v_
	    return apply_


	def sentences():
	    """Return the sample text as a new list of four strings."""
	    return [
	        "This module implements a number of iterator building blocks inspired by constructs from APL, Haskell, and SML. Each has been recast in a form suitable for Python.",
	        "The module standardizes a core set of fast, memory efficient tools that are useful by themselves or in combination. Together, they form an “iterator algebra” making it possible to construct specialized tools succinctly and efficiently in pure Python.",
	        "For instance, SML provides a tabulation tool: tabulate(f) which produces a sequence f(0), f(1), .... The same effect can be achieved in Python by combining imap() and count() to form imap(f, count()).",
	        "These tools and their built-in counterparts also work well with the high-speed functions in the operator module. For example, the multiplication operator can be mapped across two vectors to form an efficient dot-product: sum(imap(operator.mul, vector1, vector2)).",
	    ]


	def main():
	    """Count the words starting with 't' in the sample sentences.

	    Chains the pipeline stages over ``enumerate(sentences())``, checks the
	    result against the expected total and prints it.
	    """
	    pipeline = [
	        identity(),
	        # One (word, word) pair per space-separated token.
	        map_(lambda key, sentence: ((word, word) for word in sentence.split(' '))),
	        # startswith() is safe on the empty tokens that split(' ') yields
	        # for consecutive spaces; indexing [0] would raise IndexError.
	        filter_(lambda word, value: word.lower().startswith('t')),
	        # We could set the key to zero here and finish in the next reduce
	        # step; for a per-word frequency instead, comment out the last
	        # two steps.
	        map_(lambda key, value: (key, 1)),
	        reduce_(lambda word, values: (word, sum(values))),
	        map_(lambda word, count: (0, count)),
	        reduce_(lambda key, values: (key, sum(values))),
	    ]
	    iterator = enumerate(sentences())
	    for transformation in pipeline:
	        iterator = transformation(iterator)
	    results = list(iterator)
	    assert results == [(0, 22)]
	    print(results)


	# Run the demo pipeline only when this file is executed as a script.
	if __name__ == '__main__':
	    main()