Last active
August 29, 2015 14:10
-
-
Save jarohen/9f124eae848e026dff87 to your computer and use it in GitHub Desktop.
November 2014 Thoughtworks dojo - Transducers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; We can write map and filter in terms of reduce | |
(defn my-map [f coll] | |
(reduce (fn [acc el] | |
(conj acc (f el))) | |
[] | |
coll)) | |
(defn my-filter [pred coll] | |
(reduce (fn [acc el] | |
(if (pred el) | |
(conj acc el) | |
acc)) | |
[] | |
coll)) | |
;; In the reducing function, 'conj' is the only thing related to | |
;; sequences - everything else is the 'essence' of map/filter. | |
;; Let's pass 'conj' in: | |
(defn mapping [f] | |
(fn [rf] | |
(fn [acc el] | |
(rf acc (f el))))) | |
(defn filtering [pred] | |
(fn [rf] | |
(fn [acc el] | |
(if (pred el) | |
(rf acc el) | |
acc)))) | |
;; These are 'transducers' - functions that wrap a reducing function | |
;; (like Ring middleware wraps a Ring handler, if you like). They take | |
;; a reducing function (:: a -> b -> a, if you're Haskell-y inclined) | |
;; and return another reducing function, with the map/filter | |
;; transformation applied beforehand. | |
;; Some transformations are stateful - they need to (in | |
;; partition-all's case) keep track of how many elements we've seen so | |
;; far in this partition. | |
;; We do this by closing the reducing-function over a state atom. | |
;; For partition-all, we also need to know when the input sequence is | |
;; exhausted, so that we can append the final partition - this is done | |
;; with a 1-arg overload of the reducing functions. | |
;; All transducers should implement this, even if only to forward | |
;; through to the reducing function that they're wrapping. | |
;; In partition-all's case, we first reduce any remaining elements | |
;; through the nested rf's 2-arg function, then call through to its | |
;; 1-arg function so that the nested rf can clean up as well. | |
;; Excuse the (ab)use of Clojure's STM here - Clojure core does this | |
;; with new 'volatiles' (and a LinkedList, IIRC), and is far less | |
;; buggy... | |
(defn partition-all [n] | |
(fn [rf] | |
(let [!coll (atom [])] | |
(fn | |
([] | |
(rf)) | |
([acc] | |
(let [coll @!coll | |
res (if (empty? coll) | |
acc | |
(rf acc coll))] | |
(rf res))) | |
([acc el] | |
(swap! !coll conj el) | |
(if (= n (count @!coll)) | |
(let [res (rf acc @!coll)] | |
(reset! !coll []) | |
res) | |
acc)))))) | |
;; Jamie then invented a variation of map - that applies its function | |
;; to every element of singularly nested sequences: | |
;; mmap :: (a -> b) -> [[a]] -> [[b]] | |
;; [[1 2] [3 4]] ->> (mmap inc) ->> [[2 3] [4 5]] | |
;; In traditional Clojure: | |
(defn mmap [f ccoll] | |
(map #(map f %) ccoll)) | |
(mmap inc [[1 2] [3 4]]) | |
;; As a transducer: | |
(defn mmap [f] | |
(fn [rf] | |
(fn [acc el] | |
(rf acc (map f el))))) | |
;; Mathieu then talked about the state behind the partition-all | |
;; transducer - wouldn't it be great if it could be stateless? | |
;; It could (maybe), if you're prepared to go a state-monad-like route | |
;; - returning a pair of the accumulator and the updated reducing | |
;; function: | |
;; (untested, but looks ok!) | |
(defn partition-all [n] | |
(fn [rf] | |
(letfn [(partition-all* [rf partial-coll] | |
(fn | |
([] (rf)) | |
([acc] | |
(let [[res new-rf] (if (empty? partial-coll) | |
[acc rf] | |
(rf acc partial-coll))] | |
(new-rf res))) | |
([acc el] | |
(let [new-coll (conj partial-coll el)] | |
(if (= (count new-coll) n) | |
(let [[new-acc new-rf] (rf acc new-coll)] | |
[new-acc #(partition-all* new-rf [])]) | |
[acc #(partition-all* rf new-coll)])))))] | |
(partition-all* rf [])))) | |
;; Again, for those who like Haskell type signatures, the 2-arg version is typed recursively: | |
;; StatelessTransducer :: (a -> b -> (a, StatelessTransducer)) -> (a -> b -> (a, StatelessTransducer)) | |
;; You'd also have to update 'into', 'transduce', 'sequence' etc to | |
;; handle the pairs - this is left as an exercise to the reader... | |
;; Cheers! |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment