matthewdowney · September 6, 2018 19:55 · nivekuil · Aug 7, 2021
diff --git a/sorting-transducer.clj b/sorting-transducer.clj
 (defn xf-sort
  "A sorting transducer. Mostly a syntactic improvement to allow composition of
  sorting with the standard transducers, but also provides a slight performance
  increase over transducing, sorting, and then continuing to transduce."
  ([]
   (xf-sort compare))
  ([cmp]
   (fn [rf]
     (let [temp-list (java.util.ArrayList.)]
       (fn
         ([]
          (rf))
         ([xs]
          (reduce rf xs (sort cmp (vec (.toArray temp-list)))))
         ([xs x]
          (.add temp-list x)
          xs))))))

 (comment
  "For example, take some random numbers, multiple each by ten, 
  subtract 5, sort them by magnitude, and keep the even ones. "
  (sequence
    (comp 
      (map (partial * 10)) 
      (map #(- % 5))
      (xf-sort) 
      (filter (comp even? int))
    (repeatedly 10000 rand))))


 ;; Benchmark it vs a non-transducing approach
 (let [nums (doall (repeatedly 100000 rand))
      xform (comp (map (partial * 10)) (xf-sort) (filter (comp even? int)))]
  ;; Time using transducer
  (->>
    (with-out-str
      (criterium/quick-bench
        (count (sequence xform nums))))
    (println "Transducer benchmarks:\n"))

  ;; Time using standard sort
  (->>
    (with-out-str
      (criterium/quick-bench
        (count
          (->> nums
               (map (partial * 10))
               sort
               (filter (comp even? int))))))
    (println "Sequential benchmarks:\n")))

 ;; => #'core/xf-sort
 ;; Transducer benchmarks:
 ;;  Evaluation count : 12 in 6 samples of 2 calls.
 ;;              Execution time mean : 67.704236 ms
 ;;     Execution time std-deviation : 1.142721 ms
 ;;    Execution time lower quantile : 66.509228 ms ( 2.5%)
 ;;    Execution time upper quantile : 69.370735 ms (97.5%)
 ;;                    Overhead used : 1.999151 ns
 ;; Found 1 outliers in 6 samples (16.6667 %)
 ;;     low-severe   1 (16.6667 %)
 ;; Variance from outliers : 13.8889 % Variance is moderately inflated by outliers

 ;; Sequential benchmarks:
 ;; Evaluation count : 12 in 6 samples of 2 calls.
 ;;             Execution time mean : 124.729223 ms
 ;;    Execution time std-deviation : 27.696905 ms
 ;;   Execution time lower quantile : 101.435307 ms ( 2.5%)
 ;;   Execution time upper quantile : 157.780473 ms (97.5%)
 ;;                   Overhead used : 1.999151 ns
 ;;
 ;; => nil
	(defn xf-sort
	"A sorting transducer. Mostly a syntactic improvement to allow composition of
	sorting with the standard transducers, but also provides a slight performance
	increase over transducing, sorting, and then continuing to transduce."
	([]
	(xf-sort compare))
	([cmp]
	(fn [rf]
	(let [temp-list (java.util.ArrayList.)]
	(fn
	([]
	(rf))
	([xs]
	(reduce rf xs (sort cmp (vec (.toArray temp-list)))))
	([xs x]
	(.add temp-list x)
	xs))))))

	(comment
	"For example, take some random numbers, multiple each by ten,
	subtract 5, sort them by magnitude, and keep the even ones. "
	(sequence
	(comp
	(map (partial * 10))
	(map #(- % 5))
	(xf-sort)
	(filter (comp even? int))
	(repeatedly 10000 rand))))


	;; Benchmark it vs a non-transducing approach
	(let [nums (doall (repeatedly 100000 rand))
	xform (comp (map (partial * 10)) (xf-sort) (filter (comp even? int)))]
	;; Time using transducer
	(->>
	(with-out-str
	(criterium/quick-bench
	(count (sequence xform nums))))
	(println "Transducer benchmarks:\n"))

	;; Time using standard sort
	(->>
	(with-out-str
	(criterium/quick-bench
	(count
	(->> nums
	(map (partial * 10))
	sort
	(filter (comp even? int))))))
	(println "Sequential benchmarks:\n")))

	;; => #'core/xf-sort
	;; Transducer benchmarks:
	;; Evaluation count : 12 in 6 samples of 2 calls.
	;; Execution time mean : 67.704236 ms
	;; Execution time std-deviation : 1.142721 ms
	;; Execution time lower quantile : 66.509228 ms ( 2.5%)
	;; Execution time upper quantile : 69.370735 ms (97.5%)
	;; Overhead used : 1.999151 ns
	;; Found 1 outliers in 6 samples (16.6667 %)
	;; low-severe 1 (16.6667 %)
	;; Variance from outliers : 13.8889 % Variance is moderately inflated by outliers

	;; Sequential benchmarks:
	;; Evaluation count : 12 in 6 samples of 2 calls.
	;; Execution time mean : 124.729223 ms
	;; Execution time std-deviation : 27.696905 ms
	;; Execution time lower quantile : 101.435307 ms ( 2.5%)
	;; Execution time upper quantile : 157.780473 ms (97.5%)
	;; Overhead used : 1.999151 ns
	;;
	;; => nil