Welcome! Please see the About page for a little more info on how this works.

+4 votes
in Transducers by
edited by

Looking at clojure.walk/walk's implementation it looks like there's a good opportunity to improve its performance for vectors and maps by using a transducer for the coll? case:

   (coll? form) (outer (into (empty form) (map inner form))) ; old
   (coll? form) (outer (into (empty form) (map inner) form)) ; new

Another opportunity is replacing the cond dispatch with a protocol.

Also see this Jira: faster, more flexible dispatch for clojure.walk

See full implementation and benchmarks below:

Walk with transducer implementation

(defn walk*
  [inner outer form]
  (cond
    (list? form) (outer (apply list (map inner form)))
    (instance? clojure.lang.IMapEntry form)
    (outer (clojure.lang.MapEntry/create (inner (key form)) (inner (val form))))
    (seq? form) (outer (doall (map inner form)))
    (instance? clojure.lang.IRecord form)
    (outer (reduce (fn [r x] (conj r (inner x))) form form))
    (coll? form) (outer (into (empty form) (map inner) form))
    :else (outer form)))

(defn postwalk*
  [f form]
  (walk* (fn [form'] (postwalk* f form')) f form ))

Protocol implementation (with transducer):

(defprotocol IWalk
  (-walk [form inner outer]))

(extend-protocol IWalk

  clojure.lang.PersistentList
  (-walk [form inner outer]
    (outer (apply list (map inner form))))

  clojure.lang.PersistentQueue
  (-walk [form inner outer]
    (outer (apply list (map inner form))))

  clojure.lang.MapEntry
  (-walk [form inner outer]
    (outer (clojure.lang.MapEntry/create (inner (key form)) (inner (val form)))))

  clojure.lang.LazySeq
  (-walk [form inner outer]
    (outer (doall (map inner form))))

  clojure.lang.PersistentVector
  (-walk [form inner outer]
    (outer (into (empty form) (map inner) form)))

  clojure.lang.PersistentArrayMap
  (-walk [form inner outer]
    (outer (into (empty form) (map inner) form)))

  clojure.lang.PersistentHashMap
  (-walk [form inner outer]
    (outer (into (empty form) (map inner) form)))

  clojure.lang.PersistentHashSet
  (-walk [form inner outer]
    (outer (into (empty form) (map inner) form)))

  Object
  (-walk [form inner outer]
    (if (instance? clojure.lang.IRecord form)
      (outer (reduce (fn [r x] (conj r (inner x))) form form))
      (outer form)))

  nil
  (-walk [form inner outer]
    (outer form)))

(defn postwalk
  [f form]
  (-walk form (fn [form'] (postwalk f form')) f))

Benchmark results (with criterium)

(require '[criterium.core :as cc])

(def form
  '(1 2 [3 4 5] {:a 6 7 8} [9 [10]] #{:b 7}))


(do
  (cc/bench (postwalk identity form))
  (cc/bench (postwalk* identity form))
  (cc/bench (walk/postwalk identity form)))

;;; Evaluation count : 8413020 in 60 samples of 140217 calls.
;;;              Execution time mean : 7.287719 µs
;;;     Execution time std-deviation : 128.290658 ns
;;;    Execution time lower quantile : 7.119399 µs ( 2.5%)
;;;    Execution time upper quantile : 7.509465 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 1 outliers in 60 samples (1.6667 %)
;;; 	low-severe	 1 (1.6667 %)
;;;  Variance from outliers : 6.2932 % Variance is slightly inflated by outliers
;;; Evaluation count : 7252680 in 60 samples of 120878 calls.
;;;              Execution time mean : 8.393008 µs
;;;     Execution time std-deviation : 140.292941 ns
;;;    Execution time lower quantile : 8.222419 µs ( 2.5%)
;;;    Execution time upper quantile : 8.724502 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 3 outliers in 60 samples (5.0000 %)
;;; 	low-severe	 2 (3.3333 %)
;;; 	low-mild	 1 (1.6667 %)
;;;  Variance from outliers : 6.2524 % Variance is slightly inflated by outliers
;;; Evaluation count : 5888880 in 60 samples of 98148 calls.
;;;              Execution time mean : 10.259563 µs
;;;     Execution time std-deviation : 344.368716 ns
;;;    Execution time lower quantile : 10.017438 µs ( 2.5%)
;;;    Execution time upper quantile : 10.594850 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 2 outliers in 60 samples (3.3333 %)
;;; 	low-severe	 1 (1.6667 %)
;;; 	low-mild	 1 (1.6667 %)
;;;  Variance from outliers : 20.5816 % Variance is moderately inflated by outliers

(def form
  '(defn walk*
     [inner outer form]
     (cond
       (list? form) (outer (apply list (map inner form)))
       (instance? clojure.lang.IMapEntry form)
       (outer (clojure.lang.MapEntry/create (inner (key form)) (inner (val form))))
       (seq? form) (outer (doall (map inner form)))
       (instance? clojure.lang.IRecord form)
       (outer (reduce (fn [r x] (conj r (inner x))) form form))
       (coll? form) (outer (into (empty form) (map inner) form))
       :else (outer form))))


(do
  (cc/bench (postwalk identity form))
  (cc/bench (postwalk* identity form))
  (cc/bench (walk/postwalk identity form)))

;;; Evaluation count : 1812840 in 60 samples of 30214 calls.
;;;              Execution time mean : 33.196956 µs
;;;     Execution time std-deviation : 961.919396 ns
;;;    Execution time lower quantile : 32.063979 µs ( 2.5%)
;;;    Execution time upper quantile : 34.546564 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 6 outliers in 60 samples (10.0000 %)
;;; 	low-severe	 2 (3.3333 %)
;;; 	low-mild	 1 (1.6667 %)
;;; 	high-mild	 3 (5.0000 %)
;;;  Variance from outliers : 15.8051 % Variance is moderately inflated by outliers
;;; Evaluation count : 1653840 in 60 samples of 27564 calls.
;;;              Execution time mean : 36.626230 µs
;;;     Execution time std-deviation : 441.227719 ns
;;;    Execution time lower quantile : 35.798588 µs ( 2.5%)
;;;    Execution time upper quantile : 37.373995 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;; Evaluation count : 1728600 in 60 samples of 28810 calls.
;;;              Execution time mean : 35.173883 µs
;;;     Execution time std-deviation : 400.776590 ns
;;;    Execution time lower quantile : 34.697017 µs ( 2.5%)
;;;    Execution time upper quantile : 35.825413 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 1 outliers in 60 samples (1.6667 %)
;;; 	low-severe	 1 (1.6667 %)
;;;  Variance from outliers : 1.6389 % Variance is slightly inflated by outliers

(def form
  {1 {2 {3 {4 {5 {6 {7 {8 {9 {10 11}}}}}}}}}})


(do
  (cc/bench (postwalk identity form))
  (cc/bench (postwalk* identity form))
  (cc/bench (walk/postwalk identity form)))

;;; Evaluation count : 6947100 in 60 samples of 115785 calls.
;;;              Execution time mean : 8.809319 µs
;;;     Execution time std-deviation : 163.576702 ns
;;;    Execution time lower quantile : 8.627843 µs ( 2.5%)
;;;    Execution time upper quantile : 9.126265 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 2 outliers in 60 samples (3.3333 %)
;;; 	low-severe	 2 (3.3333 %)
;;;  Variance from outliers : 7.8088 % Variance is slightly inflated by outliers
;;; Evaluation count : 6457380 in 60 samples of 107623 calls.
;;;              Execution time mean : 9.628546 µs
;;;     Execution time std-deviation : 171.963701 ns
;;;    Execution time lower quantile : 9.316393 µs ( 2.5%)
;;;    Execution time upper quantile : 9.976758 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 5 outliers in 60 samples (8.3333 %)
;;; 	low-severe	 2 (3.3333 %)
;;; 	low-mild	 2 (3.3333 %)
;;; 	high-mild	 1 (1.6667 %)
;;;  Variance from outliers : 7.7664 % Variance is slightly inflated by outliers
;;; Evaluation count : 5483100 in 60 samples of 91385 calls.
;;;              Execution time mean : 11.064318 µs
;;;     Execution time std-deviation : 167.430489 ns
;;;    Execution time lower quantile : 10.854539 µs ( 2.5%)
;;;    Execution time upper quantile : 11.447064 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 2 outliers in 60 samples (3.3333 %)
;;; 	low-severe	 1 (1.6667 %)
;;; 	low-mild	 1 (1.6667 %)
;;;  Variance from outliers : 1.6389 % Variance is slightly inflated by outliers

(def form
  [1 [2 [3 [4 [5 [6 [7 [8 [9 [10 11]]]]]]]]]])

(do
  (cc/bench (postwalk identity form))
  (cc/bench (postwalk* identity form))
  (cc/bench (walk/postwalk identity form)))

;;; Evaluation count : 7627620 in 60 samples of 127127 calls.
;;;              Execution time mean : 7.770194 µs
;;;     Execution time std-deviation : 81.222440 ns
;;;    Execution time lower quantile : 7.610275 µs ( 2.5%)
;;;    Execution time upper quantile : 7.913045 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;; Evaluation count : 6941880 in 60 samples of 115698 calls.
;;;              Execution time mean : 8.726047 µs
;;;     Execution time std-deviation : 133.165422 ns
;;;    Execution time lower quantile : 8.557593 µs ( 2.5%)
;;;    Execution time upper quantile : 8.961663 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 1 outliers in 60 samples (1.6667 %)
;;; 	low-severe	 1 (1.6667 %)
;;;  Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
;;; Evaluation count : 5045520 in 60 samples of 84092 calls.
;;;              Execution time mean : 12.051122 µs
;;;     Execution time std-deviation : 223.757365 ns
;;;    Execution time lower quantile : 11.799274 µs ( 2.5%)
;;;    Execution time upper quantile : 12.768594 µs (97.5%)
;;;                    Overhead used : 9.033571 ns
;;;
;;; Found 3 outliers in 60 samples (5.0000 %)
;;; 	low-severe	 1 (1.6667 %)
;;; 	low-mild	 2 (3.3333 %)
;;;  Variance from outliers : 7.8088 
...