Looking at clojure.walk/walk
's implementation it looks like there's a good opportunity to improve its performance for vectors and maps by using a transducer for the coll?
case:
(coll? form) (outer (into (empty form) (map inner form))) ; old
(coll? form) (outer (into (empty form) (map inner) form)) ; new
Another opportunity is replacing the cond
dispatch with a protocol.
Also see this Jira: faster, more flexible dispatch for clojure.walk
See full implementation and benchmarks below:
Walk with transducer implementation
(defn walk*
[inner outer form]
(cond
(list? form) (outer (apply list (map inner form)))
(instance? clojure.lang.IMapEntry form)
(outer (clojure.lang.MapEntry/create (inner (key form)) (inner (val form))))
(seq? form) (outer (doall (map inner form)))
(instance? clojure.lang.IRecord form)
(outer (reduce (fn [r x] (conj r (inner x))) form form))
(coll? form) (outer (into (empty form) (map inner) form))
:else (outer form)))
(defn postwalk*
[f form]
(walk* (fn [form'] (postwalk* f form')) f form ))
Protocol implementation (with transducer):
(defprotocol IWalk
(-walk [form inner outer]))
(extend-protocol IWalk
clojure.lang.PersistentList
(-walk [form inner outer]
(outer (apply list (map inner form))))
clojure.lang.PersistentQueue
(-walk [form inner outer]
(outer (apply list (map inner form))))
clojure.lang.MapEntry
(-walk [form inner outer]
(outer (clojure.lang.MapEntry/create (inner (key form)) (inner (val form)))))
clojure.lang.LazySeq
(-walk [form inner outer]
(outer (doall (map inner form))))
clojure.lang.PersistentVector
(-walk [form inner outer]
(outer (into (empty form) (map inner) form)))
clojure.lang.PersistentArrayMap
(-walk [form inner outer]
(outer (into (empty form) (map inner) form)))
clojure.lang.PersistentHashMap
(-walk [form inner outer]
(outer (into (empty form) (map inner) form)))
clojure.lang.PersistentHashSet
(-walk [form inner outer]
(outer (into (empty form) (map inner) form)))
Object
(-walk [form inner outer]
(if (instance? clojure.lang.IRecord form)
(outer (reduce (fn [r x] (conj r (inner x))) form form))
(outer form)))
nil
(-walk [form inner outer]
(outer form)))
(defn postwalk
[f form]
(-walk form (fn [form'] (postwalk f form')) f))
Benchmark results (with criterium)
(require '[criterium.core :as cc])
(def form
'(1 2 [3 4 5] {:a 6 7 8} [9 [10]] #{:b 7}))
(do
(cc/bench (postwalk identity form))
(cc/bench (postwalk* identity form))
(cc/bench (walk/postwalk identity form)))
;;; Evaluation count : 8413020 in 60 samples of 140217 calls.
;;; Execution time mean : 7.287719 µs
;;; Execution time std-deviation : 128.290658 ns
;;; Execution time lower quantile : 7.119399 µs ( 2.5%)
;;; Execution time upper quantile : 7.509465 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 1 outliers in 60 samples (1.6667 %)
;;; low-severe 1 (1.6667 %)
;;; Variance from outliers : 6.2932 % Variance is slightly inflated by outliers
;;; Evaluation count : 7252680 in 60 samples of 120878 calls.
;;; Execution time mean : 8.393008 µs
;;; Execution time std-deviation : 140.292941 ns
;;; Execution time lower quantile : 8.222419 µs ( 2.5%)
;;; Execution time upper quantile : 8.724502 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 3 outliers in 60 samples (5.0000 %)
;;; low-severe 2 (3.3333 %)
;;; low-mild 1 (1.6667 %)
;;; Variance from outliers : 6.2524 % Variance is slightly inflated by outliers
;;; Evaluation count : 5888880 in 60 samples of 98148 calls.
;;; Execution time mean : 10.259563 µs
;;; Execution time std-deviation : 344.368716 ns
;;; Execution time lower quantile : 10.017438 µs ( 2.5%)
;;; Execution time upper quantile : 10.594850 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 2 outliers in 60 samples (3.3333 %)
;;; low-severe 1 (1.6667 %)
;;; low-mild 1 (1.6667 %)
;;; Variance from outliers : 20.5816 % Variance is moderately inflated by outliers
(def form
'(defn walk*
[inner outer form]
(cond
(list? form) (outer (apply list (map inner form)))
(instance? clojure.lang.IMapEntry form)
(outer (clojure.lang.MapEntry/create (inner (key form)) (inner (val form))))
(seq? form) (outer (doall (map inner form)))
(instance? clojure.lang.IRecord form)
(outer (reduce (fn [r x] (conj r (inner x))) form form))
(coll? form) (outer (into (empty form) (map inner) form))
:else (outer form))))
(do
(cc/bench (postwalk identity form))
(cc/bench (postwalk* identity form))
(cc/bench (walk/postwalk identity form)))
;;; Evaluation count : 1812840 in 60 samples of 30214 calls.
;;; Execution time mean : 33.196956 µs
;;; Execution time std-deviation : 961.919396 ns
;;; Execution time lower quantile : 32.063979 µs ( 2.5%)
;;; Execution time upper quantile : 34.546564 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 6 outliers in 60 samples (10.0000 %)
;;; low-severe 2 (3.3333 %)
;;; low-mild 1 (1.6667 %)
;;; high-mild 3 (5.0000 %)
;;; Variance from outliers : 15.8051 % Variance is moderately inflated by outliers
;;; Evaluation count : 1653840 in 60 samples of 27564 calls.
;;; Execution time mean : 36.626230 µs
;;; Execution time std-deviation : 441.227719 ns
;;; Execution time lower quantile : 35.798588 µs ( 2.5%)
;;; Execution time upper quantile : 37.373995 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;; Evaluation count : 1728600 in 60 samples of 28810 calls.
;;; Execution time mean : 35.173883 µs
;;; Execution time std-deviation : 400.776590 ns
;;; Execution time lower quantile : 34.697017 µs ( 2.5%)
;;; Execution time upper quantile : 35.825413 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 1 outliers in 60 samples (1.6667 %)
;;; low-severe 1 (1.6667 %)
;;; Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
(def form
{1 {2 {3 {4 {5 {6 {7 {8 {9 {10 11}}}}}}}}}})
(do
(cc/bench (postwalk identity form))
(cc/bench (postwalk* identity form))
(cc/bench (walk/postwalk identity form)))
;;; Evaluation count : 6947100 in 60 samples of 115785 calls.
;;; Execution time mean : 8.809319 µs
;;; Execution time std-deviation : 163.576702 ns
;;; Execution time lower quantile : 8.627843 µs ( 2.5%)
;;; Execution time upper quantile : 9.126265 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 2 outliers in 60 samples (3.3333 %)
;;; low-severe 2 (3.3333 %)
;;; Variance from outliers : 7.8088 % Variance is slightly inflated by outliers
;;; Evaluation count : 6457380 in 60 samples of 107623 calls.
;;; Execution time mean : 9.628546 µs
;;; Execution time std-deviation : 171.963701 ns
;;; Execution time lower quantile : 9.316393 µs ( 2.5%)
;;; Execution time upper quantile : 9.976758 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 5 outliers in 60 samples (8.3333 %)
;;; low-severe 2 (3.3333 %)
;;; low-mild 2 (3.3333 %)
;;; high-mild 1 (1.6667 %)
;;; Variance from outliers : 7.7664 % Variance is slightly inflated by outliers
;;; Evaluation count : 5483100 in 60 samples of 91385 calls.
;;; Execution time mean : 11.064318 µs
;;; Execution time std-deviation : 167.430489 ns
;;; Execution time lower quantile : 10.854539 µs ( 2.5%)
;;; Execution time upper quantile : 11.447064 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 2 outliers in 60 samples (3.3333 %)
;;; low-severe 1 (1.6667 %)
;;; low-mild 1 (1.6667 %)
;;; Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
(def form
[1 [2 [3 [4 [5 [6 [7 [8 [9 [10 11]]]]]]]]]])
(do
(cc/bench (postwalk identity form))
(cc/bench (postwalk* identity form))
(cc/bench (walk/postwalk identity form)))
;;; Evaluation count : 7627620 in 60 samples of 127127 calls.
;;; Execution time mean : 7.770194 µs
;;; Execution time std-deviation : 81.222440 ns
;;; Execution time lower quantile : 7.610275 µs ( 2.5%)
;;; Execution time upper quantile : 7.913045 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;; Evaluation count : 6941880 in 60 samples of 115698 calls.
;;; Execution time mean : 8.726047 µs
;;; Execution time std-deviation : 133.165422 ns
;;; Execution time lower quantile : 8.557593 µs ( 2.5%)
;;; Execution time upper quantile : 8.961663 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 1 outliers in 60 samples (1.6667 %)
;;; low-severe 1 (1.6667 %)
;;; Variance from outliers : 1.6389 % Variance is slightly inflated by outliers
;;; Evaluation count : 5045520 in 60 samples of 84092 calls.
;;; Execution time mean : 12.051122 µs
;;; Execution time std-deviation : 223.757365 ns
;;; Execution time lower quantile : 11.799274 µs ( 2.5%)
;;; Execution time upper quantile : 12.768594 µs (97.5%)
;;; Overhead used : 9.033571 ns
;;;
;;; Found 3 outliers in 60 samples (5.0000 %)
;;; low-severe 1 (1.6667 %)
;;; low-mild 2 (3.3333 %)
;;; Variance from outliers : 7.8088