Last active
October 11, 2020 20:01
-
-
Save oxinabox/83bf01b3be7ff442cd13843ea7c029f3 to your computer and use it in GitHub Desktop.
Conditionally setting a column in dataframes.jl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DataFrames, BenchmarkTools, DataFramesMeta

"""
    eg_df(n=100_000)

Build an example `DataFrame` with `n` rows and three randomly filled columns:
`a` drawn from `1:10`, `b` drawn from `'a':'z'`, and `c` drawn from `'A':'Z'`.
"""
function eg_df(n=100_000)
    return DataFrame(a=rand(1:10, n), b=rand('a':'z', n), c=rand('A':'Z', n))
end
# for eachrow loop
# 7.132 ms (239156 allocations: 4.10 MiB)
# Visits every row through `eachrow` and, wherever column `a` equals 1,
# overwrites that row's `b` with its `c`.
@btime let
    # `$(...)` interpolates an already-built data frame into the benchmark.
    df = $(eg_df())
    for row in eachrow(df)
        if row.a == 1
            row.b = row.c
        end
    end
end;
# masking
# 112.436 μs (25 allocations: 117.98 KiB)
# NOTE: the timing above was recorded while the statement below used `.==`,
# which only compared the two selections and discarded the result. It now
# assigns, so the figure should be re-measured.
@btime let
    df = $(eg_df())
    # Select the rows where `a == 1` and copy `c` into `b` for exactly those rows.
    df[df.a .== 1, :b] .= df[df.a .== 1, :c]
end;
# For indexed
# 6.678 ms (249279 allocations: 3.96 MiB)
# Loops over row numbers and reads/writes single cells with `df[row, col]`;
# `b` is overwritten with `c` on rows where `a` equals 1.
@btime let
    df = $(eg_df())
    for ii in 1:nrow(df)
        if df[ii, :a] == 1
            df[ii, :b] = df[ii, :c]
        end
    end
end;
# For indexed inbounds
# 4.335 ms (139204 allocations: 2.28 MiB)
# Same cell-by-cell loop as the plain indexed variant, but wrapped in
# `@inbounds`; `b` is overwritten with `c` on rows where `a` equals 1.
@btime let
    df = $(eg_df())
    @inbounds for ii in 1:nrow(df)
        if df[ii, :a] == 1
            df[ii, :b] = df[ii, :c]
        end
    end
    # This variant evaluates to the data frame itself.
    df
end;
# Make a function
# 22.892 μs (0 allocations: 0 bytes)
"""
    set_b!(a, b, c)

Overwrite `b[i]` with `c[i]` for every index `i` at which `a[i] == 1`.
All other entries of `b` are left untouched; `a` and `c` are only read.

Returns `nothing`.

# Throws
- `DimensionMismatch`: if `a`, `b` and `c` do not share the same indices.
"""
function set_b!(a, b, c)
    # Iterating the indices of all three arrays together checks up front that
    # they match, which is what makes skipping bounds checks below safe.
    @inbounds for ii in eachindex(a, b, c)
        if a[ii] == 1
            b[ii] = c[ii]
        end
    end
    return nothing
end
# Benchmarks `set_b!` applied directly to the three column vectors of the
# data frame.
@btime let
    df = $(eg_df())
    set_b!(df.a, df.b, df.c)
end;
# byrow!
# 248.458 μs (24 allocations: 1.53 MiB)
# Row-wise DataFramesMeta macro: `b` takes the value of `c` on rows where
# `a` equals 1 and keeps its own value otherwise, matching the loop variants.
@btime let
    df = $(eg_df())
    @byrow! df begin
        :b = :a == 1 ? :c : :b
    end
end;
# @transform
# 109.362 μs (26 allocations: 1.91 MiB)
# Column-wise DataFramesMeta macro with a broadcast `ifelse`: `b` takes `c`
# where `a` equals 1 and keeps its own value otherwise, matching the loop
# variants.
@btime let
    df = $(eg_df())
    @transform(df, b = ifelse.(:a .== 1, :c, :b))
end;
# transform ternary
# 103.689 μs (97 allocations: 1.53 MiB)
# `transform` with a `ByRow` ternary: `b` takes `c` where `a` equals 1 and
# keeps its own value otherwise, matching the loop variants.
@btime let
    df = $(eg_df())
    transform(df, [:a, :b, :c] => ByRow((a, b, c) -> a == 1 ? c : b) => :b)
end;
# transform ifelse
# 100.875 μs (83 allocations: 1.53 MiB)
# `transform` with a `ByRow` `ifelse`: `b` takes `c` where `a` equals 1 and
# keeps its own value otherwise, matching the loop variants.
@btime let
    df = $(eg_df())
    transform(df, [:a, :b, :c] => ByRow((a, b, c) -> ifelse(a == 1, c, b)) => :b)
end;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make a function
# 23 μs (0 allocations: 0 bytes)
# transform ifelse
# 101 μs (83 allocations: 1.53 MiB)
# transform ternary
# 104 μs (97 allocations: 1.53 MiB)
# @transform
# 109 μs (26 allocations: 1.91 MiB)
# masking (measured on a `.==` comparison of the two selections, not an assignment)
# 112 μs (25 allocations: 117.98 KiB)
# byrow!
# 248 μs (24 allocations: 1.53 MiB)
# For indexed inbounds
# 4335 μs (139204 allocations: 2.28 MiB)
# For indexed
# 6678 μs (249279 allocations: 3.96 MiB)
# for eachrow loop
# 7132 μs (239156 allocations: 4.10 MiB)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment