Created
May 26, 2022 01:42
-
-
Save emdash/9f037d95ac8e3d491f5fe9eb2ddd077b to your computer and use it in GitHub Desktop.
Bash IO Benchmarks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Strict mode: stop at the first failing command or pipeline stage, and let
# command substitutions inherit errexit too.
set -eo pipefail
shopt -s inherit_errexit

# Command-line options:
#   -s  Slow mode: sets the read-only SLOW_MODE flag that make_dir checks.
# Any other option is ignored here (getopts itself still reports it).
while getopts "s" option; do
  case "${option}" in
    s) declare -r SLOW_MODE=1 ;;
    *) : ;;
  esac
done

# Drop the options that were just parsed so that "$@" starts at the function
# name; otherwise `-s make_dir 10` would end up trying to run `-s` as a
# command.
shift "$((OPTIND - 1))"
# Generates a fresh "data" folder with up to "$1" files with random names.
#
# Each file's contents is randomly set to either FOO or BAR.
#
# Globals:
#   SLOW_MODE (read) - when set, the files are produced by make_dir_slowly
#                      instead of ./gen_uuids.py.
# Arguments:
#   $1 - number of files to generate.
function make_dir {
  rm -rf data
  mkdir data
  if test -v SLOW_MODE; then
    # Forward the requested count to the pure-shell generator.
    make_dir_slowly "$1"
  else
    ./gen_uuids.py make_dir "$1"
  fi
}
# Some naive shell code which does the same thing as ./gen_uuids.py,
# but orders of magnitude more slowly.
#
# Recreates ./data and writes "$1" files into it. Each file is named after the
# output of `uuid -v4` and contains either FOO or BAR.
#
# Arguments:
#   $1 - number of files to create.
function make_dir_slowly {
  rm -rf data
  mkdir data
  # Bail out explicitly so files are never written outside ./data, even when
  # errexit is not in effect for this call.
  pushd data > /dev/null || return 1
  local -i i=0
  local -i n="$1"
  local contents
  while test "${i}" -lt "${n}"
  do
    # The low bit of $RANDOM decides the contents of the file.
    if test "$((RANDOM & 1))" -eq 0
    then
      contents="FOO"
    else
      contents="BAR"
    fi
    echo "${contents}" > "$(uuid -v4)"
    i="$(( i + 1 ))"
  done
  popd > /dev/null
}
# Just count to N, so we can compare the performance of python and
# bash at raw computation.
#
# The loop is the only work done here: nothing is printed and the filesystem
# (in particular ./data) is left untouched, so the timing reflects the
# counting alone.
#
# Arguments:
#   $1 - the number to count up to.
function just_count {
  local -i i=0
  local -i n="$1"
  while test "${i}" -lt "${n}"
  do
    i="$(( i + 1 ))"
  done
}
# Just generate UUIDS on stdout
#
# Here we are essentially measuring the relative cost of invoking a
# subprocess.
#
# Arguments:
#   $1 - how many times to invoke `uuid -v4`.
# Outputs:
#   Whatever `uuid -v4` prints, once per iteration, and nothing else.
function generate_uuids {
  local -i i=0
  local -i n="$1"
  while test "${i}" -lt "${n}"
  do
    # Actually run the tool; echoing the command line would only print the
    # literal text "uuid -v4".
    uuid -v4
    i="$(( i + 1 ))"
  done
}
# Uses a glob plus a for loop to filter based on the file contents.
#
# Outputs:
#   The path of every regular file directly under data whose first line is
#   FOO, one per line.
function filter_state_using_glob {
  local file contents
  for file in data/*
  do
    # Skips sub-directories, and the literal "data/*" that the loop sees when
    # the glob matches nothing.
    test -f "${file}" || continue
    contents=''
    # read returns non-zero when the file does not end in a newline (or is
    # empty) even though it has stored the text; that must not trip errexit.
    read -r contents < "${file}" || true
    if test "${contents}" = "FOO"
    then
      echo "${file}"
    fi
  done
}
# uses grep to do the same thing.
#
# Outputs:
#   The path of every file found recursively under data that has a line
#   consisting of exactly FOO (-x matches whole lines, -l lists file names).
function filter_state_using_grep {
  grep -l -r -x FOO data
}
# uses while read to do the same thing.
#
# Outputs:
#   The path of every regular file under data whose first line is FOO, one
#   per line.
function filter_state_using_while {
  local path contents
  # -type f keeps the directory "data" itself (and any sub-directory) out of
  # the list; reading from a directory fails.
  find data -type f | while IFS='' read -r path
  do
    contents=''
    # read returns non-zero for a file without a trailing newline even though
    # it has stored the text; that must not abort the loop under errexit.
    IFS='' read -r contents < "${path}" || true
    if test "${contents}" = "FOO"
    then
      echo "${path}"
    fi
  done
}
# uses mapfile to do the same thing.
#
# The list of files is first loaded into an array, then each file is checked
# in a plain for loop.
#
# Outputs:
#   The path of every regular file under data whose first line is FOO, one
#   per line.
function filter_state_using_mapfile {
  local -a files
  local file state
  # find prints one path per line, which is what mapfile -t splits on.
  mapfile -t files < <(find data -type f)
  for file in "${files[@]}"
  do
    state=''
    # read returns non-zero for a file without a trailing newline even though
    # it has stored the text; that must not trip errexit.
    IFS='' read -r state < "${file}" || true
    if test "${state}" = "FOO"
    then
      echo "${file}"
    fi
  done
}
function test_case { | |
echo "Generating Test Data for Size: ${size}" | |
echo | |
time generate_data "$1" | |
echo | |
li echo "Using Glob:" | |
time using_glob > /dev/null | |
echo | |
echo "Using grep" | |
time using_grep > /dev/null | |
echo | |
echo "Using os.listdir to walk the dir" | |
time ./using_listdir.py > /dev/null | |
echo | |
echo "Using find | read_stdin.py" | |
time find data -mindepth 1 | ./read_stdin.py > /dev/null | |
} | |
# Runs test_case once for each data-set size, from 100 up to 100000 files.
function run_all {
  # Local, so the loop variable does not leak into the global scope; functions
  # called from here can still see it.
  local size
  for size in 100 1000 10000 100000; do
    test_case "${size}"
  done
}
# Entry point: with no arguments run the whole benchmark suite, otherwise
# treat the arguments as a function name followed by its parameters, e.g.
# `make_dir 1000`.
if test -z "$*"; then
  run_all
else
  "$@"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment