Skip to content

Instantly share code, notes, and snippets.

@emdash
Created May 26, 2022 01:42
Show Gist options
  • Save emdash/9f037d95ac8e3d491f5fe9eb2ddd077b to your computer and use it in GitHub Desktop.
Save emdash/9f037d95ac8e3d491f5fe9eb2ddd077b to your computer and use it in GitHub Desktop.
Bash IO Benchmarks
# Abort on the first failing command or failing pipeline stage, and make
# command substitutions inherit that behaviour as well.
set -eo pipefail
shopt -s inherit_errexit

# Parse command-line flags.
#
#   -s   Define the read-only SLOW_MODE variable, which make_dir checks
#        (with `test -v`) to decide how the test data is generated.
#
# Any other flag is ignored here.
while getopts "s" option; do
  case "${option}" in
    s) declare -r "SLOW_MODE"=1 ;;
    *) : ;;
  esac
done

# Drop the flags that getopts consumed, so that the remaining positional
# parameters start at the first non-option argument. Without this, a
# leading `-s` would still be present in "$@" after parsing.
shift "$(( OPTIND - 1 ))"
# Generates a folder with up to "$1" files with random names.
#
# Each file's contents is randomly set to either FOO or BAR.
#
# Arguments:
#   $1 - number of files to generate
# Globals:
#   SLOW_MODE (read) - when set, the pure-shell generator
#                      make_dir_slowly is used instead of ./gen_uuids.py
#
# Any existing ./data directory is removed and recreated first.
function make_dir {
  rm -rf data
  mkdir data
  if test -v SLOW_MODE; then
    # Forward the requested count; the slow generator reads it from its
    # own "$1".
    make_dir_slowly "$1"
  else
    ./gen_uuids.py make_dir "$1"
  fi
}
# Naive pure-shell counterpart of ./gen_uuids.py: same job, but orders
# of magnitude slower.
#
# Arguments:
#   $1 - number of files to create
#
# Recreates ./data from scratch, then creates one file per iteration
# inside it. Each file is named by the output of `uuid -v4` and contains
# either FOO or BAR, chosen from the low bit of $RANDOM.
function make_dir_slowly {
  rm -rf data
  mkdir data
  pushd data > /dev/null

  local -i limit="$1"
  local -i created
  local payload

  for (( created = 0; created < limit; created += 1 )); do
    # Even draw -> FOO, odd draw -> BAR.
    if (( RANDOM & 1 )); then
      payload="BAR"
    else
      payload="FOO"
    fi
    echo "${payload}" > "$(uuid -v4)"
  done

  popd > /dev/null
}
# Counts from 0 up to "$1" and does nothing else in the loop, so that
# raw loop speed can be compared between python and bash.
#
# Arguments:
#   $1 - number of iterations
#
# Note: like the data generators, this also removes and recreates
# ./data and runs the loop from inside it.
function just_count {
  rm -rf data
  mkdir data
  pushd data > /dev/null

  local -i limit="$1"
  local -i tick

  for (( tick = 0; tick < limit; tick += 1 )); do
    :
  done

  popd > /dev/null
}
# Just generate UUIDS on stdout
#
# Here we are essentially measuring the relative cost of invoking a
# subprocess.
#
# Arguments:
#   $1 - number of UUIDs to generate
# Outputs:
#   whatever `uuid -v4` prints, once per iteration, and nothing else
function generate_uuids {
  local -i i=0
  local -i n="$1"
  while (( i < n )); do
    # Actually run the tool; the point of this benchmark is the cost of
    # spawning it once per iteration.
    uuid -v4
    i="$(( i + 1 ))"
  done
}
# Uses a glob plus a for loop to filter based on the file contents.
#
# Prints the path of every regular file directly inside ./data whose
# first line is FOO.
function filter_state_using_glob {
  local file contents
  for file in data/*; do
    # An unmatched glob is left as the literal pattern, and directories
    # cannot be read; skip anything that is not a regular file.
    [[ -f "${file}" ]] || continue

    # Reset first so a failed read can never reuse the previous file's
    # value. `read` returns non-zero for an empty file or a last line
    # without a trailing newline; that must not abort under `set -e`.
    contents=""
    read -r contents < "${file}" || true

    if [[ "${contents}" == "FOO" ]]; then
      echo "${file}"
    fi
  done
}
# Same filter, delegated entirely to grep: recurse into ./data and list
# the files that contain a line consisting of exactly FOO.
function filter_state_using_grep {
  local -a grep_args=(
    -l   # print only the names of matching files
    -r   # descend into the directory
    -x   # the whole line must match
    FOO
    data
  )
  grep "${grep_args[@]}"
}
# uses while read to do the same thing.
#
# Streams the regular files under ./data from `find` into a
# `while read` loop and prints the path of each one whose first line is
# FOO.
function filter_state_using_while {
  local path contents
  # -mindepth 1 keeps the `data` directory itself out of the stream and
  # -type f keeps subdirectories out; neither can be read as a file.
  find data -mindepth 1 -type f | while IFS='' read -r path; do
    # Reset first so a failed read cannot reuse the previous value, and
    # tolerate files that are empty or lack a trailing newline.
    contents=""
    IFS='' read -r contents < "${path}" || true

    if [[ "${contents}" == "FOO" ]]; then
      echo "${path}"
    fi
  done
}
# Uses mapfile to load the whole file list into an array first, then
# filters it with a for loop.
#
# Prints the path of every regular file under ./data whose first line
# is FOO, in the same `data/...` form the other filters print.
function filter_state_using_mapfile {
  local -a files
  local file state
  # find's default output is one path per line, which is what
  # `mapfile -t` needs to yield one array element per file.
  mapfile -t files < <(find data -mindepth 1 -type f)
  for file in "${files[@]}"; do
    # Reset first so a failed read cannot reuse the previous value, and
    # tolerate files that are empty or lack a trailing newline.
    state=""
    IFS='' read -r state < "${file}" || true

    if [[ "${state}" == "FOO" ]]; then
      echo "${file}"
    fi
  done
}
# Runs one benchmark round for a data set of "$1" files.
#
# Arguments:
#   $1 - number of files to generate
#
# Regenerates ./data with make_dir, then times each filtering strategy
# in turn. Headings go to stdout and the output of the filters
# themselves is discarded.
function test_case {
  # Take the size from the argument rather than from whatever variable
  # the caller happens to be looping over.
  local size="$1"

  echo "Generating Test Data for Size: ${size}"
  echo
  time make_dir "${size}"
  echo
  echo "Using Glob:"
  time filter_state_using_glob > /dev/null
  echo
  echo "Using grep"
  time filter_state_using_grep > /dev/null
  echo
  echo "Using os.listdir to walk the dir"
  time ./using_listdir.py > /dev/null
  echo
  echo "Using find | read_stdin.py"
  time find data -mindepth 1 | ./read_stdin.py > /dev/null
}
# Runs test_case once for each data-set size, from smallest to largest.
function run_all {
  local -a sizes=(100 1000 10000 100000)
  for size in "${sizes[@]}"; do
    test_case "${size}"
  done
}
# Entry point: when arguments are given, treat them as a command (for
# example the name of one of the functions in this file plus its
# arguments) and run it; otherwise run the full benchmark suite.
if [[ -n "$*" ]]; then
  "$@"
else
  run_all
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment