#!/usr/bin/env bash
# Shell options for the whole script. Each option is set by its own statement
# so that it takes effect in the main shell:
#   -u            expanding an unset variable is an error
#   -o pipefail   a pipeline fails when any of its stages fails
# `-e` is not enabled by this script.
set -u
set -o pipefail

# Word-split on newline and tab only (not on spaces).
IFS=$'\n\t'

# Explicitly turn noclobber off so `>` redirections may overwrite files.
set +o noclobber

# -----------------------------------------------------------------------------
# @file filetype.sh
# @brief A script to classify files based on their types and extensions.
#
# @details
# This script recursively examines files and directories, determines their types
# based on file extensions and metadata (using the `file` command), and classifies
# them into human-readable categories. It can identify files like scripts, images,
# archives, and device files, and it provides additional context for symbolic
# links and executables.
#
# The script supports various features:
# - File type classification using an associative array (`FILETYPE_MAP`) based on
#   file extensions.
# - Ability to handle symbolic links by resolving their targets.
# - Recursive traversal of directories up to a specified depth.
# - Handling of special file types (block devices, named pipes, etc.).
# - Checking if a file is executable or should be marked as executable.
#
# @usage
# ./filetype.sh [max_depth] <file_or_directory> [<more_files_or_directories>...]
#
# Provide one or more file paths or directories as arguments. An optional
# maximum depth for recursive directory traversal may be given as the first
# argument. If no depth is specified, the default value is 10.
#
# Example:
# ./filetype.sh 2 /path/to/directory
#
# @license
# This script is distributed under the MIT License.
#
# @author
# Lee Bussy
#
# @version
# 1.0
# -----------------------------------------------------------------------------

# ----------------------------------------------------------------------------- |
|
# @var FILETYPE_MAXDEPTH |
|
# @brief The maximum depth for recursive file and directory examination. |
|
# @details The default value is 10 but can be overridden by setting the |
|
# FILETYPE_MAXDEPTH environment variable. |
|
# ----------------------------------------------------------------------------- |
|
readonly FILETYPE_MAXDEPTH=${FILETYPE_MAXDEPTH:-10} |
|
|
|
# -----------------------------------------------------------------------------
# @var FILETYPE_MAP
# @brief Associative array mapping file extensions to human-readable file types.
# @details This map is used for quick classification of files based on their
#          extensions. Keys are extensions without the leading dot and are
#          matched exactly as written (lower case). It covers common file
#          types such as configuration files, scripts, images, and document
#          formats.
# -----------------------------------------------------------------------------
declare -A FILETYPE_MAP=(
    # Configuration and markup
    [cfg]="Configuration file"
    [conf]="Configuration file"
    [ini]="Configuration file"
    [md]="Markdown file"

    # Scripts and web assets
    [sh]="Shell script"
    [py]="Python script"
    [pl]="Perl script"
    [rb]="Ruby script"
    [js]="JavaScript file"
    [json]="JSON file"
    [yml]="YAML file"
    [yaml]="YAML file"
    [xml]="XML file"
    [html]="HTML file"
    [css]="CSS file"

    # Compiled-language sources and artifacts
    [c]="C source file"
    [cpp]="C++ source file"
    [h]="C header file"
    [hpp]="C++ header file"
    [java]="Java source file"
    [class]="Java compiled file"
    [jar]="Java archive file"
    [exe]="Windows executable"
    [bat]="Batch file"
    [php]="PHP script"

    # Images
    [svg]="Scalable Vector Graphics file"
    [jpg]="JPEG image file"
    [jpeg]="JPEG image file"
    [jfif]="JPEG image file"
    [gif]="GIF image file"
    [png]="PNG image file"
    [webp]="WebP image file"
    [heif]="HEIF image file"
    [avif]="AVIF image file"
    [tiff]="TIFF image file"
    [bmp]="BMP image file"
    [ppm]="PPM image file"
    [pgm]="PGM image file"
    [pbm]="PBM image file"
    [pnm]="PNM image file"
    [ico]="Icon file"

    # Documents and systemd units
    [pdf]="Portable Document Format"
    [service]="Systemd service file"
    [timer]="Systemd timer file"
)

# -----------------------------------------------------------------------------
# @brief Classify a single file or symbolic link.
#
# @details Symbolic links are detected on the path exactly as it was passed in,
#          before any normalization, because `realpath` resolves links and
#          would replace the link by its target. For a link, only the parent
#          directory is made absolute (the link name is kept) and the resolved
#          target is inspected with `file`. Any other path is normalized with
#          `realpath` and classified through FILETYPE_MAP (by the extension of
#          its basename), through classify_fallback, or through
#          classify_special_file.
#
# @param $1 The file path to classify.
#
# @return Outputs the classification as a pipe-delimited string.
# -----------------------------------------------------------------------------
examine_single_file() {
    local input="$1"
    local path type_info

    if [[ -h "$input" ]]; then
        local link_dir target
        # Absolute path of the link itself: resolve the directory part only.
        # CDPATH is cleared so `cd` neither searches it nor prints a path.
        if link_dir=$(CDPATH='' cd -- "$(dirname -- "$input")" 2>/dev/null && pwd -P); then
            path="${link_dir%/}/$(basename -- "$input")"
        else
            path="$input"
        fi
        target=$(readlink -f -- "$input")
        type_info=$(file -b -- "$target")
        classify_symbolic_link "$path" "$type_info" "$target"
        return
    fi

    # Normalize the path; keep the caller's spelling if realpath fails.
    path=$(realpath -- "$input") || path="$input"

    # Take the extension from the basename only. It stays empty when the name
    # has no dot or ends in one, because an empty key is not a valid
    # associative-array subscript.
    local name="${path##*/}"
    local extension=""
    if [[ "$name" == *.* ]]; then
        extension="${name##*.}"
    fi

    type_info=$(file -b -- "$path")

    if [[ -f "$path" ]]; then
        # Handle via FILETYPE_MAP or fallback
        if [[ -n "$extension" && -n "${FILETYPE_MAP[$extension]+_}" ]]; then
            printf "%s|%s|%s\n" "${FILETYPE_MAP[$extension]}" "$path" "$type_info"
        else
            classify_fallback "$path" "$type_info"
        fi
    elif [[ -b "$path" || -c "$path" || -p "$path" || -S "$path" ]]; then
        classify_special_file "$path" "$type_info"
    else
        printf "Unknown file|%s|%s\n" "$path" "$type_info"
    fi
}

# -----------------------------------------------------------------------------
# @brief Classify a file based on fallback criteria when no specific type is determined.
#
# @details The category is chosen by substring matches against the `file`
#          description. The patterns are tried in the order listed and the
#          first match wins, so a description containing "text" is reported as
#          a text file even if it also mentions, for example, "archive".
#
# @param $1 The path to the file being classified.
# @param $2 The type information derived from the `file` command.
#
# @return Outputs the classification as a pipe-delimited string.
# -----------------------------------------------------------------------------
classify_fallback() {
    local path="$1"
    local type_info="$2"
    local label

    case "$type_info" in
        *"text"*)       label="Text file" ;;
        *"ELF"*)        label="Binary file (ELF)" ;;
        *"image data"*) label="Image file" ;;
        *"archive"*)    label="Archive file" ;;
        *"compressed"*) label="Compressed file" ;;
        *"device"*)     label="Device file" ;;
        *)              label="Regular file" ;;
    esac

    printf "%s|%s|%s\n" "$label" "$path" "$type_info"
}

# -----------------------------------------------------------------------------
# @brief Classify a file as a special file type (block, character, pipe, or socket).
#
# @details The path is probed with the shell's `-b`, `-c`, `-p` and `-S` file
#          tests, in that order. A path matching none of them is reported as
#          "Unknown type".
#
# @param $1 The path to the file being classified.
# @param $2 The type information derived from the `file` command.
#
# @return Outputs the classification as a pipe-delimited string.
# -----------------------------------------------------------------------------
classify_special_file() {
    local path="$1"
    local type_info="$2"
    local label="Unknown type"

    if [[ -b "$path" ]]; then
        label="Block special"
    elif [[ -c "$path" ]]; then
        label="Character device"
    elif [[ -p "$path" ]]; then
        label="Named pipe"
    elif [[ -S "$path" ]]; then
        label="Socket"
    fi

    printf "%s|%s|%s\n" "$label" "$path" "$type_info"
}

# -----------------------------------------------------------------------------
# @brief Classify a Python file based on its shebang (Python2, Python3, or generic Python).
#
# @details Only the first line of the file is read. A first line containing
#          "python3" is checked before one containing "python2"; anything else
#          (including an unreadable or empty file) is reported as a generic
#          Python script.
#
# @param $1 The path to the Python file.
# @param $2 The type information derived from the `file` command.
#
# @return Outputs the classification as a pipe-delimited string.
# -----------------------------------------------------------------------------
parse_python_version() {
    local path="$1"
    local type_info="$2"
    local first_line label

    # `--` keeps a path starting with "-" from being parsed as an option.
    # Read errors are deliberately silenced: they yield the generic label.
    first_line=$(head -n 1 -- "$path" 2>/dev/null)

    case "$first_line" in
        *python3*) label="Python3 script" ;;
        *python2*) label="Python2 script" ;;
        *)         label="Python script" ;;
    esac

    printf "%s|%s|%s\n" "$label" "$path" "$type_info"
}

# -----------------------------------------------------------------------------
# @brief Examine the type of a file or directory recursively, respecting depth.
#
# @details Up to two leading all-digit arguments are consumed as the maximum
#          depth and the current depth, so a path whose name consists only of
#          digits must be written with a prefix such as "./". The positional
#          parameters are read with defaults, so calling the function without
#          arguments prints the usage message even under `set -u`. Depth
#          values are forced to base 10 so that "08" is not rejected as octal.
#          A symbolic link that does not point to a directory is handed to
#          examine_single_file exactly as given, so that function sees the
#          link itself rather than its resolved target. Directories, including
#          links to directories, are normalized with `realpath` and traversed.
#
# @param $1 The maximum depth for recursion (optional; falls back to
#           FILETYPE_MAXDEPTH, or 10 when that is unset or not numeric).
# @param $2 The current depth (internal use).
# @param $@ The paths to examine.
#
# @return Recursively calls itself for directories; outputs classifications for
#         files. Returns 1 after printing a usage message when no path is
#         given, and 0 when the current depth exceeds the maximum depth.
# -----------------------------------------------------------------------------
examine_file_type() {
    local max_depth current_depth
    # Loop variables are local so recursion does not leak or clobber globals.
    local path subpath normalized_path

    if [[ "${1-}" =~ ^[0-9]+$ ]]; then
        max_depth=$(( 10#$1 ))
        shift
    else
        max_depth="${FILETYPE_MAXDEPTH:-10}"
        # Never feed a non-numeric value into the arithmetic test below.
        [[ "$max_depth" =~ ^[0-9]+$ ]] || max_depth=10
        max_depth=$(( 10#$max_depth ))
    fi

    if [[ "${1-}" =~ ^[0-9]+$ ]]; then
        current_depth=$(( 10#$1 ))
        shift
    else
        current_depth=0
    fi

    if (( current_depth > max_depth )); then
        return 0
    fi

    if [[ $# -eq 0 ]]; then
        printf "Usage: examine_file_type [max_depth] <file_or_directory> [<more_files_or_directories>...]\n" >&2
        return 1
    fi

    for path in "$@"; do
        if [[ ! -e "$path" ]]; then
            printf "Error|Path '%s' does not exist.\n" "$path" >&2
            continue
        fi

        # Keep non-directory symbolic links un-normalized: realpath would
        # replace the link by its target before it can be classified.
        if [[ -h "$path" && ! -d "$path" ]]; then
            examine_single_file "$path"
            continue
        fi

        # Normalize path using realpath
        if ! normalized_path=$(realpath -- "$path"); then
            printf "Error|Path '%s' could not be resolved.\n" "$path" >&2
            continue
        fi

        if [[ -d "$normalized_path" ]]; then
            classify_directory "$normalized_path"
            for subpath in "$normalized_path"/*; do
                # Skips the literal pattern left behind by an empty directory.
                if [[ -e "$subpath" ]]; then
                    examine_file_type "$max_depth" "$(( current_depth + 1 ))" "$subpath"
                fi
            done
            continue
        fi

        examine_single_file "$normalized_path"
    done
}

# -----------------------------------------------------------------------------
# @brief Print symbolic link details along with the resolved target information.
#
# @details This function only formats its arguments; it does not inspect the
#          file system.
#
# @param $1 The symbolic link path.
# @param $2 The type information derived from the `file` command.
# @param $3 The resolved target path of the symbolic link.
#
# @return Outputs the symbolic link details as a pipe-delimited string of the
#         form "Symbolic link|<link> -> <target>|<type information>".
# -----------------------------------------------------------------------------
classify_symbolic_link() {
    local link_path="$1"
    local type_info="$2"
    local target="$3"

    printf "Symbolic link|%s -> %s|%s\n" "$link_path" "$target" "$type_info"
}

# -----------------------------------------------------------------------------
# @brief Determine if a file is executable or should be marked executable.
#
# @details This function checks if a file is executable or meets criteria for
#          being marked executable. The checks, in order, are:
#          - File existence and regularity.
#          - Existing execute permissions.
#          - For the extensions py, pl, sh, rb, php and js: a first line that
#            starts with "#!".
#          - For every other name: `file` output containing "ELF" or
#            "executable".
#          Each decision is reported as an "[INFO]" line on stderr; nothing is
#          written to stdout. Paths are passed to `head` and `file` after
#          `--`, so a name beginning with "-" is not parsed as an option.
#
# @param $1 The path to the file to be evaluated.
#
# @return 0 if the file is executable or should be executable, 1 otherwise.
#
# @example
# if is_executable "/path/to/file"; then
#     echo "File is executable or should be marked executable."
# else
#     echo "File is not executable."
# fi
# -----------------------------------------------------------------------------
is_executable() {
    local file="$1"

    # Ensure the file exists and is a regular file
    if [[ ! -f "$file" ]]; then
        printf "[INFO] File '%s' does not exist or is not a regular file.\n" "$file" >&2
        return 1
    fi

    # Quick exit: File is already executable
    if [[ -x "$file" ]]; then
        printf "[INFO] File '%s' is already executable.\n" "$file" >&2
        return 0
    fi

    # Text after the last dot; the whole path when there is no dot at all.
    local extension="${file##*.}"

    case "$extension" in
        py|pl|sh|rb|php|js)
            # Script extensions qualify only with a shebang on line one.
            local shebang
            shebang=$(head -n 1 -- "$file" 2>/dev/null)
            if [[ "$shebang" =~ ^#! ]]; then
                printf "[INFO] File '%s' has a valid shebang.\n" "$file" >&2
                return 0 # Should be executable
            fi
            ;;
        *)
            # `file` is consulted only here, where its output is used.
            local type_info
            type_info=$(file -b -- "$file")
            if [[ "$type_info" == *"ELF"* || "$type_info" == *"executable"* ]]; then
                printf "[INFO] File '%s' is an ELF binary or recognized executable.\n" "$file" >&2
                return 0 # Should be executable
            fi
            ;;
    esac

    # Nothing above matched.
    printf "[INFO] File '%s' does not meet executable criteria.\n" "$file" >&2
    return 1
}

# -----------------------------------------------------------------------------
# @brief Classify and print directory information.
#
# @details The path is normalized with `realpath` before it is printed. The
#          function does not itself verify that the path is a directory.
#
# @param $1 The path of the directory.
#
# @return Outputs the directory classification as a pipe-delimited string.
# -----------------------------------------------------------------------------
classify_directory() {
    local resolved

    # `--` keeps a path starting with "-" from being parsed as an option.
    resolved=$(realpath -- "$1")
    printf "Directory|%s\n" "$resolved"
}

# -----------------------------------------------------------------------------
# @brief Main logic of the script.
# @details Acts as the primary entry point for processing file types. It passes
#          all of its arguments, unchanged, to the `examine_file_type` function
#          for classification and analysis of the provided files or
#          directories.
#
# @param $@ Command-line arguments passed to the script. These should include:
#           - Maximum depth for directory traversal (optional).
#           - Files or directories to analyze.
#
# @return Exit status of the `examine_file_type` function. Zero indicates success,
#         and non-zero indicates an error.
#
# @example
# ./filetype.sh 2 /path/to/directory
# -----------------------------------------------------------------------------
_main() {
    examine_file_type "$@"
}

# -----------------------------------------------------------------------------
# @brief Wrapper function to invoke the script's main logic.
# @details The `main` function is a standard convention in scripts to encapsulate
#          execution logic, allowing easier testing and extension. It forwards
#          every argument, unchanged, to `_main`.
#
# @param $@ Command-line arguments passed to the script.
#
# @return Exit status of the `_main` function.
# -----------------------------------------------------------------------------
main() {
    _main "$@"
}

# -----------------------------------------------------------------------------
# @brief Entry point for the script.
# @details Executes the `main` function with command-line arguments. The `exit`
#          status is set to the return value of the `main` function.
#
# @param $@ Command-line arguments passed to the script.
#
# @return Exit status of the script (inherited from the `main` function).
# -----------------------------------------------------------------------------
main "$@"
exit "$?"