#!/usr/bin/env bash
#
# Sorts photo/video files into
#   ${DEST_DIR}/<maker>/<device>/<YYYY>/<MM>/<DD>/
# using metadata read with exiftool.
#
# Usage: DEST_DIR=/path/to/dest <script> [--dry-run|-n] <file_or_directory>
# Optional environment: MAKER_NAME overrides the maker read from the metadata.

# ${0##*/} is the script's base name; unlike an unquoted $(basename $0) it
# survives spaces in the path and needs no subprocess.
echo -e "### WELCOME TO ${0##*/}\n"

# Non-empty when --dry-run / -n was given as the first argument. The flag is
# shifted away so that ${1} is always the file or directory to process.
DRY_RUN=""
if [[ "${1}" == "--dry-run" || "${1}" == "-n" ]]; then
  DRY_RUN=1
  shift
  echo -e "[DRY-RUN] Mode enabled. No files will be created, copied, or renamed.\n"
fi

#######################################
# Runs a command, or only prints it when dry-run mode is active.
# Globals:   DRY_RUN (read) — non-empty means "do not execute"
# Arguments: the command and its arguments
# Outputs:   in dry-run mode, one "[DRY-RUN] Would run: ..." line on stdout
# Returns:   0 in dry-run mode; otherwise the command's exit status
#######################################
run() {
  if [[ -n "${DRY_RUN}" ]]; then
    # %q shell-quotes each argument so paths containing spaces stay
    # unambiguous in the preview (plain "$*" would blur argument boundaries).
    local quoted=""
    (( $# > 0 )) && printf -v quoted '%q ' "$@"
    echo "[DRY-RUN] Would run: ${quoted% }"
  else
    "$@"
  fi
}

# Absolute system paths — immune to aliases like 'date -> gdate' from coreutils.
# When a tool is not installed at the conventional location, fall back to the
# first match on PATH: 'type -P' performs a PATH search only, so aliases and
# shell functions are still ignored.
_SYS_DATE=/bin/date
[[ -x "${_SYS_DATE}" ]] || _SYS_DATE="$(type -P date || echo date)"

_SYS_STAT=/usr/bin/stat
[[ -x "${_SYS_STAT}" ]] || _SYS_STAT="$(type -P stat || echo stat)"

#######################################
# Appends one timestamped line to the run log.
# Globals:   DRY_RUN (read), LOG_FILE (read), _SYS_DATE (read)
# Arguments: message words; joined with single spaces into one log line
# Outputs:   appends "<YYYY-MM-DDTHH:MM:SS> <message>" to ${LOG_FILE}
# Returns:   0 in dry-run mode or when LOG_FILE is empty; 1 if the log
#            directory cannot be created; otherwise the status of the append
#######################################
log_entry() {
  [[ -n "${DRY_RUN}" ]] && return 0
  # No log target configured: nothing to do (avoids a redirect to "")
  [[ -n "${LOG_FILE}" ]] || return 0

  # The log directory may not exist yet the first time something is logged
  local log_dir="."
  [[ "${LOG_FILE}" == */* ]] && log_dir="${LOG_FILE%/*}"
  if [[ -n "${log_dir}" && ! -d "${log_dir}" ]]; then
    mkdir -p -- "${log_dir}" || return 1
  fi

  printf '%s %s\n' "$("${_SYS_DATE:-date}" '+%Y-%m-%dT%H:%M:%S')" "$*" >> "${LOG_FILE}"
}

# --- Startup validation: destination, argument, required tools ---

if [[ -z "${DEST_DIR}" ]]; then
  echo "Error: DEST_DIR is not set. Usage: DEST_DIR=/path/to/dest ${0} [--dry-run] <file_or_directory>"
  echo -e "\a"
  exit 1
fi

# A destination that exists but is not a directory can never receive files;
# fail now instead of once per file later on.
if [[ -e "${DEST_DIR}" && ! -d "${DEST_DIR}" ]]; then
  echo "Error: DEST_DIR (${DEST_DIR}) exists but is not a directory."
  echo -e "\a"
  exit 1
fi

echo -e "Destination dir set as ${DEST_DIR}"
LOG_FILE="${DEST_DIR}/sortfiles.log"

if [[ -n "${1}" ]]; then
  echo -n "Analyzing ${1}... "
  if [[ -e "${1}" ]]; then
    echo OK
  else
    echo -e "FAIL!"
    exit 1
  fi
else
  echo -e "Argument not set. Usage: ${0} [--dry-run] <file_or_directory>"
  echo -e "\a"
  exit 1
fi

echo -ne "Checking dependencies: "
for DEP in awk exiftool jq mkdir rsync; do
  echo -ne "${DEP} "
  if command -v "${DEP}" > /dev/null 2>&1; then
    echo -ne "OK\t "
  else
    echo "Failed to find ${DEP} command. Exitting now"
    echo -e "\a"
    exit 1
  fi
done

# Auto-detect sha256 tool (shasum on macOS, sha256sum on Linux).
# _SHA256_CMD holds the command together with its options as ONE string; it is
# expanded unquoted where it is used so that it splits into separate words.
echo -ne "sha256 "
if command -v shasum > /dev/null 2>&1; then
  _SHA256_CMD="shasum -a 256"
  echo -ne "OK(shasum)\t "
elif command -v sha256sum > /dev/null 2>&1; then
  _SHA256_CMD="sha256sum"
  echo -ne "OK(sha256sum)\t "
else
  echo "Failed to find shasum or sha256sum. Exitting now"
  echo -e "\a"
  exit 1
fi
echo -e "\n"

# Detect BSD vs GNU behavior using system paths (immune to aliases):
# only the GNU implementations accept --version.
if ${_SYS_DATE} --version > /dev/null 2>&1; then
  _DATE_IS_GNU=1
else
  _DATE_IS_GNU=0
fi

if ${_SYS_STAT} --version > /dev/null 2>&1; then
  _STAT_IS_GNU=1
else
  _STAT_IS_GNU=0
fi

# --- Helper functions ---

#######################################
# Prints the SHA-256 digest (lowercase hex) of a file.
# Globals:   _SHA256_CMD (read) — digest command plus options, as one string
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout; nothing when the file cannot be read
#######################################
sha256() {
  # The file is fed on stdin instead of being passed as an argument:
  #  - a name starting with '-' can no longer be mistaken for an option;
  #  - GNU sha256sum prefixes the output line with '\' when the file NAME
  #    contains a backslash or newline, which would corrupt the digest field.
  # _SHA256_CMD is intentionally unquoted so "shasum -a 256" splits into words.
  # shellcheck disable=SC2086
  ${_SHA256_CMD} < "${1}" | awk '{print $1}'
}

#######################################
# Derives a capture timestamp from well-known file-name patterns.
# Arguments: $1 - file name (without directory)
# Outputs:   "YYYY:MM:DD HH:MM:SS" on stdout, or nothing when no pattern
#            matches. Patterns without a time component yield 00:00:00.
# Returns:   0 in both cases
#
# Each field is range-checked (month 01-12, day 01-31, hour 00-23,
# minute/second 00-59) so that arbitrary digit runs such as
# "12345678_123456.jpg" are not mistaken for a date. Days are not checked
# against the month length.
#######################################
extract_date_from_filename() {
  local name="${1}"

  local year='([0-9]{4})'
  local month='(0[1-9]|1[0-2])'
  local day='(0[1-9]|[12][0-9]|3[01])'
  local hour='([01][0-9]|2[0-3])'
  local minsec='([0-5][0-9])'

  # Every field is exactly one capture group, so BASH_REMATCH[1..6] map to
  # year, month, day, hour, minute, second in that order.
  local re_android="^${year}${month}${day}_${hour}${minsec}${minsec}"
  local re_whatsapp="^[A-Za-z]+-${year}${month}${day}-WA"
  local re_dashed="^${year}-${month}-${day}"

  if [[ "${name}" =~ ${re_android} ]]; then
    # Android: YYYYMMDD_HHMMSS
    echo "${BASH_REMATCH[1]}:${BASH_REMATCH[2]}:${BASH_REMATCH[3]} ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}:${BASH_REMATCH[6]}"
  elif [[ "${name}" =~ ${re_whatsapp} ]]; then
    # WhatsApp: IMG-YYYYMMDD-WA / VID-YYYYMMDD-WA
    echo "${BASH_REMATCH[1]}:${BASH_REMATCH[2]}:${BASH_REMATCH[3]} 00:00:00"
  elif [[ "${name}" =~ ${re_dashed} ]]; then
    # Screenshot / generic: YYYY-MM-DD
    echo "${BASH_REMATCH[1]}:${BASH_REMATCH[2]}:${BASH_REMATCH[3]} 00:00:00"
  fi
  # Prints nothing if no pattern matched
}

#######################################
# Prints a file's creation (birth) time, falling back to its modification
# time on GNU systems when no birth time is reported.
# Globals:   _STAT_IS_GNU, _SYS_STAT, _SYS_DATE (read)
# Arguments: $1 - path of the file
# Outputs:   "YYYY:MM:DD HH:MM:SS" on stdout; nothing when no time is available
# Returns:   status of the final date/stat call; 1 when stat gave no usable value
#######################################
get_birthtime() {
  local file="${1}"
  local epoch

  if [[ "${_STAT_IS_GNU}" -eq 1 ]]; then
    # GNU stat: %W is the birth time in epoch seconds, 0 when unknown.
    # The regex accepts positive integers only, so empty or non-numeric output
    # takes the fallback instead of raising an arithmetic error.
    epoch="$("${_SYS_STAT}" -c %W "${file}" 2> /dev/null)"
    if [[ ! "${epoch}" =~ ^[1-9][0-9]*$ ]]; then
      # Birth time not supported by filesystem — fall back to mtime
      epoch="$("${_SYS_STAT}" -c %Y "${file}" 2> /dev/null)"
    fi
    # stat failed altogether (e.g. missing file): there is nothing to convert
    [[ "${epoch}" =~ ^-?[0-9]+$ ]] || return 1
    "${_SYS_DATE}" -d "@${epoch}" "+%Y:%m:%d %H:%M:%S" 2> /dev/null
  else
    # BSD stat: birth time, formatted directly by stat
    "${_SYS_STAT}" -f %SB -t "%Y:%m:%d %H:%M:%S" "${file}" 2> /dev/null
  fi
}

# Extensions processed in batch mode (case-insensitive)
readonly BATCH_EXTENSIONS=("jpg" "jpeg" "cr2" "cr3" "mov" "mp4" "heic" "nef" "arw" "orf" "rw2" "tif" "tiff" "png")

# Save user-provided MAKER_NAME (env override) so it can be restored per file in batch.
# Empty when MAKER_NAME is not set.
readonly USER_MAKER_NAME="${MAKER_NAME:-}"

#######################################
# Prints one top-level tag of the first object in exiftool's JSON output.
# Arguments: $1 - JSON text, $2 - tag name
# Outputs:   the tag value, or an empty line when the tag is absent/null
#######################################
_exif_tag() {
  jq -r --arg tag "${2}" '.[0][$tag] // ""' <<< "${1}"
}

#######################################
# Copies one media file into the sorted destination tree and renames it after
# its device and capture timestamp.
#
# Destination:
#   ${DEST_DIR}/<maker>/<device>/<YYYY>/<MM>/<DD>/<device>-<YYYY>-<MM>-<DD>-<hh>-<mm>-<ss>.<ext>
# (spaces in <device> become underscores in the file name only).
#
# Date sources, in order: file name (WhatsApp files only), EXIF CreateDate,
# file name pattern, filesystem birth time.
#
# Globals:   DEST_DIR, USER_MAKER_NAME, DRY_RUN (read)
# Arguments: $1 - path of the file to process
# Outputs:   progress messages on stdout; log lines through log_entry
# Returns:   0 when the file was copied or deliberately skipped, 1 on failure
#######################################
process_file() {
  local INPUT_FILE="${1}"
  local MAKER_NAME="${USER_MAKER_NAME}"
  local DEVICE_NAME=""
  local INFORMATION TAG

  local EXIF_JSON
  EXIF_JSON="$(LANG=C exiftool -json "${INPUT_FILE}")"

  if [[ -z "${EXIF_JSON}" ]]; then
    echo -e "No EXIF data found (or exiftool failed) on file ${INPUT_FILE}"
    log_entry "FAILED ${INPUT_FILE} (no EXIF data)"
    echo -e "\a"
    return 1
  fi

  echo -e "Parsing information on file ${INPUT_FILE}... "

  local FILE_NAME
  FILE_NAME="$(_exif_tag "${EXIF_JSON}" FileName)"

  local FILE_EXTENSION
  FILE_EXTENSION="$(jq -r '.[0].FileTypeExtension // "" | ascii_downcase' <<< "${EXIF_JSON}")"
  # Fallback: derive extension from filename when exiftool cannot determine file type.
  # tr is used instead of ${var,,} so this also works with bash 3.2.
  if [[ -z "${FILE_EXTENSION}" ]]; then
    FILE_EXTENSION="$(printf '%s' "${FILE_NAME##*.}" | tr '[:upper:]' '[:lower:]')"
  fi

  # Maker: env override → EXIF Make → WhatsApp file-name pattern → Unknown
  if [[ -z "${MAKER_NAME}" ]]; then
    MAKER_NAME="$(_exif_tag "${EXIF_JSON}" Make)"
  fi
  if [[ -z "${MAKER_NAME}" ]]; then
    if [[ "${FILE_NAME}" =~ -WA[0-9]+ ]]; then
      MAKER_NAME="WhatsApp"
    else
      MAKER_NAME="Unknown"
    fi
  fi

  # Device: the first non-empty tag wins. CR2 files try CameraModelName before
  # Model; every other type tries Model first.
  local DEVICE_TAGS=(Model CameraModelName)
  if [[ "${FILE_EXTENSION}" == "cr2" ]]; then
    DEVICE_TAGS=(CameraModelName Model)
  fi
  for TAG in "${DEVICE_TAGS[@]}"; do
    DEVICE_NAME="$(_exif_tag "${EXIF_JSON}" "${TAG}")"
    [[ -n "${DEVICE_NAME}" ]] && break
  done
  # Final fallback: WhatsApp detection or Unknown
  if [[ -z "${DEVICE_NAME}" ]]; then
    if [[ "${FILE_NAME}" =~ -WA[0-9]+ ]]; then
      DEVICE_NAME="WhatsApp"
    else
      DEVICE_NAME="Unknown"
    fi
  fi

  # Skip files with no identifiable camera — avoids polluting Unknown/Unknown/
  if [[ "${MAKER_NAME}" == "Unknown" && "${DEVICE_NAME}" == "Unknown" ]]; then
    echo "SKIPPED (no camera EXIF): ${INPUT_FILE}"
    log_entry "SKIPPED ${INPUT_FILE} (no camera EXIF)"
    return 0
  fi

  local CREATE_DATE=""

  # WhatsApp: filename date is more reliable than EXIF/btime (btime reflects download, not capture)
  if [[ "${MAKER_NAME}" == "WhatsApp" ]]; then
    CREATE_DATE="$(extract_date_from_filename "${FILE_NAME}")"
    [[ -n "${CREATE_DATE}" ]] && echo " (date sourced from filename — WhatsApp priority)"
  fi

  # For all others (or if WhatsApp filename had no parseable date): try EXIF first
  if [[ -z "${CREATE_DATE}" ]]; then
    CREATE_DATE="$(_exif_tag "${EXIF_JSON}" CreateDate)"
    # Treat zeroed-out dates (e.g. "0000:00:00 00:00:00") as missing
    [[ "${CREATE_DATE}" == 0000* ]] && CREATE_DATE=""
  fi

  # Fallback cascade: filename pattern → filesystem birth time
  if [[ -z "${CREATE_DATE}" ]]; then
    CREATE_DATE="$(extract_date_from_filename "${FILE_NAME}")"
    [[ -n "${CREATE_DATE}" ]] && echo " (date sourced from filename)"
  fi
  if [[ -z "${CREATE_DATE}" ]]; then
    CREATE_DATE="$(get_birthtime "${INPUT_FILE}")"
    [[ -n "${CREATE_DATE}" ]] && echo " (date sourced from filesystem birth time)"
  fi
  if [[ -z "${CREATE_DATE}" ]]; then
    echo "FAILED. Could not determine date for ${INPUT_FILE}"
    log_entry "FAILED ${INPUT_FILE} (no date available)"
    echo -e "\a"
    return 1
  fi

  # CREATE_DATE is "YYYY:MM:DD HH:MM:SS"; slice it by fixed offsets
  local FILE_YEAR="${CREATE_DATE:0:4}"
  local FILE_MONTH="${CREATE_DATE:5:2}"
  local FILE_DAY="${CREATE_DATE:8:2}"
  local FILE_HOUR="${CREATE_DATE:11:2}"
  local FILE_MINUTE="${CREATE_DATE:14:2}"
  local FILE_SECOND="${CREATE_DATE:17:2}"

  # Every component is needed to build the destination; report and stop at the
  # first empty one. ${!INFORMATION} reads the variable whose NAME is stored in
  # INFORMATION (no eval required).
  for INFORMATION in FILE_NAME FILE_EXTENSION MAKER_NAME DEVICE_NAME FILE_YEAR FILE_MONTH FILE_DAY FILE_HOUR FILE_MINUTE FILE_SECOND; do
    echo -ne "${INFORMATION}: "
    if [[ -z "${!INFORMATION}" ]]; then
      echo "FAILED. Variable \$${INFORMATION} is empty"
      log_entry "FAILED ${INPUT_FILE} (${INFORMATION} is empty)"
      echo -e "\a"
      return 1
    fi
    printf '%s\n' "${!INFORMATION}"
  done
  echo

  local FILE_NAME_FULL="${DEVICE_NAME// /_}-${FILE_YEAR}-${FILE_MONTH}-${FILE_DAY}-${FILE_HOUR}-${FILE_MINUTE}-${FILE_SECOND}.${FILE_EXTENSION}"
  local FILE_NAME_SHORT="${FILE_HOUR}-${FILE_MINUTE}-${FILE_SECOND}.${FILE_EXTENSION}"
  local FINAL_DEST_DIR="${DEST_DIR}/${MAKER_NAME}/${DEVICE_NAME}/${FILE_YEAR}/${FILE_MONTH}/${FILE_DAY}"
  # rsync copies the file into FINAL_DEST_DIR under the source's own base
  # name; it is renamed from there afterwards.
  local STAGED_FILE="${FINAL_DEST_DIR}/${INPUT_FILE##*/}"

  if [[ -e "${FINAL_DEST_DIR}/${FILE_NAME_FULL}" ]]; then
    local CHECKSUM_EXISTING
    CHECKSUM_EXISTING="$(sha256 "${FINAL_DEST_DIR}/${FILE_NAME_FULL}")"
    local CHECKSUM_SOURCE
    CHECKSUM_SOURCE="$(sha256 "${INPUT_FILE}")"
    # An empty digest means hashing failed; never treat that as "identical"
    if [[ -n "${CHECKSUM_SOURCE}" && "${CHECKSUM_EXISTING}" == "${CHECKSUM_SOURCE}" ]]; then
      echo "SKIPPED: ${FINAL_DEST_DIR}/${FILE_NAME_FULL} already exists and is identical."
      log_entry "SKIPPED ${INPUT_FILE} (identical: ${FINAL_DEST_DIR}/${FILE_NAME_FULL})"
      return 0
    fi
    # Different content with same timestamp (e.g. burst mode) — find a free suffixed name
    local COLLISION_BASE="${FILE_NAME_FULL%.*}"
    local COLLISION_EXT="${FILE_NAME_FULL##*.}"
    local SUFFIX=1
    while [[ -e "${FINAL_DEST_DIR}/${COLLISION_BASE}_${SUFFIX}.${COLLISION_EXT}" ]]; do
      (( SUFFIX++ ))
    done
    FILE_NAME_FULL="${COLLISION_BASE}_${SUFFIX}.${COLLISION_EXT}"
    echo "COLLISION: Different file with same timestamp. Will use name ${FILE_NAME_FULL}"
    log_entry "COLLISION ${INPUT_FILE} -> ${FINAL_DEST_DIR}/${FILE_NAME_FULL}"
  fi

  echo -n "Creating final destination dir ${FINAL_DEST_DIR}: "
  if run mkdir -p "${FINAL_DEST_DIR}"; then
    echo -e "OK"
  else
    echo -e "FAILED. Exitting now"
    log_entry "FAILED ${INPUT_FILE} (mkdir failed: ${FINAL_DEST_DIR})"
    echo -e "\a"
    return 1
  fi

  # rsync runs with --ignore-existing, so a file already sitting at the
  # staging path would be left in place, then checksummed, and on mismatch
  # deleted as a "corrupt copy". If such a file exists and is NOT the same
  # content as the source it is somebody else's data: stop without touching it.
  if [[ -e "${STAGED_FILE}" ]] \
    && [[ "$(sha256 "${STAGED_FILE}")" != "$(sha256 "${INPUT_FILE}")" ]]; then
    echo "FAILED. ${STAGED_FILE} already exists with different content; refusing to overwrite or remove it."
    log_entry "FAILED ${INPUT_FILE} (staging name already taken: ${STAGED_FILE})"
    echo -e "\a"
    return 1
  fi

  echo -n "Copying file ${INPUT_FILE}: "
  # Only stderr is silenced (-q already keeps rsync's stdout quiet), so the
  # "[DRY-RUN] Would run" preview printed by run stays visible.
  if ! run rsync -aq --ignore-existing -- "${INPUT_FILE}" "${FINAL_DEST_DIR}" 2> /dev/null; then
    echo "File copy failed! Exitting now"
    log_entry "FAILED ${INPUT_FILE} (rsync error)"
    echo -e \\a
    return 1
  fi
  echo -ne "OK"

  if [[ -z "${DRY_RUN}" ]]; then
    echo -n " | Verifying checksum: "
    local CHECKSUM_SRC
    CHECKSUM_SRC="$(sha256 "${INPUT_FILE}")"
    local CHECKSUM_DST
    CHECKSUM_DST="$(sha256 "${STAGED_FILE}")"
    # Two empty digests compare equal, so an empty one counts as a failure too
    if [[ -z "${CHECKSUM_SRC}" || "${CHECKSUM_SRC}" != "${CHECKSUM_DST}" ]]; then
      echo "FAILED (checksum mismatch). Removing corrupt copy and aborting."
      log_entry "FAILED ${INPUT_FILE} (checksum mismatch after copy)"
      run rm -f -- "${STAGED_FILE}"
      echo -e "\a"
      return 1
    fi
    echo -ne "OK"
  fi

  echo -ne " | Now renaming to ${FILE_NAME_FULL}: "
  if run mv -- "${STAGED_FILE}" "${FINAL_DEST_DIR}/${FILE_NAME_FULL}"; then
    echo "OK"
    log_entry "COPIED ${INPUT_FILE} -> ${FINAL_DEST_DIR}/${FILE_NAME_FULL}"
    return 0
  fi

  echo "Full file renaming failed!"
  # The short name carries no date or device, so it is far more likely to be
  # taken already; never let mv overwrite an existing file with it.
  if [[ ! -e "${FINAL_DEST_DIR}/${FILE_NAME_SHORT}" ]] \
    && run mv -- "${STAGED_FILE}" "${FINAL_DEST_DIR}/${FILE_NAME_SHORT}"; then
    echo "File successfully renamed to ${FILE_NAME_SHORT}"
    log_entry "COPIED ${INPUT_FILE} -> ${FINAL_DEST_DIR}/${FILE_NAME_SHORT} (short-name fallback)"
  else
    echo "Short file renaming failed. Keeping name as ${FILE_NAME})"
    log_entry "COPIED ${INPUT_FILE} -> ${STAGED_FILE} (rename failed, original name kept)"
  fi
  # The copy itself succeeded and was verified, so this is not an error
  return 0
}

# --- Dispatch: single file or directory ---
# The script's exit status is 0 only when every processed file succeeded.

if [[ -d "${1}" ]]; then
  echo -e "Batch mode: scanning directory ${1}\n"

  # find would parse a start path beginning with '-' as an option/expression
  SCAN_ROOT="${1}"
  if [[ "${SCAN_ROOT}" == -* ]]; then
    SCAN_ROOT="./${SCAN_ROOT}"
  fi

  # Build: -iname '*.ext1' -o -iname '*.ext2' -o ...
  FIND_ARGS=()
  for EXT in "${BATCH_EXTENSIONS[@]}"; do
    if (( ${#FIND_ARGS[@]} > 0 )); then
      FIND_ARGS+=(-o)
    fi
    FIND_ARGS+=(-iname "*.${EXT}")
  done

  TOTAL=0
  ERRORS=0
  # The NUL-delimited file list is read on fd 3 rather than stdin, so that no
  # command started by process_file can swallow part of the list.
  while IFS= read -r -d '' FILE <&3; do
    echo "--- [${FILE}]"
    TOTAL=$(( TOTAL + 1 ))
    if ! process_file "${FILE}"; then
      ERRORS=$(( ERRORS + 1 ))
    fi
    echo
  done 3< <(find "${SCAN_ROOT}" -not -path '*/.Trash*' -not -path '*/.$RECYCLE.BIN*' -not -name '._*' -type f \( "${FIND_ARGS[@]}" \) -print0 | sort -z)

  echo "Batch complete: ${TOTAL} file(s) processed, ${ERRORS} error(s)."
  log_entry "SUMMARY batch complete: ${TOTAL} processed, ${ERRORS} error(s). Log: ${LOG_FILE}"
  # Last command of the branch: becomes the script's exit status
  [[ ${ERRORS} -eq 0 ]]
else
  process_file "${1}"
fi