Last active
January 9, 2024 06:31
-
-
Save etiennecollin/69ae54b18fde317be0a9fed2a362c9d7 to your computer and use it in GitHub Desktop.
The File System Formatter script is a Python program that automates the process of formatting file and directory names within a file system. It applies a set of rules to ensure consistent naming conventions and improve organization. The script allows users to customize the formatting options and provides options for dry run and verbose output.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*- | |
# Author: Etienne Collin | |
# Date: 2023/12/20 | |
# Email: [email protected] | |
################################################################################ | |
# The File System Formatter script is a Python program that automates the process of formatting file and | |
# directory names within a file system. It applies a set of rules to ensure consistent naming conventions and | |
# improve organization. The script allows users to customize the formatting options and provides options for | |
# dry run and verbose output. | |
################################################################################ | |
# Key Features: | |
# - To prevent a directory from being formatted, add a file named ".donotformat" to the directory | |
# - Renames files and directories to meet specified formatting rules | |
# - Excludes certain files, extensions, and characters from the formatting process | |
# - Treats files with certain extensions as directories
# - Deletes specified files from the file system | |
# - Handles special characters, accents, and spaces in names | |
# - Supports both dry run mode (previewing changes) and actual formatting | |
# - Provides verbose output to display the changes made during formatting | |
################################################################################ | |
# Usage: | |
# - Run the script with the desired command-line arguments specifying the paths to the directories or files | |
# to be formatted. | |
# - Follow the prompts to choose dry run mode, verbose output, and confirmation of the formatting operation. | |
# - The script will recursively format the contents of the specified directories, renaming files and directories | |
# according to the specified rules. | |
# - Optionally, specified files can be deleted from the file system. | |
# - The script provides a summary of the formatting process, including the number of files and directories | |
# renamed or deleted. | |
################################################################################ | |
# Note: It is recommended to review the formatting rules and backup important files before running the script | |
# to avoid any unintended changes to the file system. | |
################################################################################ | |
import os | |
import re | |
import sys | |
from unidecode import unidecode | |
# Names that are skipped entirely while walking a directory (a skipped
# directory is not descended into either).
EXCLUDED_FILES = [
    "Cargo.lock", "Cargo.toml", "LICENSE", "CNAME", "README.md",
    "target", "debug", "src", "lib", "out", "bin",
]

# Elements whose extension is listed here are skipped as well.
EXCLUDED_EXTENSIONS = [
    ".class", ".java", ".iml", ".d", ".o", ".rmeta",
    ".qpf", ".qsf", ".qip", ".qdf", ".qws", ".qps",
    ".bsf", ".bdf", ".v", ".rpt",
]

# Files with these extensions are named with the directory separator.
TREAT_EXTENSION_AS_DIR = [".zip"]

# Files with exactly these names are deleted when encountered.
DELETE_FILES = [".DS_Store", "indent.log"]

# Characters that are replaced by the separator when renaming.
BAD_CHARS = ',&=+@#^$%*!`"?|<>:'

# Word separators used in file names and directory names respectively.
FILE_SEPARATOR = "_"
DIR_SEPARATOR = "-"

# Run options (set by the launcher) and counters (updated while formatting).
verbose = dryRun = False
filesRenamed = dirRenamed = filesDeleted = 0
def isElementFormatted(element: str, isElementAFile: bool):
    """Tell whether a file or directory name already follows the naming rules.

    Args:
        element: Bare name (no directory part) of the file or directory.
        isElementAFile: True when the element is a file. Files whose extension
            is listed in TREAT_EXTENSION_AS_DIR are checked with the directory
            rules instead.

    Returns:
        True when the name needs no renaming, False otherwise.
    """
    stem, suffix = os.path.splitext(element)
    usesFileRules = isElementAFile and suffix not in TREAT_EXTENSION_AS_DIR

    # Under the directory rules, "." is always accepted.
    if not usesFileRules and element == ".":
        return True

    # Checks shared by both rule sets.
    if not (element.islower() or element.isdigit()):
        return False
    if not element.isascii() or " " in element:
        return False
    if containsCharsNotSurroundedByDigits(stem, "."):
        return False

    # File rules forbid the directory separator and inspect only the stem for
    # bad characters; directory rules are the mirror image on the full name.
    if usesFileRules:
        foreignSeparator, ownSeparator, inspected = DIR_SEPARATOR, FILE_SEPARATOR, stem
    else:
        foreignSeparator, ownSeparator, inspected = FILE_SEPARATOR, DIR_SEPARATOR, element

    if foreignSeparator in element:
        return False
    if substringOfRepeatedCharsExists(element, ownSeparator):
        return False
    return not any(char in inspected for char in BAD_CHARS)
def renameElement(elementPath: str, separator: str):
    """Rename a file or directory so that its name follows the naming rules.

    The name is lowercased and passed through ``unidecode``; apostrophes are
    dropped; underscores (except a leading one), whitespace, BAD_CHARS, and
    dots/dashes that are not between two digits become ``separator``; runs of
    the separator collapse to one. For files, the extension is kept and only
    has its whitespace removed.

    Reads the module-level ``dryRun`` and ``verbose`` flags: in dry-run mode
    nothing is renamed on disk, and the change is printed when either flag is
    set.

    Args:
        elementPath: Path of the file or directory to rename.
        separator: Replacement string for unwanted characters.

    Returns:
        The new path when the element was (or, in dry-run mode, would be)
        renamed; ``elementPath`` unchanged when no rename is needed, when no
        usable name remains, when the target already exists, or when the
        rename fails.
    """
    # Get name of element
    element = os.path.basename(elementPath)

    # Rewrite in lowercase and remove accents
    normalizedElement = unidecode(element.lower())
    fileName = normalizedElement
    extension = ""

    # If file, split into name and extension
    if os.path.isfile(elementPath):
        fileName, extension = os.path.splitext(normalizedElement)
        # Remove spaces in extension
        extension = re.sub(r"\s", "", extension)

    # Delete ' in file name
    fileName = fileName.replace("'", "")

    # A name made only of apostrophes (or an empty basename) leaves nothing to
    # work with; indexing fileName[0] below would raise IndexError.
    if not fileName:
        return elementPath

    # Replace _ with separator except for the first character
    fileName = fileName[0] + fileName[1:].replace("_", separator)

    # Replace bad characters with separator. BAD_CHARS is escaped so that
    # adding "]", "-" or "\" to it cannot corrupt the character class.
    fileName = re.sub(rf"[\s{re.escape(BAD_CHARS)}]", separator, fileName)

    # Replace all dots and dashes that are not surrounded by digits with a separator
    fileName = re.sub(r"(?<!\d)[-\.]|[-\.](?!\d)", separator, fileName)

    # Replace all substrings of separator with a single separator
    fileName = re.sub(rf"({re.escape(separator)})\1+", separator, fileName)

    # Get full name of renamed element
    renamedElement = fileName + extension

    # Return elementPath if element is not renamed
    if element == renamedElement:
        return elementPath

    renamedElementPath = os.path.join(os.path.dirname(elementPath), renamedElement)

    # os.rename silently replaces an existing file on POSIX, so refuse to
    # clobber a different element. samefile() keeps case-only renames working
    # on case-insensitive file systems, where the "target" is the element itself.
    if os.path.exists(renamedElementPath) and not os.path.samefile(elementPath, renamedElementPath):
        print(f"| Error renaming {elementPath} --> {renamedElementPath} (target already exists)")
        return elementPath

    # Rename element
    if not dryRun:
        try:
            os.rename(elementPath, renamedElementPath)
        except OSError:
            print(f"| Error renaming {elementPath} --> {renamedElementPath}")
            return elementPath

    # Print changes
    if verbose or dryRun:
        print(f"| Renamed {elementPath} --> {renamedElement}")

    return renamedElementPath
def containsCharsNotSurroundedByDigits(string: str, chars: str):
    """Check whether any of ``chars`` occurs in ``string`` without a digit on both sides.

    Args:
        string: Text to inspect.
        chars: Characters to look for; each one is matched literally.

    Returns:
        True when at least one occurrence of a character from ``chars`` is
        missing a digit immediately before or immediately after it.
    """
    # An empty set of characters can never match (and "[]" is not a valid class).
    if not chars:
        return False

    # Escape so that "^", "]", "-" or "\" are treated as plain characters
    # instead of altering the character class.
    charClass = f"[{re.escape(chars)}]"
    return bool(re.search(rf"(?<!\d){charClass}|{charClass}(?!\d)", string))
def substringOfRepeatedCharsExists(string: str, chars: str):
    """Check whether ``string`` contains the same character from ``chars`` twice or more in a row.

    Args:
        string: Text to inspect.
        chars: Characters to look for; each one is matched literally.

    Returns:
        True when some character of ``chars`` is immediately repeated in
        ``string``, False otherwise.
    """
    # An empty set of characters can never match (and "[]" is not a valid class).
    if not chars:
        return False

    # Escape so that "^", "]", "-" or "\" are treated as plain characters
    # instead of altering the character class.
    return bool(re.search(rf"([{re.escape(chars)}])\1+", string))
def formatDirectory(path: str):
    """Recursively format the contents of a directory.

    Does nothing when the directory contains a ``.donotformat`` file.
    Otherwise, for every entry: files named in DELETE_FILES are deleted;
    hidden entries and entries matching EXCLUDED_FILES / EXCLUDED_EXTENSIONS
    are skipped; directories are renamed if needed and then descended into;
    files are renamed if needed. Updates the module-level counters and honours
    the ``dryRun`` and ``verbose`` flags.

    Args:
        path: Directory whose contents are formatted (the directory itself is
            not renamed here).

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. FileNotFoundError or
            NotADirectoryError) when ``path`` cannot be listed.
    """
    global dirRenamed, filesRenamed, filesDeleted

    elements = os.listdir(path)
    if ".donotformat" in elements:
        return

    for element in elements:
        # Get path of element
        elementPath = os.path.join(path, element)
        isFile = os.path.isfile(elementPath)

        # Delete files specified in DELETE_FILES
        if isFile and element in DELETE_FILES:
            if not dryRun:
                try:
                    os.remove(elementPath)
                except OSError:
                    # One undeletable file must not abort the whole run;
                    # report it and move on without counting it.
                    print(f"| Error deleting {elementPath}")
                    continue

            # Increment deleted files counter
            filesDeleted += 1
            if verbose or dryRun:
                print(f"| Deleted {element}")
            continue

        # Ignore hidden files and excluded files/extensions
        if (
            element.startswith(".")
            or element in EXCLUDED_FILES
            or os.path.splitext(element)[1] in EXCLUDED_EXTENSIONS
        ):
            continue

        if os.path.isdir(elementPath):
            # Directory to descend into; stays the original path unless the
            # directory was really renamed on disk.
            nextPath = elementPath

            if not isElementFormatted(element, False):
                renamedElementPath = renameElement(elementPath, DIR_SEPARATOR)

                # If directory is renamed, increment renamed directories counter
                if elementPath != renamedElementPath:
                    dirRenamed += 1
                    # In dry-run mode the new path does not exist on disk.
                    if not dryRun:
                        nextPath = renamedElementPath

            # Recursively rename contents of directory
            formatDirectory(nextPath)
        elif isFile and not isElementFormatted(element, True):
            # Archives listed in TREAT_EXTENSION_AS_DIR use the directory separator
            treatAsDir = os.path.splitext(element)[1] in TREAT_EXTENSION_AS_DIR
            renamedElementPath = renameElement(
                elementPath,
                DIR_SEPARATOR if treatAsDir else FILE_SEPARATOR,
            )

            # If file is renamed, increment renamed files counter
            if elementPath != renamedElementPath:
                filesRenamed += 1
def initializeFormatter(path: str):
    """Format one path given by the user: the element itself, then its contents.

    A directory is renamed if needed and its contents are formatted
    recursively; a file is renamed if needed. Updates the module-level
    counters and honours the ``dryRun`` flag.

    Args:
        path: Path to a file or directory; trailing slashes are ignored.

    Raises:
        FileNotFoundError: When ``path`` is empty or nothing exists at it.
        OSError: Propagated from the directory traversal.
    """
    global dirRenamed, filesRenamed

    # An empty argument used to crash on path[-1]; report it the way the
    # caller already expects missing paths to be reported.
    if not path:
        raise FileNotFoundError("No such file or directory: ''")

    # Remove every trailing slash so that basename() yields the real name
    # ("dir//" would otherwise produce an empty name).
    path = path.rstrip("/")

    # Only slashes were given (file-system root): there is no name to format,
    # and the root is left untouched.
    if not path:
        return

    if not os.path.lexists(path):
        raise FileNotFoundError(f"No such file or directory: {path!r}")

    element = os.path.basename(path)

    # Check if element is a directory or a file and if it is formatted
    if os.path.isdir(path):
        # Directory whose contents get formatted; stays the original path
        # unless the directory was really renamed on disk.
        contentsPath = path

        if not isElementFormatted(element, False):
            renamedPath = renameElement(path, DIR_SEPARATOR)

            # If directory is renamed, increment renamed directories counter
            if path != renamedPath:
                dirRenamed += 1
                # In dry-run mode the new path does not exist on disk.
                if not dryRun:
                    contentsPath = renamedPath

        # Recursively rename contents of directory
        formatDirectory(contentsPath)
    elif os.path.isfile(path) and not isElementFormatted(element, True):
        # Archives listed in TREAT_EXTENSION_AS_DIR use the directory separator
        treatAsDir = os.path.splitext(element)[1] in TREAT_EXTENSION_AS_DIR
        renamedPath = renameElement(
            path,
            DIR_SEPARATOR if treatAsDir else FILE_SEPARATOR,
        )

        # If file is renamed, increment renamed files counter
        if path != renamedPath:
            filesRenamed += 1
def launcher():
    """Interactive entry point of the script.

    Asks whether to do a dry run, whether to print changes (only asked for a
    real run), and for a final confirmation. Then formats every path given on
    the command line and prints a summary of the counters.
    """
    global verbose, dryRun

    def askYesNo(question: str):
        # Keep asking until the (case-insensitive) answer is "y" or "n".
        while True:
            answer = input(question).lower()
            if answer in ("y", "n"):
                return answer == "y"
            print("| Invalid input")

    print("------------")

    dryRun = askYesNo("| Do you want to run a dry run? (y/n): ")

    # A dry run always prints its changes, so verbosity is only asked otherwise.
    if not dryRun:
        verbose = askYesNo("| Do you want to print the changes? (y/n): ")

    if not askYesNo("| Are you sure you want to format your file system? (y/n): "):
        print("| Your file system has not been formatted")
        return

    print("------------")

    for arg in sys.argv[1:]:
        target = str(arg)
        try:
            initializeFormatter(target)
        except FileNotFoundError:
            reason = "| File not found"
        except NotADirectoryError:
            reason = "| Directory not found"
        else:
            print(f'| Done formatting "{target}"')
            continue
        print(reason)
        print(f'| Error encountered while formatting "{target}"')

    summary = (
        "------------",
        "| Your file system has properly been formatted",
        f"| Files renamed: {filesRenamed}",
        f"| Directories renamed: {dirRenamed}",
        f"| Files deleted: {filesDeleted}",
        "------------",
    )
    for line in summary:
        print(line)

    if dryRun:
        print("| Script ran in dry run mode")
        print("------------")
# Start the interactive launcher only when executed as a script, so that
# importing this module does not prompt the user or touch the file system.
if __name__ == "__main__":
    launcher()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment