Skip to content

Instantly share code, notes, and snippets.

@dimitrieh
Last active June 11, 2025 16:17
Show Gist options
  • Save dimitrieh/6e269b7feede572497e51e9106b5fee3 to your computer and use it in GitHub Desktop.
Save dimitrieh/6e269b7feede572497e51e9106b5fee3 to your computer and use it in GitHub Desktop.
Bash script that counts keyword imports in a codebase per month across a time range and exports the results to CSV
#!/bin/bash
# Counts, month by month, how many import statements in a Git repository's
# *.js / *.jsx / *.ts / *.tsx files import each keyword from PACKAGE_NAME, and
# writes the result as a CSV file with one row per keyword and one column per month.
#
# Make sure it's executable with chmod +x script.sh
# Ideally run in an isolated env like a codespace
# Make sure no files already known to git are modified
# The date handling relies on `date -d`, i.e. GNU date.
# --- Configuration ---
REPO_PATH="./" # Path to your Git repository. ./ is current directory.
KEYWORDS=("Checkbox" "AvatarDropdown" "Button") # Imported identifiers to count; each one becomes a CSV row
PACKAGE_NAME="antd" # Only imports from this package are counted
START_DATE="2021-07-01" # YYYY-MM-DD; its month is the first CSV column
END_DATE="2023-07-31" # YYYY-MM-DD; its month is the last CSV column (inclusive)
OUTPUT_CSV_FILE="keyword_analysis.csv" # Optionally prefix the filename with a path. A relative path is resolved from REPO_PATH, because the script changes into that directory before writing.
# ---------------------
# Announce the run parameters, one per line.
printf '%s\n' \
  "Starting Git keyword analysis for repository: $REPO_PATH" \
  "Keywords: ${KEYWORDS[*]}" \
  "Timespan: $START_DATE to $END_DATE"
# --- Validate repository path ---
# Fail early, with a message on stderr, when REPO_PATH is missing or is not
# the top level of a Git work tree.
if [[ ! -d "$REPO_PATH" ]]; then
  echo "Error: Repository path '$REPO_PATH' does not exist." >&2
  exit 1
fi
# In linked worktrees and submodules ".git" is a regular file that points at
# the real Git directory, so test for existence rather than for a directory.
if [[ ! -e "$REPO_PATH/.git" ]]; then
  echo "Error: '$REPO_PATH' is not a Git repository." >&2
  exit 1
fi
# Remember the directory the script was started from; the script returns to it
# once the analysis is finished.
ORIGINAL_CWD=$(pwd)
# Every later git command runs from inside the repository directory.
if ! cd "$REPO_PATH"; then
  echo "Error: Could not change directory to $REPO_PATH. Exiting." >&2
  exit 1
fi
# --- Get current git state before modification ---
# Record both the branch name and the exact commit HEAD points at, so the
# repository can be put back the way it was found. Each command's status is
# checked individually; testing $? afterwards would only cover the last one.
if ! ORIGINAL_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) ||
  ! ORIGINAL_HEAD=$(git rev-parse HEAD 2>/dev/null); then
  echo "Could not get original Git state. Exiting." >&2
  exit 1
fi
# On a detached HEAD, --abbrev-ref prints the literal string "HEAD". Checking
# out "HEAD" later would succeed without moving anywhere, so remember the
# commit hash as the restore target instead.
if [[ "$ORIGINAL_BRANCH" == "HEAD" ]]; then
  ORIGINAL_BRANCH=$ORIGINAL_HEAD
fi
# --- Prepare the list of months (these become the CSV columns) ---
#######################################
# Populate MONTHS with every calendar month from the start date's month
# through the end date's month, inclusive, as YYYY-MM strings.
# Months are stepped with integer arithmetic rather than
# `date -d "<date> +1 month"`: adding a month to a late day-of-month overflows
# into the month after next (2021-01-31 +1 month is 2021-03-03), which would
# silently drop a column.
# Globals:   MONTHS (written)
# Arguments: $1 - start date, $2 - end date (any string `date -d` accepts)
# Returns:   0 on success; 1 if a date cannot be parsed or the start month is
#            after the end month (which previously looped forever)
#######################################
build_month_list() {
  local first_month last_month year month last_year last_mon label
  first_month=$(date -d "$1" +%Y-%m) || return 1
  last_month=$(date -d "$2" +%Y-%m) || return 1
  # 10# forces base 10 so that "08" and "09" are not rejected as invalid octal.
  year=$((10#${first_month%-*}))
  month=$((10#${first_month##*-}))
  last_year=$((10#${last_month%-*}))
  last_mon=$((10#${last_month##*-}))
  if ((year > last_year || (year == last_year && month > last_mon))); then
    echo "Error: start date '$1' is after end date '$2'." >&2
    return 1
  fi
  MONTHS=()
  while ((year < last_year || (year == last_year && month <= last_mon))); do
    printf -v label '%04d-%02d' "$year" "$month"
    MONTHS+=("$label")
    if ((month == 12)); then
      year=$((year + 1))
      month=1
    else
      month=$((month + 1))
    fi
  done
}
build_month_list "$START_DATE" "$END_DATE" || {
  echo "Error: Could not build the month list from '$START_DATE' to '$END_DATE'." >&2
  exit 1
}
# Write CSV header
# The first column holds the keyword; each following column is one entry of
# MONTHS. The line is assembled in memory and written in a single step, which
# also truncates any previous output file.
csv_header="Keyword"
for column in "${MONTHS[@]}"; do
  csv_header+=",$column"
done
printf '%s\n' "$csv_header" > "$OUTPUT_CSV_FILE"
# --- Prepare array of monthly commits ---
# MONTHLY_COMMITS is index-aligned with MONTHS: entry i holds the hash of the
# newest commit dated no later than the last second of month i, or an empty
# string when no such commit exists. The empty entries tell the analysis loop
# which months to skip.
MONTHLY_COMMITS=()
for month_label in "${MONTHS[@]}"; do
  # Last second of the month: first day of the month, plus one month, minus one second.
  month_cutoff=$(date -d "$month_label-01 +1 month -1 second" -Iseconds)
  newest_hash=$(git log -n 1 --pretty=format:%H --before="$month_cutoff" 2>/dev/null)
  MONTHLY_COMMITS+=("$newest_hash")
  if [ -n "$newest_hash" ]; then
    echo "Commit for $month_label: $newest_hash"
  else
    echo "No commit found for $month_label (up to $month_cutoff)."
  fi
done
# --- Main analysis loop ---
# For each keyword, count the import statements that pull it in from
# PACKAGE_NAME in every monthly snapshot, then append one CSV row: the keyword
# followed by one count per month, in MONTHS order.
#
# Escape dots so that a package name such as "lodash.get" is matched literally
# instead of "." acting as a regex wildcard.
package_regex=${PACKAGE_NAME//./\\.}
for keyword in "${KEYWORDS[@]}"; do
  echo "Processing React component import: '$keyword'"
  ROW_DATA=("$keyword") # Start with the keyword for this row
  # Named import: import { ...keyword... } from 'PACKAGE_NAME';
  # This already matches the aliased form `import { keyword as X } from ...`,
  # so no separate alias pattern is used: adding one counted those lines twice.
  named_import_regex="^import[[:space:]]+\{[^}]*\b$keyword\b[^}]*\}[[:space:]]+from[[:space:]]+[\'\"]$package_regex[\'\"]"
  # Default import: import Keyword from 'PACKAGE_NAME';
  default_import_regex="^import[[:space:]]+$keyword([[:space:]]+|,|$)from[[:space:]]+[\'\"]$package_regex[\'\"]"
  for i in "${!MONTHS[@]}"; do
    month_str="${MONTHS[$i]}"
    commit_hash="${MONTHLY_COMMITS[$i]}"
    echo " Analyzing month: $month_str"
    if [[ -z "$commit_hash" ]]; then
      echo " No commit found for $month_str. Count for '$keyword' will be 0."
      ROW_DATA+=(0)
      continue
    fi
    echo " Counting import of '$keyword' from package '$PACKAGE_NAME' at commit $commit_hash..."
    # Search the commit's tree directly. This needs no checkout, so the working
    # tree is never touched and the same commit is not checked out again for
    # every keyword. A line can match only one of the two patterns (one
    # requires "{" right after "import", the other the keyword), so the number
    # of matching lines is the import count.
    matching_lines=$(git grep -h -I -E \
      -e "$named_import_regex" -e "$default_import_regex" \
      "$commit_hash" -- '*.js' '*.jsx' '*.ts' '*.tsx' 2>/dev/null)
    grep_status=$?
    # Exit status 1 only means "no match"; anything higher is a real failure.
    if ((grep_status > 1)); then
      echo " Error searching commit $commit_hash. Skipping." >&2
      ROW_DATA+=(0)
      continue
    fi
    IMPORT_COUNT=0
    if [[ -n "$matching_lines" ]]; then
      # The arithmetic expansion strips any padding some wc implementations emit.
      IMPORT_COUNT=$(($(printf '%s\n' "$matching_lines" | wc -l)))
    fi
    echo " Import count for '$keyword' from '$PACKAGE_NAME': $IMPORT_COUNT"
    ROW_DATA+=("$IMPORT_COUNT")
  done # End of month loop
  # Append keyword's row data to CSV. The comma join happens in a subshell so
  # the script's own IFS is never modified.
  (
    IFS=','
    echo "${ROW_DATA[*]}"
  ) >> "$OUTPUT_CSV_FILE"
done # End of keyword loop
# --- Revert to original git state ---
# Preferred target is the branch recorded at startup; if that checkout fails,
# fall back to the exact commit recorded alongside it.
echo "Reverting to original Git state..."
if git checkout -q "$ORIGINAL_BRANCH" &>/dev/null; then
  echo "Successfully reverted to original branch: $ORIGINAL_BRANCH"
else
  echo "Warning: Could not checkout original branch '$ORIGINAL_BRANCH'. Attempting to checkout original HEAD." >&2
  if git checkout -q "$ORIGINAL_HEAD" &>/dev/null; then
    echo "Successfully reverted to original HEAD: $ORIGINAL_HEAD"
  else
    # Nothing more can be done automatically; tell the user how to recover.
    echo "Error: Could not revert to original Git state. Please manually checkout '$ORIGINAL_HEAD' or your desired branch." >&2
  fi
fi
# Resolve the CSV location while still inside the repository directory: a
# relative OUTPUT_CSV_FILE was written relative to it, not to the directory the
# script was started from, so the bare name would be misleading after the cd.
case "$OUTPUT_CSV_FILE" in
  /*) csv_location=$OUTPUT_CSV_FILE ;;
  *) csv_location="$PWD/$OUTPUT_CSV_FILE" ;;
esac
# Change back to the original working directory
cd "$ORIGINAL_CWD" || {
  echo "Error: Could not change back to original directory $ORIGINAL_CWD." >&2
  exit 1
}
# printf with %s prints the path verbatim; `echo -e` would have interpreted any
# backslash sequences contained in the filename.
printf "\nAnalysis complete. Results saved to '%s'\n" "$csv_location"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment