Last active
June 11, 2025 16:17
-
-
Save dimitrieh/6e269b7feede572497e51e9106b5fee3 to your computer and use it in GitHub Desktop.
Bash script that counts keyword occurrences in a codebase for each month of a time range and exports the results to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Counts, month by month, how many import lines pull each keyword (for example
# a React component name) from a given package, and writes the counts to a CSV
# file with one row per keyword and one column per month.
#
# Usage notes:
#   - Make sure it's executable: chmod +x script.sh
#   - Ideally run in an isolated environment such as a codespace.
#   - Make sure no files already known to git are modified.
#   - The script calls `date -d ...`, which is GNU date syntax.

# --- Configuration ---
REPO_PATH="./"                                   # Path to your Git repository. ./ is the current directory.
KEYWORDS=("Checkbox" "AvatarDropdown" "Button")  # Array of keywords
PACKAGE_NAME="antd"                              # Package name to filter imports
START_DATE="2021-07-01"                          # YYYY-MM-DD
END_DATE="2023-07-31"                            # YYYY-MM-DD
OUTPUT_CSV_FILE="keyword_analysis.csv"           # Optionally prefix a path before the filename
# ---------------------
# --- Announce what is about to be analysed ---
echo "Starting Git keyword analysis for repository: $REPO_PATH"
echo "Keywords: ${KEYWORDS[*]}"
echo "Timespan: $START_DATE to $END_DATE"

# --- Validate repository path ---
if [[ ! -d "$REPO_PATH" ]]; then
  echo "Error: Repository path '$REPO_PATH' does not exist." >&2
  exit 1
fi

# Test with -e rather than -d: in a linked worktree or a submodule, .git is a
# plain file pointing at the real git directory, not a directory.
if [[ ! -e "$REPO_PATH/.git" ]]; then
  echo "Error: '$REPO_PATH' is not a Git repository." >&2
  exit 1
fi
# Remember where the script was started so it can return there at the end.
ORIGINAL_CWD=$PWD

# All git commands below run from inside the repository.
cd "$REPO_PATH" || { echo "Error: Could not change directory to $REPO_PATH. Exiting." >&2; exit 1; }

# --- Get current git state before modification ---
# ORIGINAL_BRANCH is the short branch name (the literal string "HEAD" when the
# repository is on a detached HEAD); ORIGINAL_HEAD is the full commit hash.
# Each rev-parse is checked on its own so a failure of the first one is not
# hidden by the second one succeeding.
if ! ORIGINAL_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) ||
  ! ORIGINAL_HEAD=$(git rev-parse HEAD 2>/dev/null); then
  echo "Could not get original Git state. Exiting." >&2
  exit 1
fi
# --- Build the list of months (YYYY-MM) covered by the analysis ---

#######################################
# Prints every month from the month of the start date through the month of
# the end date, inclusive, one YYYY-MM per line.
# Arguments: $1 - start date, $2 - end date (anything `date -d` can parse)
# Outputs:   the month list on stdout; error messages on stderr
# Returns:   0 on success; 1 if a date cannot be parsed or the start month
#            lies after the end month
#######################################
list_months() {
  local first_month last_month
  if ! first_month=$(date -d "$1" +%Y-%m 2>/dev/null) ||
    ! last_month=$(date -d "$2" +%Y-%m 2>/dev/null); then
    echo "Error: Could not parse date range '$1' to '$2'." >&2
    return 1
  fi

  # Only year and month are kept, so the day of the start date can never make
  # a "+1 month" step overflow and skip a month (e.g. Jan 31 -> Mar 3).
  # 10# forces base 10 so months such as "08" are not read as octal.
  local year=$((10#${first_month%-*})) month=$((10#${first_month#*-}))
  local end_year=$((10#${last_month%-*})) end_month=$((10#${last_month#*-}))

  if ((year * 12 + month > end_year * 12 + end_month)); then
    echo "Error: Start date '$1' is after end date '$2'." >&2
    return 1
  fi

  while ((year * 12 + month <= end_year * 12 + end_month)); do
    printf '%04d-%02d\n' "$year" "$month"
    if ((month == 12)); then
      year=$((year + 1))
      month=1
    else
      month=$((month + 1))
    fi
  done
}

MONTHS=()
if ! MONTH_LINES=$(list_months "$START_DATE" "$END_DATE"); then
  exit 1
fi
while IFS= read -r month_line; do
  MONTHS+=("$month_line")
done <<<"$MONTH_LINES"
# --- Write CSV header ---

#######################################
# Creates (or truncates) the CSV file and writes its header row:
# "Keyword" followed by one column per month.
# Arguments: $1 - output file path; remaining arguments - month labels
# Returns:   non-zero if the file cannot be written
#######################################
write_csv_header() {
  local out_file=$1
  shift
  local columns=("Keyword" "$@")
  # "${columns[*]}" joins on the first character of IFS; `local` keeps the
  # changed IFS from leaking into the rest of the script.
  local IFS=','
  printf '%s\n' "${columns[*]}" >"$out_file"
}

write_csv_header "$OUTPUT_CSV_FILE" "${MONTHS[@]}"
# --- Prepare array of monthly commits ---

#######################################
# Prints the last second of the given month as an ISO 8601 timestamp.
# Arguments: $1 - month as YYYY-MM
# Returns:   non-zero if `date` cannot parse the month
#######################################
month_end_timestamp() {
  date -d "$1-01 +1 month -1 second" -Iseconds 2>/dev/null
}

# MONTHLY_COMMITS is index-aligned with MONTHS: entry i holds the hash of the
# last commit made in or before month i, or an empty string when there is
# none. The analysis loop uses the empty string to know which months to skip.
MONTHLY_COMMITS=()
for month_str in "${MONTHS[@]}"; do
  END_OF_MONTH_DATETIME=$(month_end_timestamp "$month_str")
  LAST_COMMIT_HASH=""
  # Only query git when the cutoff could be computed, so git log is never
  # run with an empty --before value.
  if [[ -n "$END_OF_MONTH_DATETIME" ]]; then
    LAST_COMMIT_HASH=$(git log --pretty=format:%H --before="$END_OF_MONTH_DATETIME" --max-count=1 2>/dev/null)
  fi
  MONTHLY_COMMITS+=("$LAST_COMMIT_HASH")

  if [[ -z "$LAST_COMMIT_HASH" ]]; then
    echo "No commit found for $month_str (up to $END_OF_MONTH_DATETIME)."
  else
    echo "Commit for $month_str: $LAST_COMMIT_HASH"
  fi
done
# --- Main analysis loop ---

#######################################
# Counts the lines in JS/TS sources of one commit that import a keyword from
# a package, either as a named import (`import { X } from 'pkg'`, including
# `X as Y`) or as a default import (`import X from 'pkg'`).
# Each matching line is counted once: both patterns go to a single git grep,
# so an aliased import is not counted twice.
# The commit's tree is searched directly, so nothing is checked out.
# Limitations: only single-line import statements are matched; keyword and
# package are inserted into the regular expression unescaped.
# Arguments: $1 - keyword, $2 - package name, $3 - commit to search
#            (defaults to HEAD)
# Outputs:   the number of matching lines on stdout (0 if git grep fails)
#######################################
count_package_imports() {
  local keyword=$1 package=$2 rev=${3:-HEAD}
  local from_clause="[[:space:]]+from[[:space:]]+['\"]${package}['\"]"
  local named_re="^import[[:space:]]+\{[^}]*\b${keyword}\b[^}]*\}${from_clause}"
  local default_re="^import[[:space:]]+${keyword}${from_clause}"
  local matches
  matches=$(git grep -h -I -E -e "$named_re" -e "$default_re" "$rev" \
    -- '*.js' '*.jsx' '*.ts' '*.tsx' 2>/dev/null | wc -l)
  # Arithmetic expansion strips any padding some wc implementations add.
  echo "$((matches))"
}

for keyword in "${KEYWORDS[@]}"; do
  echo "Processing React component import: '$keyword'"
  ROW_DATA=("$keyword") # Each CSV row starts with the keyword itself

  for i in "${!MONTHS[@]}"; do
    month_str="${MONTHS[$i]}"
    commit_hash="${MONTHLY_COMMITS[$i]}"
    echo " Analyzing month: $month_str"

    if [[ -z "$commit_hash" ]]; then
      echo " No commit found for $month_str. Count for '$keyword' will be 0."
      ROW_DATA+=(0)
      continue
    fi

    echo " Searching commit: $commit_hash..."
    echo " Counting import of '$keyword' from package '$PACKAGE_NAME'..."
    IMPORT_COUNT=$(count_package_imports "$keyword" "$PACKAGE_NAME" "$commit_hash")
    echo " Import count for '$keyword' from '$PACKAGE_NAME': $IMPORT_COUNT"
    ROW_DATA+=("$IMPORT_COUNT")
  done # End of month loop

  # Append this keyword's row to the CSV. The join runs in a subshell so the
  # changed IFS cannot affect the rest of the script.
  (
    IFS=','
    printf '%s\n' "${ROW_DATA[*]}"
  ) >>"$OUTPUT_CSV_FILE"
done # End of keyword loop
# --- Revert to original git state ---

#######################################
# Puts the repository back on the branch, or failing that the commit, it was
# on before the analysis started.
# Arguments: $1 - original branch name as reported by
#                 `git rev-parse --abbrev-ref HEAD` (the literal "HEAD" when
#                 the repository was on a detached HEAD)
#            $2 - original commit hash
# Returns:   0 if the original state was restored, 1 otherwise
#######################################
restore_git_state() {
  local branch=$1 head=$2

  # A branch name of "HEAD" means there was no branch to return to.
  # `git checkout HEAD` would succeed without moving anywhere, so in that
  # case go straight to the commit hash.
  if [[ "$branch" != "HEAD" ]]; then
    if [[ -n "$branch" ]] && git checkout -q "$branch" -- &>/dev/null; then
      echo "Successfully reverted to original branch: $branch"
      return 0
    fi
    echo "Warning: Could not checkout original branch '$branch'. Attempting to checkout original HEAD." >&2
  fi

  if [[ -n "$head" ]] && git checkout -q "$head" -- &>/dev/null; then
    echo "Successfully reverted to original HEAD: $head"
    return 0
  fi

  echo "Error: Could not revert to original Git state. Please manually checkout '$head' or your desired branch." >&2
  return 1
}

echo "Reverting to original Git state..."
restore_git_state "$ORIGINAL_BRANCH" "$ORIGINAL_HEAD"
# Return to the directory the script was started from.
cd "$ORIGINAL_CWD" || { echo "Error: Could not change back to original directory $ORIGINAL_CWD." >&2; exit 1; }

# The CSV was written while the working directory was REPO_PATH, so a relative
# OUTPUT_CSV_FILE lives under the repository, not under the start directory.
# Report the path accordingly.
case "$OUTPUT_CSV_FILE" in
  /*) RESULT_PATH=$OUTPUT_CSV_FILE ;;
  *) RESULT_PATH="${REPO_PATH%/}/$OUTPUT_CSV_FILE" ;;
esac
printf "\nAnalysis complete. Results saved to '%s'\n" "$RESULT_PATH"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment