Created
March 5, 2025 08:00
-
-
Save harshadsatra/e5a717654fe1f3e19dfa5b5bf6c56b44 to your computer and use it in GitHub Desktop.
Git History to CSV - Enhanced Branch Detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
##################################################################### | |
# Git History to CSV - Enhanced Branch Detection | |
##################################################################### | |
# | |
# DESCRIPTION: | |
# This script generates a CSV file containing the history of git commits | |
# with comprehensive branch information. It tracks which commits belong to | |
# which branches by using multiple detection methods, ensuring accurate | |
# branch attribution even for commits without direct references. | |
# | |
# FEATURES: | |
# - Enhanced branch detection (uses both direct refs and branch ancestry) | |
# - Supports all branches or specific branch filtering | |
# - Date range filtering capability | |
# - Multiple sorting options | |
# - Detailed branch statistics | |
# - Debug mode for troubleshooting | |
# - Handles complex commit messages with commas and quotes | |
# | |
# USAGE: | |
# ./git_history_to_csv.sh [options] | |
# | |
# OPTIONS: | |
# -o FILE Specify output file (default: git_history.csv) | |
# -a Include all branches (default) | |
# -c Current branch only | |
# -b BRANCHES Specific branches only (comma-separated list) | |
# -s DATE Include commits since DATE (format: YYYY-MM-DD) | |
# -u DATE Include commits until DATE (format: YYYY-MM-DD) | |
# -d ORDER Sort order: 'default', 'author-date', or 'committer-date' | |
# -v Enable debug output | |
# -h Display help message | |
# | |
# EXAMPLES: | |
# # Put this script in your root project directory | |
# # Generate CSV with all branches (default) | |
# ./git_history_to_csv.sh | |
# | |
# # Specify output file | |
# ./git_history_to_csv.sh -o project_history.csv | |
# | |
# # Only include commits from specific branches | |
# ./git_history_to_csv.sh -b main,develop | |
# | |
# # Include commits from the last 30 days | |
# ./git_history_to_csv.sh -s "$(date -d '30 days ago' +%Y-%m-%d)" | |
# | |
# # Debug mode with date range and specific output file | |
# ./git_history_to_csv.sh -v -s 2023-01-01 -u 2023-12-31 -o year_2023.csv | |
# | |
##################################################################### | |
# Preflight 1: nothing below can work without a git binary on the PATH.
if ! command -v git > /dev/null 2>&1; then
  echo "Error: git is not installed or not in the PATH" >&2
  exit 1
fi

# Preflight 2: git rev-parse fails when the current directory does not belong
# to a repository, so a non-zero status here means there is no history to read.
if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
  echo "Error: Not a git repository" >&2
  exit 1
fi
# Default values for every option; the command line flags override them.
OUTPUT_FILE="git_history.csv" # path of the CSV file that gets written
ALL_BRANCHES=true             # true: walk every ref (git log --all)
SPECIFIC_BRANCHES=""          # comma-separated branch list, empty when unused
SINCE_DATE=""                 # lower date bound, empty means unbounded
UNTIL_DATE=""                 # upper date bound, empty means unbounded
SORT_ORDER="default"          # 'default', 'author-date' or 'committer-date'
DEBUG=false                   # true: emit [DEBUG] traces on stderr
#######################################
# Print the command line synopsis and terminate the script.
# Arguments: none
# Outputs:   the usage text on stdout
# Returns:   never returns; always exits with status 1
#######################################
show_usage() {
  # Unquoted heredoc delimiter on purpose: $0 must expand to the script name.
  cat << EOF
Usage: $0 [options]
Options:
 -o FILE Specify output file (default: git_history.csv)
 -a Include all branches (default)
 -c Current branch only
 -b BRANCHES Specific branches only (comma-separated list)
 -s DATE Include commits since DATE (format: YYYY-MM-DD)
 -u DATE Include commits until DATE (format: YYYY-MM-DD)
 -d ORDER Sort order: 'default', 'author-date', or 'committer-date'
 -v Enable debug output
 -h Display this help message
EOF
  exit 1
}
#######################################
# Parse the command line flags into the option globals.
# Globals:   OUTPUT_FILE, ALL_BRANCHES, SPECIFIC_BRANCHES, SINCE_DATE,
#            UNTIL_DATE, SORT_ORDER, DEBUG (written)
# Arguments: the script's positional parameters ("$@")
# Outputs:   diagnostics on stderr for bad flags or a bad sort order
# Returns:   0 after parsing; exits 0 after -h, and leaves via show_usage
#            for an unknown flag or a missing argument
#######################################
parse_args() {
  local opt
  local OPTIND=1 # getopts keeps its cursor here; reset it for every call

  while getopts ":o:acb:s:u:d:vh" opt; do
    case "$opt" in
      o)
        OUTPUT_FILE="$OPTARG"
        ;;
      a)
        ALL_BRANCHES=true
        SPECIFIC_BRANCHES=""
        ;;
      c)
        # Neither --all nor a branch list: git log walks the current HEAD.
        ALL_BRANCHES=false
        SPECIFIC_BRANCHES=""
        ;;
      b)
        ALL_BRANCHES=false
        SPECIFIC_BRANCHES="$OPTARG"
        ;;
      s)
        SINCE_DATE="$OPTARG"
        ;;
      u)
        UNTIL_DATE="$OPTARG"
        ;;
      d)
        # An unknown order used to be stored and reported as-is while the
        # log silently ran in default order; warn and fall back instead.
        case "$OPTARG" in
          default | author-date | committer-date)
            SORT_ORDER="$OPTARG"
            ;;
          *)
            echo "Warning: unknown sort order '$OPTARG', using 'default'" >&2
            SORT_ORDER="default"
            ;;
        esac
        ;;
      v)
        DEBUG=true
        ;;
      h)
        # show_usage always exits non-zero; run it in a subshell so that an
        # explicit request for help ends the script successfully.
        (show_usage)
        exit 0
        ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        show_usage
        ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        show_usage
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Write a trace line to stderr when debug mode is on.
# Globals:   DEBUG (read) - tracing is active only when it equals "true"
# Arguments: the words of the message, joined with single spaces
# Outputs:   "[DEBUG] <message>" on stderr, nothing on stdout
# Returns:   0 in every case, so callers never see a failure from tracing
#######################################
debug() {
  if [[ "$DEBUG" == true ]]; then
    printf '[DEBUG] %s\n' "$*" >&2
  fi
}
#######################################
# List the branches whose history contains a commit.
#
# Branches are read from `git for-each-ref --contains` and classified by
# their ref namespace, not by scraping `git branch --all`. That keeps local
# branches with a slash in their name (feature/x) local, and keeps the
# "(HEAD detached at ...)" line and the "remotes/<remote>/HEAD -> ..." alias
# out of the result.
#
# Arguments: $1 - commit hash to look up
# Outputs:   one line on stdout: the comma-separated local branches, or, when
#            no local branch contains the commit, the comma-separated
#            remote-tracking branches written as remotes/<remote>/<branch>;
#            an empty line when nothing matches or git fails
# Returns:   0
#######################################
get_branches_for_commit() {
  local commit_hash="$1"
  local refname short_name
  local local_branches=""
  local remote_branches=""

  debug "Getting branches for commit $commit_hash"

  while IFS= read -r refname; do
    case "$refname" in
      refs/heads/*)
        short_name="${refname#refs/heads/}"
        debug "  Found branch: $short_name"
        local_branches+=",$short_name"
        ;;
      refs/remotes/*/HEAD)
        # Symbolic pointer to the remote's default branch, not a branch.
        ;;
      refs/remotes/*)
        short_name="${refname#refs/}"
        debug "  Found branch: $short_name"
        remote_branches+=",$short_name"
        ;;
    esac
  # stderr is dropped on purpose: an unknown commit simply yields no branches.
  done < <(git for-each-ref --format='%(refname)' --contains "$commit_hash" \
    refs/heads refs/remotes 2> /dev/null)

  # Both lists were built with a leading separator; drop it.
  local_branches="${local_branches#,}"
  remote_branches="${remote_branches#,}"
  debug "  Local branches: $local_branches"
  debug "  Remote branches: $remote_branches"

  # Local branches win; remote-tracking branches are only a fallback.
  if [[ -n "$local_branches" ]]; then
    printf '%s\n' "$local_branches"
  else
    printf '%s\n' "$remote_branches"
  fi
}
#######################################
# Pick one branch label out of a `git log` %D decoration string.
#
# Priority, first match wins:
#   1. the branch HEAD points at ("HEAD -> name")
#   2. the first tag, reported as "tag: name"
#   3. in a comma-separated list: the first ref without a slash, otherwise
#      the first ref that is neither HEAD nor a tag
#   4. a single ref, taken as-is
# A lone "HEAD" (detached HEAD) is not a branch and yields "(none)" so that
# callers can fall back to another lookup.
#
# Arguments: $1 - decoration string, possibly empty
# Outputs:   the chosen label, or "(none)", on stdout
# Returns:   0
#######################################
extract_branch_from_refs() {
  local refs="$1"
  local branch="(none)"
  local ref candidate
  local found=false
  local -a ref_list=()

  debug "Extracting branch from refs: $refs"

  if [[ -z "$refs" ]]; then
    debug "  No refs found in git log output"
  elif [[ "$refs" == *"HEAD -> "* ]]; then
    # Keep the text after "HEAD -> " up to the next comma or space.
    candidate="${refs#*HEAD -> }"
    candidate="${candidate%%,*}"
    branch="${candidate%% *}"
    debug "  Found HEAD -> branch: $branch"
  elif [[ "$refs" == *"tag: "* ]]; then
    # Keep the text after the first "tag: " up to the next comma or space.
    candidate="${refs#*tag: }"
    candidate="${candidate%%,*}"
    branch="tag: ${candidate%% *}"
    debug "  Found tag: $branch"
  elif [[ "$refs" == *,* ]]; then
    IFS=',' read -ra ref_list <<< "$refs"

    # First pass: a ref without a slash is taken to be a local branch.
    for ref in "${ref_list[@]}"; do
      ref="${ref#"${ref%%[![:space:]]*}"}" # strip leading whitespace
      if [[ -n "$ref" && "$ref" != */* && "$ref" != "HEAD" && "$ref" != *"tag:"* ]]; then
        branch="$ref"
        found=true
        debug "  Found local branch in list: $branch"
        break
      fi
    done

    # Second pass: settle for the first ref that is not HEAD and not a tag.
    if [[ "$found" == false ]]; then
      for ref in "${ref_list[@]}"; do
        ref="${ref#"${ref%%[![:space:]]*}"}"
        if [[ -n "$ref" && "$ref" != "HEAD" && "$ref" != *"tag:"* ]]; then
          branch="$ref"
          debug "  Using remote branch: $branch"
          break
        fi
      done
    fi
  else
    # Exactly one ref: trim it and cut it at the first space.
    candidate="${refs#"${refs%%[![:space:]]*}"}"
    candidate="${candidate%% *}"
    if [[ -n "$candidate" && "$candidate" != "HEAD" ]]; then
      branch="$candidate"
      debug "  Using single ref: $branch"
    else
      debug "  Single ref '$candidate' does not name a branch"
    fi
  fi

  printf '%s\n' "$branch"
}
#######################################
# Render one value as a CSV field (RFC 4180 quoting).
# Arguments: $1 - raw value
#            $2 - pass "always" to quote even when not strictly required
# Outputs:   the field on stdout, without a trailing newline
#######################################
csv_field() {
  local value="$1"
  local dq='"'
  if [[ "${2:-}" == always || "$value" == *,* || "$value" == *"$dq"* ]]; then
    # Embedded double quotes are escaped by doubling them.
    value=${value//"$dq"/$dq$dq}
    printf '"%s"' "$value"
  else
    printf '%s' "$value"
  fi
}

# Build the git log invocation as an array: a string that is word-split later
# breaks option values containing spaces (e.g. -s "2 weeks ago").
GIT_CMD=(git log)

# Handle branch selection. With neither --all nor a branch list, git log
# walks the current HEAD only.
if [ "$ALL_BRANCHES" = true ]; then
  GIT_CMD+=(--all)
elif [ -n "$SPECIFIC_BRANCHES" ]; then
  IFS=',' read -ra BRANCH_LIST <<< "$SPECIFIC_BRANCHES"
  for branch_name in "${BRANCH_LIST[@]}"; do
    # Tolerate "main, develop" and stray commas.
    branch_name="${branch_name//[[:space:]]/}"
    if [ -n "$branch_name" ]; then
      GIT_CMD+=("$branch_name")
    fi
  done
fi

# Add date range filters if specified
if [ -n "$SINCE_DATE" ]; then
  GIT_CMD+=("--since=$SINCE_DATE")
fi
if [ -n "$UNTIL_DATE" ]; then
  GIT_CMD+=("--until=$UNTIL_DATE")
fi

# Add sorting options; any other value keeps git's default order.
case "$SORT_ORDER" in
  "author-date")
    GIT_CMD+=(--author-date-order)
    ;;
  "committer-date")
    GIT_CMD+=(--date-order)
    ;;
  *) ;;
esac

# Start the CSV file with its header row; stop early if it cannot be written.
if ! echo "Date,Author,Branch,Message,CommitHash" > "$OUTPUT_FILE"; then
  echo "Error: Cannot write to $OUTPUT_FILE" >&2
  exit 1
fi

# Display the options being used
echo "Generating CSV with the following options:"
echo "Output file: $OUTPUT_FILE"
if [ "$ALL_BRANCHES" = true ]; then
  echo "Branches: All branches"
elif [ -n "$SPECIFIC_BRANCHES" ]; then
  echo "Branches: $SPECIFIC_BRANCHES"
else
  echo "Branches: Current branch only"
fi
if [ -n "$SINCE_DATE" ]; then
  echo "Since date: $SINCE_DATE"
fi
if [ -n "$UNTIL_DATE" ]; then
  echo "Until date: $UNTIL_DATE"
fi
echo "Sort order: $SORT_ORDER"
if [ "$DEBUG" = true ]; then
  echo "Debug mode: Enabled"
fi
echo "Processing..."

# Stream the log, one commit per line.
#  - Fields are separated by the ASCII unit separator (%x1f) instead of a
#    comma, so commas in author names, decorations ("HEAD -> main,
#    origin/main") and subjects cannot shift the columns.
#  - tformat: terminates every record with a newline; with format: the last
#    record has none and `read` would silently drop the oldest commit.
#  - The trailing "--" makes git treat the branch names as revisions only.
FIELD_SEP=$'\x1f'
"${GIT_CMD[@]}" --pretty='tformat:%ad%x1f%an%x1f%D%x1f%s%x1f%H' --date=short -- |
  while IFS="$FIELD_SEP" read -r date author refs message commit_hash; do
    debug "Processing commit $commit_hash from $date by $author"

    # Extract branch from the decoration (%D) first
    branch=$(extract_branch_from_refs "$refs")

    # No usable decoration: ask git which branches contain the commit.
    if [ "$branch" = "(none)" ] || [ -z "$branch" ]; then
      debug "No branch info from refs, trying git branch --contains fallback"
      branches_from_git=$(get_branches_for_commit "$commit_hash")
      if [ -n "$branches_from_git" ]; then
        branch="$branches_from_git"
        debug "Found branches using fallback: $branch"
      else
        debug "Fallback also returned no branches"
      fi
    fi

    debug "Final branch assignment: $branch"

    # Author and branch are quoted only when needed (the branch may be a
    # comma-separated list); the message is always quoted.
    printf '%s,%s,%s,%s,%s\n' \
      "$date" \
      "$(csv_field "$author")" \
      "$(csv_field "$branch")" \
      "$(csv_field "$message" always)" \
      "$commit_hash"
  done >> "$OUTPUT_FILE"

# $? alone would only reflect the while loop; check git log as well.
EXPORT_STATUS=("${PIPESTATUS[@]}")
if [ "${EXPORT_STATUS[0]}" -eq 0 ] && [ "${EXPORT_STATUS[1]}" -eq 0 ]; then
  echo "Git history successfully exported to $OUTPUT_FILE"
else
  echo "Error: Failed to export git history" >&2
  exit 1
fi
#######################################
# Print the Branch column (third field) of every data row of a CSV file.
#
# The row is scanned character by character so that quoted fields may contain
# commas and doubled quotes (""); a plain `cut -d, -f3` would split inside
# them. The header row is skipped and surrounding quotes are removed.
#
# Arguments: $1 - path of the CSV file
# Outputs:   one branch value per data row on stdout (possibly empty)
#######################################
csv_branch_column() {
  awk '
    NR > 1 {
      line = $0
      len = length(line)
      col = 1        # column the scanner is currently in
      value = ""     # text collected for column 3
      quoted = 0     # 1 while inside a quoted field
      at_start = 1   # 1 while at the first character of a field
      for (i = 1; i <= len; i++) {
        ch = substr(line, i, 1)
        if (quoted) {
          if (ch != "\"") {
            if (col == 3) value = value ch
          } else if (substr(line, i + 1, 1) == "\"") {
            # A doubled quote stands for one literal quote.
            if (col == 3) value = value ch
            i++
          } else {
            quoted = 0
          }
        } else if (ch == "\"" && at_start) {
          quoted = 1
          at_start = 0
        } else if (ch == ",") {
          if (col == 3) break
          col++
          at_start = 1
        } else {
          if (col == 3) value = value ch
          at_start = 0
        }
      }
      print value
    }
  ' < "$1"
}

# Count the number of commits exported: one line each, minus the header line.
COMMIT_COUNT=$(wc -l < "$OUTPUT_FILE")
COMMIT_COUNT=$((COMMIT_COUNT - 1))
echo "Total commits exported: $COMMIT_COUNT"

# List branch statistics (skipped in current-branch-only mode)
if [ "$ALL_BRANCHES" = true ] || [ -n "$SPECIFIC_BRANCHES" ]; then
  echo ""
  echo "Commit count by branch:"
  csv_branch_column "$OUTPUT_FILE" | sort | uniq -c | sort -nr |
    while read -r count branch; do
      printf " %-20s %s\n" "$branch:" "$count commits"
    done
fi
# Every failure path above exits on its own, so reaching this line is success.
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment