Skip to content

Instantly share code, notes, and snippets.

@harshadsatra
Created March 5, 2025 08:00
Show Gist options
  • Save harshadsatra/e5a717654fe1f3e19dfa5b5bf6c56b44 to your computer and use it in GitHub Desktop.
Save harshadsatra/e5a717654fe1f3e19dfa5b5bf6c56b44 to your computer and use it in GitHub Desktop.
Git History to CSV - Enhanced Branch Detection
#!/bin/bash
#####################################################################
# Git History to CSV - Enhanced Branch Detection
#####################################################################
#
# DESCRIPTION:
# This script generates a CSV file containing the history of git commits
# with comprehensive branch information. It tracks which commits belong to
# which branches by using multiple detection methods, ensuring accurate
# branch attribution even for commits without direct references.
#
# FEATURES:
# - Enhanced branch detection (uses both direct refs and branch ancestry)
# - Supports all branches or specific branch filtering
# - Date range filtering capability
# - Multiple sorting options
# - Detailed branch statistics
# - Debug mode for troubleshooting
# - Handles complex commit messages with commas and quotes
#
# USAGE:
# ./git_history_to_csv.sh [options]
#
# OPTIONS:
# -o FILE Specify output file (default: git_history.csv)
# -a Include all branches (default)
# -c Current branch only
# -b BRANCHES Specific branches only (comma-separated list)
# -s DATE Include commits since DATE (format: YYYY-MM-DD)
# -u DATE Include commits until DATE (format: YYYY-MM-DD)
# -d ORDER Sort order: 'default', 'author-date', or 'committer-date'
# -v Enable debug output
# -h Display help message
#
# EXAMPLES:
# # Put this script in your project's root directory
# # Generate CSV with all branches (default)
# ./git_history_to_csv.sh
#
# # Specify output file
# ./git_history_to_csv.sh -o project_history.csv
#
# # Only include commits from specific branches
# ./git_history_to_csv.sh -b main,develop
#
# # Include commits from the last 30 days
# ./git_history_to_csv.sh -s "$(date -d '30 days ago' +%Y-%m-%d)"
#
# # Debug mode with date range and specific output file
# ./git_history_to_csv.sh -v -s 2023-01-01 -u 2023-12-31 -o year_2023.csv
#
#####################################################################
# Preflight checks. Each failure prints a message on stderr and ends the
# script with status 1.
#   1. `git` must be resolvable through PATH.
#   2. `git rev-parse --is-inside-work-tree` must succeed in the current
#      directory.
command -v git >/dev/null 2>&1 || {
  echo "Error: git is not installed or not in the PATH" >&2
  exit 1
}

git rev-parse --is-inside-work-tree >/dev/null 2>&1 || {
  echo "Error: Not a git repository" >&2
  exit 1
}
# Defaults for every command-line option. Each value stays in effect unless
# the matching flag is given.
OUTPUT_FILE="git_history.csv"  # -o: CSV destination
DEBUG=false                    # -v: emit [DEBUG] lines on stderr when "true"

# Branch selection: all branches unless -c or -b narrows it.
ALL_BRANCHES=true
SPECIFIC_BRANCHES=""           # -b: comma-separated branch names

# Date window: an empty value means no bound on that side.
SINCE_DATE=""
UNTIL_DATE=""

# -d: one of 'default', 'author-date', 'committer-date'.
SORT_ORDER="default"
#######################################
# Print the usage summary and terminate the script.
# Globals:   none ($0 is read for the program name on the first line)
# Arguments: none
# Outputs:   the option list, one line per option, on stdout
# Returns:   never returns; exits the shell with status 1
#######################################
show_usage() {
  printf '%s\n' \
    "Usage: $0 [options]" \
    "Options:" \
    " -o FILE Specify output file (default: git_history.csv)" \
    " -a Include all branches (default)" \
    " -c Current branch only" \
    " -b BRANCHES Specific branches only (comma-separated list)" \
    " -s DATE Include commits since DATE (format: YYYY-MM-DD)" \
    " -u DATE Include commits until DATE (format: YYYY-MM-DD)" \
    " -d ORDER Sort order: 'default', 'author-date', or 'committer-date'" \
    " -v Enable debug output" \
    " -h Display this help message"
  exit 1
}
#######################################
# Parse command line arguments into the option globals.
# Globals:   OUTPUT_FILE, ALL_BRANCHES, SPECIFIC_BRANCHES, SINCE_DATE,
#            UNTIL_DATE, SORT_ORDER, DEBUG (written)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   error messages on stderr; usage text via show_usage
# Returns:   0 when parsing succeeds. Exits 0 after -h, and exits 1 (through
#            show_usage) on an unknown option, a missing option argument, or
#            an unsupported -d value.
#######################################
parse_args() {
  # Local OPTIND/OPTARG keep getopts state private to this call.
  local opt OPTARG OPTIND=1

  while getopts ":o:acb:s:u:d:vh" opt; do
    case "$opt" in
      o)
        OUTPUT_FILE="$OPTARG"
        ;;
      a)
        ALL_BRANCHES=true
        SPECIFIC_BRANCHES=""
        ;;
      c)
        # Neither "all" nor an explicit list: current branch only.
        ALL_BRANCHES=false
        SPECIFIC_BRANCHES=""
        ;;
      b)
        ALL_BRANCHES=false
        SPECIFIC_BRANCHES="$OPTARG"
        ;;
      s)
        SINCE_DATE="$OPTARG"
        ;;
      u)
        UNTIL_DATE="$OPTARG"
        ;;
      d)
        # Reject unknown orders instead of silently falling back to the
        # default ordering.
        case "$OPTARG" in
          default | author-date | committer-date)
            SORT_ORDER="$OPTARG"
            ;;
          *)
            echo "Invalid sort order: $OPTARG" >&2
            show_usage
            ;;
        esac
        ;;
      v)
        DEBUG=true
        ;;
      h)
        # show_usage itself exits with status 1; run it in a subshell so an
        # explicit help request can finish with a success status.
        (show_usage)
        exit 0
        ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        show_usage
        ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        show_usage
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Write a diagnostic line to stderr when debugging is enabled.
# Globals:   DEBUG (read) - output is produced only when it equals "true"
# Arguments: message words; they are joined with "$*"
# Outputs:   "[DEBUG] <message>" on stderr
# Returns:   0
#######################################
debug() {
  [ "$DEBUG" = true ] || return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
#######################################
# List the branches that contain a commit.
#
# Parses `git branch --all --contains <hash>`. Remote-tracking branches are
# recognised by the "remotes/" prefix that `--all` puts in front of them, so
# local branches whose names contain "/" (e.g. feature/login) are still
# treated as local. Lines that are not real branches are ignored: the
# "(HEAD detached at ...)" / "(no branch ...)" pseudo-entry and symbolic refs
# such as "remotes/origin/HEAD -> origin/main".
#
# Arguments: $1 - commit hash
# Outputs:   comma-separated branch names on stdout. Local branches are
#            preferred; remote-tracking branches are printed only when no
#            local branch contains the commit. An empty line is printed when
#            nothing matches or git fails.
# Returns:   0
#######################################
get_branches_for_commit() {
  local commit_hash="$1"
  local branch
  local branches=""
  local local_branches=""
  local remote_branches=""

  debug "Getting branches for commit $commit_hash"

  while IFS= read -r branch; do
    # Strip the two-column marker: "* " (current branch), "+ " (checked out
    # in another worktree) or two spaces.
    branch="${branch#[*+ ] }"

    case "$branch" in
      "")
        continue
        ;;
      '('*)
        # Detached HEAD / in-progress rebase pseudo-entry, not a branch.
        continue
        ;;
      *' -> '*)
        # Symbolic ref; its target is listed on a line of its own.
        continue
        ;;
      remotes/*)
        debug " Found branch: $branch"
        remote_branches="$remote_branches,$branch"
        ;;
      *)
        debug " Found branch: $branch"
        local_branches="$local_branches,$branch"
        ;;
    esac
    # git errors (e.g. unknown hash) are deliberately silenced: the caller
    # treats an empty result as "no branch found".
  done < <(git branch --all --contains "$commit_hash" 2>/dev/null)

  # Remove the leading comma left by the append pattern above.
  local_branches="${local_branches#,}"
  remote_branches="${remote_branches#,}"
  debug " Local branches: $local_branches"
  debug " Remote branches: $remote_branches"

  # Prefer local branches over remote branches.
  if [ -n "$local_branches" ]; then
    branches="$local_branches"
  elif [ -n "$remote_branches" ]; then
    branches="$remote_branches"
  fi

  printf '%s\n' "$branches"
}
#######################################
# Pick one branch name out of a `git log` %D decoration string
# (e.g. "HEAD -> main, tag: v1.0, origin/main").
#
# Precedence:
#   1. the branch HEAD points at ("HEAD -> name")
#   2. the first ref without "/" that is neither HEAD nor a tag
#      (treated as a local branch)
#   3. the first remaining non-HEAD, non-tag ref (treated as remote)
#   4. the first tag, reported as "tag: <name>"
#   5. a lone ref such as "HEAD", returned as-is
# Tags come after branches so that a tagged branch tip is attributed to its
# branch rather than to the tag.
#
# NOTE(review): a local branch whose name contains "/" cannot be told apart
# from a remote-tracking ref in %D output, so it is ranked as remote.
#
# Arguments: $1 - decoration string; may be empty
# Outputs:   the chosen name on stdout, or "(none)" when nothing applies
# Returns:   0
#######################################
extract_branch_from_refs() {
  local refs="$1"
  local branch="(none)"
  local head_branch tag_name ref
  local local_branch=""
  local remote_branch=""
  local -a ref_list=()

  debug "Extracting branch from refs: $refs"

  if [ -z "$refs" ]; then
    debug " No refs found in git log output"
    printf '%s\n' "$branch"
    return 0
  fi

  # 1. "HEAD -> branch": take the text up to the next comma or space.
  if [[ "$refs" == *"HEAD -> "* ]]; then
    head_branch="${refs#*HEAD -> }"
    branch="${head_branch%%,*}"
    branch="${branch%% *}"
    debug " Found HEAD -> branch: $branch"
    printf '%s\n' "$branch"
    return 0
  fi

  # 2./3. Scan the comma-separated refs for branches, ignoring HEAD and tags.
  IFS=',' read -ra ref_list <<< "$refs"
  for ref in "${ref_list[@]}"; do
    ref="${ref# }" # entries after the first carry one leading space
    if [[ -z "$ref" || "$ref" == "HEAD" || "$ref" == "tag: "* ]]; then
      continue
    fi
    if [[ "$ref" != *"/"* ]]; then
      local_branch="$ref"
      break
    fi
    # Remember only the first remote candidate; keep looking for a local one.
    [ -n "$remote_branch" ] || remote_branch="$ref"
  done

  if [ -n "$local_branch" ]; then
    branch="$local_branch"
    debug " Found local branch in list: $branch"
  elif [ -n "$remote_branch" ]; then
    branch="$remote_branch"
    debug " Using remote branch: $branch"
  elif [[ "$refs" == *"tag: "* ]]; then
    # 4. No branch at all: fall back to the first tag.
    tag_name="${refs#*tag: }"
    tag_name="${tag_name%%,*}"
    tag_name="${tag_name%% *}"
    branch="tag: $tag_name"
    debug " Found tag: $branch"
  elif [[ "$refs" != *","* ]]; then
    # 5. A single ref that matched nothing above (e.g. a bare "HEAD").
    branch="${refs# }"
    branch="${branch%% *}"
    debug " Using single ref: $branch"
  fi

  printf '%s\n' "$branch"
}
# Assemble the `git log` command line in GIT_CMD from the parsed options.
# GIT_CMD is a plain string that is word-split when it is executed, so every
# piece is appended with a single leading space.
GIT_CMD="git log"

# Revision selection: every ref, or the explicit branch list. When neither
# applies (current-branch-only mode) nothing is appended.
if [ "$ALL_BRANCHES" = true ]; then
  GIT_CMD+=" --all"
elif [ -n "$SPECIFIC_BRANCHES" ]; then
  # git expects separate arguments, so turn the commas into spaces.
  BRANCH_LIST="${SPECIFIC_BRANCHES//,/ }"
  GIT_CMD+=" $BRANCH_LIST"
fi

# Optional date window.
[ -z "$SINCE_DATE" ] || GIT_CMD+=" --since=$SINCE_DATE"
[ -z "$UNTIL_DATE" ] || GIT_CMD+=" --until=$UNTIL_DATE"

# Ordering flag; any other SORT_ORDER value keeps git's default order.
if [ "$SORT_ORDER" = "author-date" ]; then
  GIT_CMD+=" --author-date-order"
elif [ "$SORT_ORDER" = "committer-date" ]; then
  GIT_CMD+=" --date-order"
fi
# Start the CSV file with its header row; any existing file is truncated.
printf '%s\n' "Date,Author,Branch,Message,CommitHash" > "$OUTPUT_FILE"

# Echo the effective options on stdout so the user can confirm the run.
printf '%s\n' \
  "Generating CSV with the following options:" \
  "Output file: $OUTPUT_FILE"

if [ "$ALL_BRANCHES" = true ]; then
  printf '%s\n' "Branches: All branches"
elif [ -z "$SPECIFIC_BRANCHES" ]; then
  printf '%s\n' "Branches: Current branch only"
else
  printf '%s\n' "Branches: $SPECIFIC_BRANCHES"
fi

# The date bounds are reported only when they were supplied.
if [ -n "$SINCE_DATE" ]; then
  printf '%s\n' "Since date: $SINCE_DATE"
fi
if [ -n "$UNTIL_DATE" ]; then
  printf '%s\n' "Until date: $UNTIL_DATE"
fi

printf '%s\n' "Sort order: $SORT_ORDER"
if [ "$DEBUG" = true ]; then
  printf '%s\n' "Debug mode: Enabled"
fi
printf '%s\n' "Processing..."
# Export the selected commits as CSV rows, then print summary statistics.
#
# Design notes:
#   * git separates the fields with the ASCII unit separator (0x1f) instead
#     of a comma. Author names, ref lists (%D, e.g. "HEAD -> main,
#     origin/main") and subjects may all contain commas, so comma-based
#     splitting would shift the columns.
#   * The log is captured first, which lets git's own exit status be checked
#     and guarantees that the final record is processed even though it is
#     not newline-terminated.
#   * Fields are written with RFC 4180 quoting: a field is wrapped in double
#     quotes only when it contains a comma, quote or line break, and embedded
#     quotes are doubled.
#   * Branch statistics are computed from the values collected during the
#     loop, not by re-parsing the CSV.

#######################################
# Print one value as an RFC 4180 CSV field (no trailing newline).
# Arguments: $1 - raw field value
# Outputs:   the value, quoted when it contains a comma, double quote,
#            CR or LF
#######################################
csv_field() {
  local value="$1"
  local dq='"'
  case "$value" in
    *,* | *"$dq"* | *$'\n'* | *$'\r'*)
      printf '"%s"' "${value//$dq/$dq$dq}"
      ;;
    *)
      printf '%s' "$value"
      ;;
  esac
}

# GIT_CMD is a space-separated command line and is word-split on purpose.
# shellcheck disable=SC2086
if ! log_output=$($GIT_CMD --pretty=tformat:'%ad%x1f%an%x1f%D%x1f%s%x1f%H' --date=short); then
  echo "Error: Failed to export git history" >&2
  exit 1
fi

# One entry per exported commit: the branch written to its CSV row.
exported_branches=()

while IFS=$'\x1f' read -r date author refs message commit_hash; do
  # An empty log still yields one blank line through the here-string.
  [ -n "$commit_hash" ] || continue

  debug "Processing commit $commit_hash from $date by $author"

  # Extract branch from refs first
  branch=$(extract_branch_from_refs "$refs")

  # If the decoration gave nothing usable, ask git which branches contain
  # the commit.
  if [ "$branch" = "(none)" ] || [ -z "$branch" ]; then
    debug "No branch info from refs, trying git branch --contains fallback"
    branches_from_git=$(get_branches_for_commit "$commit_hash")
    if [ -n "$branches_from_git" ]; then
      branch="$branches_from_git"
      debug "Found branches using fallback: $branch"
    else
      debug "Fallback also returned no branches"
    fi
  fi

  debug "Final branch assignment: $branch"
  exported_branches+=("$branch")

  # --date=short dates and %H hashes never contain commas or quotes, so they
  # are written as-is; the other fields go through csv_field.
  printf '%s,%s,%s,%s,%s\n' \
    "$date" \
    "$(csv_field "$author")" \
    "$(csv_field "$branch")" \
    "$(csv_field "$message")" \
    "$commit_hash"
done <<< "$log_output" >> "$OUTPUT_FILE" || {
  # Reached when the output file cannot be opened or written.
  echo "Error: Failed to export git history" >&2
  exit 1
}

echo "Git history successfully exported to $OUTPUT_FILE"

# Count the number of commits exported
COMMIT_COUNT=${#exported_branches[@]}
echo "Total commits exported: $COMMIT_COUNT"

# List branch statistics
if [ "$ALL_BRANCHES" = true ] || [ -n "$SPECIFIC_BRANCHES" ]; then
  echo ""
  echo "Commit count by branch:"
  if [ "$COMMIT_COUNT" -gt 0 ]; then
    printf '%s\n' "${exported_branches[@]}" | sort | uniq -c | sort -nr |
      while read -r count branch; do
        printf " %-20s %s\n" "$branch:" "$count commits"
      done
  fi
fi

exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment