Last active
July 4, 2023 19:51
-
-
Save Chadys/81a27a7c6cda3bdc4c7038934433c87e to your computer and use it in GitHub Desktop.
Gitlab user all commits export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import logging | |
import os | |
from operator import itemgetter | |
from pathlib import Path | |
import pytz as pytz | |
import requests | |
import argparse | |
import datetime | |
# Logging verbosity comes from the LOGLEVEL environment variable
# (case-insensitive); INFO is used when the variable is not set.
LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
logging.basicConfig(level=LOGLEVEL)
logger = logging.getLogger(__name__)

# Script description (kept as a plain string expression, as in the original).
"""
Create weekly csv files containing all commits authored by the given user,
sorted by date
"""
def get_projects(gitlab_url: str, headers: dict) -> list:
    """Return every project the GitLab API lists for the given credentials.

    Walks the paginated ``/api/v4/projects`` endpoint, following the
    ``X-Next-Page`` response header until it is empty or missing.

    Args:
        gitlab_url: Domain of the GitLab instance, without scheme; requests
            are sent to ``https://{gitlab_url}``.
        headers: HTTP headers sent with every request (carries the token).

    Returns:
        A list of ``{"id": ..., "name": ...}`` dicts, where ``name`` is the
        project's ``name_with_namespace``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    logger.info(f"Getting all registered projects in gitlab instance {gitlab_url}")
    projects = []
    page = "1"
    while page:
        r = requests.get(
            f"https://{gitlab_url}/api/v4/projects",
            headers=headers,
            params={"page": page, "per_page": 100},
        )
        # An error payload is a JSON object rather than a list; fail loudly
        # here instead of with an unrelated TypeError while iterating it.
        r.raise_for_status()
        projects.extend(
            {"id": project["id"], "name": project["name_with_namespace"]}
            for project in r.json()
        )
        # Empty or absent header means there is no further page.
        page = r.headers.get("X-Next-Page")
    return projects
def normalize_date(date_str: str, timezone: datetime.tzinfo) -> str:
    """Convert an ISO 8601 timestamp to ``timezone`` and format it.

    Args:
        date_str: ISO 8601 date-time string. A trailing ``Z`` (UTC designator)
            is accepted on every Python version.
        timezone: Target timezone (any ``tzinfo``, e.g. a pytz timezone).

    Returns:
        The timestamp expressed in ``timezone`` as ``YYYY-MM-DD-HH:MM:SS``,
        a fixed-width format that sorts chronologically as plain text.

    Raises:
        ValueError: If ``date_str`` is not a valid ISO 8601 date-time.
    """
    # datetime.fromisoformat() only understands the "Z" suffix from
    # Python 3.11 on; rewrite it as an explicit UTC offset for older versions.
    if date_str.endswith(("Z", "z")):
        date_str = f"{date_str[:-1]}+00:00"
    date = datetime.datetime.fromisoformat(date_str)
    # Explicit 24-hour fields instead of the locale-dependent "%X", because
    # callers sort on the resulting string.
    return date.astimezone(timezone).strftime("%Y-%m-%d-%H:%M:%S")
def get_commits(
    gitlab_url: str,
    headers: dict,
    projects: list,
    author: str,
    start_date: datetime.datetime,
    end_date: datetime.datetime,
    timezone: datetime.tzinfo,
) -> list:
    """Collect the commits of ``author`` across ``projects`` in a date window.

    Args:
        gitlab_url: Domain of the GitLab instance, without scheme.
        headers: HTTP headers sent with every request (carries the token).
        projects: Dicts with at least the keys ``id`` and ``name``.
        author: Value passed as the ``author`` filter of the commits API.
        start_date: Lower bound, sent as the ``since`` parameter.
        end_date: Upper bound, sent as the ``until`` parameter.
        timezone: Timezone the authored date of each commit is rendered in.

    Returns:
        A list of dicts with the keys ``project``, ``date``, ``sha``,
        ``message``, ``url`` and ``author``, sorted by ``date``.

    Raises:
        requests.HTTPError: If the API answers with an error status other
            than 403/404 (those projects are skipped with a warning).
    """
    commits = []
    # Note: Gitlab filters date on committed_date and not authored_date
    base_params = {
        "per_page": 100,
        "author": author,
        "all": True,
        "since": start_date.isoformat(),
        "until": end_date.isoformat(),
    }
    for project in projects:
        url = (
            f"https://{gitlab_url}/api/v4/projects/{project['id']}"
            "/repository/commits"
        )
        page = "1"
        while page:
            r = requests.get(
                url,
                headers=headers,
                params={**base_params, "page": page},
            )
            if r.status_code in (403, 404):
                # A listed project whose repository cannot be read with this
                # token must not abort the export of all the other projects.
                logger.warning(
                    "Skipping project %s: commits not accessible (HTTP %s)",
                    project["name"],
                    r.status_code,
                )
                break
            # Any other error payload is a JSON object, not a commit list.
            r.raise_for_status()
            commits.extend(
                {
                    "project": project["name"],
                    "date": normalize_date(commit["authored_date"], timezone),
                    "sha": commit["id"],
                    "message": commit["message"],
                    "url": commit["web_url"],
                    "author": commit["author_email"],
                }
                for commit in r.json()
            )
            # Empty or absent header means there is no further page.
            page = r.headers.get("X-Next-Page")
    commits.sort(key=itemgetter("date"))
    return commits
def save_all_commits(
    output_folder: str,
    gitlab_url: str,
    headers: dict,
    projects: list,
    author: str,
    start_date: datetime.datetime,
    last_date: datetime.datetime,
    timezone: datetime.tzinfo,
):
    """Export the commits of ``author`` as one CSV file per 7-day window.

    Windows start at ``start_date`` and advance by seven days until
    ``last_date`` is reached. A window that yields no commit produces no
    file. Files are named ``gitlab_commit_logs_<window start date>.csv`` and
    use ``;`` as delimiter.

    Args:
        output_folder: Existing directory the CSV files are written to.
        gitlab_url: Domain of the GitLab instance, without scheme.
        headers: HTTP headers sent with every request (carries the token).
        projects: Projects to scan, as dicts with ``id`` and ``name``.
        author: Author filter forwarded to ``get_commits``.
        start_date: Start of the first window.
        last_date: End of the export; no window extends past it.
        timezone: Timezone commit dates are rendered in.
    """
    logger.info(f"Getting commits for author {author}")
    week = datetime.timedelta(days=7)
    folder = Path(output_folder)
    while start_date < last_date:
        # Clamp the final window so commits after last_date are not exported.
        end_date = min(start_date + week, last_date)
        logger.info(f"Getting commits from {start_date} to {end_date}")
        commits = get_commits(
            gitlab_url, headers, projects, author, start_date, end_date, timezone
        )
        if commits:
            csv_path = (
                folder / f"gitlab_commit_logs_{start_date.date().isoformat()}.csv"
            )
            # Explicit UTF-8: commit messages may contain characters the
            # platform's default encoding cannot represent.
            with csv_path.open("w", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(
                    f,
                    fieldnames=["project", "date", "sha", "message", "url", "author"],
                    delimiter=";",
                )
                writer.writeheader()
                writer.writerows(commits)
        start_date += week
def _parse_timezone(name: str) -> datetime.tzinfo:
    """Turn a timezone name into a pytz timezone for use as an argparse type."""
    try:
        return pytz.timezone(name)
    except pytz.UnknownTimeZoneError:
        # UnknownTimeZoneError derives from KeyError, which argparse does not
        # intercept; convert it so the user gets a usage error, not a traceback.
        raise argparse.ArgumentTypeError(f"unknown timezone {name!r}") from None


def create_argsparser():
    """Build the command-line parser of the export script.

    Positional arguments: ``output_folder``, ``gitlab_url``, ``author`` and
    ``access_token``. Options: ``-s/--start_date`` and ``-e/--end_date``
    (ISO format, defaulting to 2021-01-11 and to the current time when the
    parser is built) and ``-t/--timezone`` (defaulting to ``Europe/Paris``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "output_folder", help="Output folder to save logs to, must exist"
    )
    parser.add_argument(
        "gitlab_url",
        help="domain of the gitlab instance, either gitlab.com or your self-managed domain",
    )
    parser.add_argument(
        "author", help="username of the user you want to extract commits for"
    )
    parser.add_argument(
        "access_token", help="gitlab access token with sufficient permissions"
    )
    parser.add_argument(
        "-s",
        "--start_date",
        help="Date to start extraction from, in ISO format",
        type=datetime.datetime.fromisoformat,
        default=datetime.datetime(
            year=2021, month=1, day=11, hour=0, minute=0, second=0
        ),
    )
    parser.add_argument(
        "-e",
        "--end_date",
        help="Date to end extraction at, in ISO format",
        type=datetime.datetime.fromisoformat,
        default=datetime.datetime.now(),
    )
    parser.add_argument(
        "-t",
        "--timezone",
        help="Timezone code name you want to present the commit date as, e.g. 'Europe/Paris'",
        type=_parse_timezone,
        default=pytz.timezone("Europe/Paris"),
    )
    return parser
def main():
    """Script entry point: parse the CLI, list the projects, export commits.

    The access token from the command line is sent as the ``PRIVATE-TOKEN``
    header on every GitLab request.
    """
    args = create_argsparser().parse_args()
    auth_headers = {"PRIVATE-TOKEN": args.access_token}
    all_projects = get_projects(args.gitlab_url, auth_headers)
    save_all_commits(
        output_folder=args.output_folder,
        gitlab_url=args.gitlab_url,
        headers=auth_headers,
        projects=all_projects,
        author=args.author,
        start_date=args.start_date,
        last_date=args.end_date,
        timezone=args.timezone,
    )


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment