Skip to content

Instantly share code, notes, and snippets.

@Chadys
Last active July 4, 2023 19:51
Show Gist options
  • Save Chadys/81a27a7c6cda3bdc4c7038934433c87e to your computer and use it in GitHub Desktop.
Save Chadys/81a27a7c6cda3bdc4c7038934433c87e to your computer and use it in GitHub Desktop.
Gitlab user all commits export
import csv
import logging
import os
from operator import itemgetter
from pathlib import Path
import pytz as pytz
import requests
import argparse
import datetime
# Log verbosity is taken from the LOGLEVEL environment variable (default: INFO);
# the value is upper-cased so that e.g. "debug" is accepted as well.
LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
logging.basicConfig(level=LOGLEVEL)
logger = logging.getLogger(__name__)
"""
Create weekly CSV files (one per 7-day window that contains commits) listing
all commits authored by the given user, sorted by authored date.
"""
def get_projects(gitlab_url: str, headers: dict) -> list:
    """Return the id and full name of every project the token can list.

    Walks the paginated ``/api/v4/projects`` endpoint, following the
    ``X-Next-Page`` response header until it is empty or absent.

    Args:
        gitlab_url: Bare domain of the GitLab instance; ``https://`` is
            prepended.
        headers: HTTP headers sent with every request (carries the token).

    Returns:
        A list of ``{"id": ..., "name": ...}`` dicts, where ``name`` is the
        project's ``name_with_namespace``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    logger.info(f"Getting all registered projects in gitlab instance {gitlab_url}")
    projects = []
    page = "1"
    while page:
        r = requests.get(
            f"https://{gitlab_url}/api/v4/projects",
            headers=headers,
            params={"page": page, "per_page": 100},
            # Without a timeout a stalled connection would hang the export.
            timeout=60,
        )
        # An error body is a JSON object, not a list of projects; fail with a
        # clear HTTP error instead of iterating over it.
        r.raise_for_status()
        projects.extend(
            {"id": project["id"], "name": project["name_with_namespace"]}
            for project in r.json()
        )
        # Empty (last page) or missing header both end the pagination.
        page = r.headers.get("X-Next-Page")
    return projects
def normalize_date(date_str: str, timezone: datetime.tzinfo):
    """Convert an ISO 8601 timestamp to ``YYYY-MM-DD-HH:MM:SS`` in *timezone*.

    The fixed-width, most-significant-first layout means the returned strings
    sort chronologically when compared as plain text.

    Args:
        date_str: ISO 8601 date-time string; a trailing ``Z`` is accepted as
            UTC. A string without any offset is interpreted by
            ``datetime.astimezone`` as system local time.
        timezone: Time zone the result is expressed in.

    Returns:
        The formatted local date-time string.
    """
    # datetime.fromisoformat() only understands the "Z" suffix from
    # Python 3.11 on, so spell it out as an explicit UTC offset.
    if date_str.endswith(("Z", "z")):
        date_str = f"{date_str[:-1]}+00:00"
    date = datetime.datetime.fromisoformat(date_str)
    # Explicit %H:%M:%S instead of the locale-dependent %X keeps the output
    # stable (and sortable) whatever the process locale is.
    return date.astimezone(timezone).strftime("%Y-%m-%d-%H:%M:%S")
def get_commits(
    gitlab_url: str,
    headers: dict,
    projects: list,
    author: str,
    start_date: datetime.datetime,
    end_date: datetime.datetime,
    timezone: datetime.tzinfo,
) -> list:
    """Collect the commits of *author* across *projects* for one date window.

    Queries ``/repository/commits`` of every project (all branches), following
    the ``X-Next-Page`` header, and returns the merged result sorted by the
    normalized authored date.

    Args:
        gitlab_url: Bare domain of the GitLab instance; ``https://`` is
            prepended.
        headers: HTTP headers sent with every request (carries the token).
        projects: Dicts with at least ``"id"`` and ``"name"`` keys, as
            produced by ``get_projects``.
        author: Value passed as the ``author`` filter of the commits API.
        start_date: Sent as the ``since`` filter (ISO format).
        end_date: Sent as the ``until`` filter (ISO format).
        timezone: Time zone the commit dates are presented in.

    Returns:
        A list of dicts with the keys ``project``, ``date``, ``sha``,
        ``message``, ``url`` and ``author``, sorted by ``date``.

    Raises:
        requests.HTTPError: On an error status other than 403/404.
    """
    commits = []
    # Note: Gitlab filters date on committed_date and not authored_date
    base_params = {
        "per_page": 100,
        "author": author,
        "all": True,
        "since": start_date.isoformat(),
        "until": end_date.isoformat(),
    }
    for project in projects:
        page = "1"
        while page:
            r = requests.get(
                f"https://{gitlab_url}/api/v4/projects/{project['id']}/repository/commits",
                headers=headers,
                params={**base_params, "page": page},
                # Without a timeout a stalled connection would hang the export.
                timeout=60,
            )
            if r.status_code in (403, 404):
                # The project is listed but its repository cannot be read
                # (e.g. repository feature disabled); skip it rather than
                # abort the whole export.
                logger.warning(
                    f"Skipping project {project['name']}: "
                    f"commits not accessible (HTTP {r.status_code})"
                )
                break
            # Any other error body is a JSON object, not a commit list.
            r.raise_for_status()
            commits.extend(
                {
                    "project": project["name"],
                    "date": normalize_date(commit["authored_date"], timezone),
                    "sha": commit["id"],
                    "message": commit["message"],
                    "url": commit["web_url"],
                    "author": commit["author_email"],
                }
                for commit in r.json()
            )
            # Empty (last page) or missing header both end the pagination.
            page = r.headers.get("X-Next-Page")
    commits.sort(key=itemgetter("date"))
    return commits
def save_all_commits(
    output_folder: str,
    gitlab_url: str,
    headers: dict,
    projects: list,
    author: str,
    start_date: datetime.datetime,
    last_date: datetime.datetime,
    timezone: datetime.tzinfo,
):
    """Export the commits of *author* as one CSV file per 7-day window.

    Steps from ``start_date`` to ``last_date`` in 7-day windows, fetches the
    commits of each window with ``get_commits`` and, when there are any,
    writes them to
    ``<output_folder>/gitlab_commit_logs_<window start date>.csv``
    (``;``-separated, with a header row). Windows without commits produce no
    file.

    Args:
        output_folder: Existing directory the CSV files are written to.
        gitlab_url: Bare domain of the GitLab instance.
        headers: HTTP headers sent with every request (carries the token).
        projects: Projects to scan, as produced by ``get_projects``.
        author: Author filter passed to the commits API.
        start_date: Start of the first window.
        last_date: End of the export; the final window is cut off here.
        timezone: Time zone the commit dates are presented in.
    """
    logger.info(f"Getting commits for author {author}")
    window = datetime.timedelta(days=7)
    fieldnames = ["project", "date", "sha", "message", "url", "author"]
    while start_date < last_date:
        # Clamp the final window so that nothing after last_date is exported.
        end_date = min(start_date + window, last_date)
        logger.info(f"Getting commits from {start_date} to {end_date}")
        commits = get_commits(
            gitlab_url, headers, projects, author, start_date, end_date, timezone
        )
        if commits:
            csv_path = (
                Path(output_folder)
                / f"gitlab_commit_logs_{start_date.date().isoformat()}.csv"
            )
            # Explicit UTF-8: commit messages may contain characters the
            # platform default encoding cannot represent.
            with csv_path.open("w", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter=";")
                writer.writeheader()
                writer.writerows(commits)
        start_date += window
def _parse_timezone(name: str):
    """Turn a timezone name into a pytz timezone, for use as an argparse type."""
    try:
        return pytz.timezone(name)
    except pytz.UnknownTimeZoneError as err:
        # argparse only turns TypeError, ValueError and ArgumentTypeError into
        # a usage error; anything else would surface as a raw traceback.
        raise argparse.ArgumentTypeError(f"unknown timezone {name!r}") from err


def create_argsparser():
    """Build the command-line parser of the export script.

    Positional arguments: ``output_folder``, ``gitlab_url``, ``author`` and
    ``access_token``. Options: ``-s/--start_date`` and ``-e/--end_date``
    (ISO format, parsed to ``datetime``; the end date defaults to the moment
    the parser is built) and ``-t/--timezone`` (defaults to ``Europe/Paris``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "output_folder", help="Output folder to save logs to, must exist"
    )
    parser.add_argument(
        "gitlab_url",
        help="domain of the gitlab instance, either gitlab.com or your self-managed domain",
    )
    parser.add_argument(
        "author", help="username of the user you want to extract commits for"
    )
    parser.add_argument(
        "access_token", help="gitlab access token with sufficient permissions"
    )
    parser.add_argument(
        "-s",
        "--start_date",
        help="Date to start extraction from, in ISO format",
        type=datetime.datetime.fromisoformat,
        default=datetime.datetime(
            year=2021, month=1, day=11, hour=0, minute=0, second=0
        ),
    )
    parser.add_argument(
        "-e",
        "--end_date",
        help="Date to end extraction at, in ISO format",
        type=datetime.datetime.fromisoformat,
        default=datetime.datetime.now(),
    )
    parser.add_argument(
        "-t",
        "--timezone",
        help="Timezone code name you want to present the commit date as, e.g. 'Europe/Paris'",
        type=_parse_timezone,
        default=pytz.timezone("Europe/Paris"),
    )
    return parser
def main():
    """Parse the command line, list the projects and export the commits."""
    parser = create_argsparser()
    args = parser.parse_args()
    # The output folder is documented as "must exist"; check it before the
    # long-running network calls instead of failing at the first file write.
    if not Path(args.output_folder).is_dir():
        parser.error(f"output folder does not exist: {args.output_folder}")
    headers = {"PRIVATE-TOKEN": args.access_token}
    projects = get_projects(args.gitlab_url, headers)
    save_all_commits(
        args.output_folder,
        args.gitlab_url,
        headers,
        projects,
        args.author,
        args.start_date,
        args.end_date,
        args.timezone,
    )


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment