Viewing top contributors for public GitHub repositories
import json

import pandas as pd
import requests


def get_github_repo_top_contributors(org_user_name, repo_name, top_n=50):
    """
    Gets the top n contributors data for the given GitHub repository - the
    repository should be specified via an organisation or user name and
    a repository name.

    :param org_user_name: An organisation or user name
    :type org_user_name: ``str``

    :param repo_name: A repository name
    :type repo_name: ``str``

    :param top_n: The size of the top contributors list - default
                  is ``50``
    :type top_n: ``int``

    :return: The top n contributors data for the given GitHub
             repository, as a dataframe
    :rtype: ``pd.DataFrame``
    """
    def process_response_json(res):
        # Raise an HTTPError for non-2xx responses (e.g. rate limiting or a
        # nonexistent repository) instead of silently building a malformed
        # dataframe from an error payload
        res.raise_for_status()
        return json.loads(res.text)

    url_template = (
        'https://api.github.com/repos/{org_user_name}/{repo_name}/contributors'
        '?q=contributions&per_page={per_page}&page={page_num}&order=desc'
    )

    # The contributors endpoint returns at most 100 results per page, so
    # split the request into full pages of 100 plus a final partial page
    hundred_per_page_reqs, last_req_size = divmod(top_n, 100)

    # A single request suffices when top_n < 100
    if hundred_per_page_reqs == 0:
        return pd.DataFrame(process_response_json(requests.get(
            url_template.format(org_user_name=org_user_name, repo_name=repo_name, per_page=last_req_size, page_num=1)
        )))

    req_urls = [
        url_template.format(org_user_name=org_user_name, repo_name=repo_name, per_page=100, page_num=page_num)
        for page_num in range(1, hundred_per_page_reqs + 1)
    ]
    if last_req_size > 0:
        # Keep per_page=100 for the final partial page - GitHub pagination
        # offsets are relative to the page size, so shrinking per_page here
        # would re-fetch earlier contributors rather than the remaining
        # ones; the surplus rows are trimmed below
        req_urls += [
            url_template.format(org_user_name=org_user_name, repo_name=repo_name, per_page=100, page_num=(hundred_per_page_reqs + 1))
        ]

    data = pd.DataFrame()
    for req_url in req_urls:
        _data = pd.DataFrame(process_response_json(requests.get(req_url)))
        data = pd.concat([data, _data], axis=0, ignore_index=True)

    return data.head(top_n)
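
A minimal usage sketch (the repository below is chosen purely for illustration; login and contributions are fields returned by the GitHub contributors endpoint):

# Usage sketch: view the top 10 contributors to pandas-dev/pandas
# (an arbitrary public repository) by contribution count
top_contributors = get_github_repo_top_contributors('pandas-dev', 'pandas', top_n=10)
print(top_contributors[['login', 'contributions']])

Note that unauthenticated requests to the GitHub API are rate-limited, so repeated calls or large values of top_n may be rejected; the function as written does not authenticate its requests.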