Created
January 21, 2015 16:12
-
-
Save therve/9873ab744f6e577a287f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import requests | |
import BeautifulSoup | |
# Team-code translation table. main() replaces any listed code found among
# the keys with its value before building the team page URL, and
# get_player_seniority() also counts player rows filed under a key whose
# value is the requested team.
team_mapping = {
    "NJN": "BRK",
    "CHA": "CHO",
    "NOH": "NOP",
}
def get_page_body(uri, timeout=30):
    """Fetch a page from basketball-reference.com and return it parsed.

    Args:
        uri: Site-relative path appended verbatim to the host name
            (callers in this file pass paths starting with "/").
        timeout: Seconds handed to ``requests.get``; without it a stalled
            connection would block the script forever.

    Returns:
        A ``BeautifulSoup.BeautifulSoup`` document built from the response.

    Raises:
        requests.exceptions.RequestException: on connection failure,
            timeout, or a 4xx/5xx response status.
    """
    response = requests.get(
        "http://www.basketball-reference.com%s" % uri, timeout=timeout)
    # Fail here on an HTTP error instead of parsing the error page and
    # crashing later when an expected element is missing.
    response.raise_for_status()
    content = response.text.encode("utf-8")
    return BeautifulSoup.BeautifulSoup(content)
def get_seniority(team):
    """Print a team's code, winning percentage and seniority coefficient.

    Loads the team's 2015 page, walks the rows of the element with id
    "totals", and for every player with non-zero minutes (sixth cell)
    looks up the minutes that player has accumulated with the team via
    get_player_seniority(). The coefficient is

        sum(mins / total_mins * mins / total)

    over those players, where ``total`` is the sum of their ``mins``.

    Args:
        team: Team code used in the "/teams/<code>/2015.html" URL.

    Returns:
        None; the result is written to stdout as "<team> <wins> <coef>".
    """
    team_page = get_page_body("/teams/%s/2015.html" % team)
    wins = get_winning_percentage(team_page)
    totals_table = team_page.find(id="totals")
    data = []
    for row in totals_table.findAll("tr"):
        cols = row.findAll("td")
        if not cols:
            # Rows without <td> cells carry no player data.
            continue
        link = cols[1].findAll("a")[0]["href"]
        mins = int(cols[5].text)
        if not mins:
            continue
        total_mins = get_player_seniority(link, team)
        # A zero total would divide by zero below; it means the player page
        # listed no minutes under this team's codes, so skip the player.
        if total_mins:
            data.append((mins, total_mins))
    total = sum(mins for (mins, _) in data)
    # Relies on true division (the module imports division from __future__).
    # When data is empty the sum has no terms, so total == 0 is harmless.
    coef = sum(mins / total_mins * mins / total for (mins, total_mins) in data)
    # Formatted print gives the same output under Python 2 and Python 3.
    print("%s %s %s" % (team, wins, coef))
def get_player_seniority(link, team):
    """Sum the eighth-cell values of a player's rows that belong to ``team``.

    Fetches the player page at ``link``, scans the rows of the element with
    id "totals" whose last CSS class is "full_table" or "partial_table",
    and adds up int(cols[7]) for rows whose third cell names ``team`` or
    any code that team_mapping translates to ``team``.

    Args:
        link: Site-relative URL of the player page.
        team: Team code to match.

    Returns:
        The accumulated integer total (0 when no row matches).
    """
    # Accept the requested code plus every code that maps onto it.
    teams = [team]
    teams.extend(key for (key, value) in team_mapping.items() if value == team)
    total_mins = 0
    totals_table = get_page_body(link).find(id="totals")
    for row in totals_table.findAll("tr"):
        # Tag.get() yields None for rows with no class attribute, whereas
        # row["class"] would raise KeyError before the check below runs.
        css_class = row.get("class")
        if not css_class or css_class.split()[-1] not in ("full_table", "partial_table"):
            continue
        cols = row.findAll("td")
        if cols[2].text in teams:
            total_mins += int(cols[7].text)
    return total_mins
def get_winning_percentage(team_page):
    """Return wins / (wins + losses) parsed from a team page.

    Finds the "Record:" label inside the element with id "info_box", takes
    the second child of the label's grandparent, and reads the leading
    "<wins>-<losses>" portion (everything before the first comma).

    Args:
        team_page: Parsed team page exposing ``find(id=...)``.

    Returns:
        The fraction of games won as a float; 0.0 when the record is 0-0.
    """
    info_box = team_page.find(id="info_box")
    record_box = info_box.find(text="Record:").parent.parent
    data = record_box.contents[1].split(",")[0].strip().split("-")
    wins, losses = int(data[0]), int(data[1])
    games = wins + losses
    if not games:
        # No games played yet: avoid ZeroDivisionError.
        return 0.0
    # float() keeps this a true division even without the __future__ import.
    return wins / float(games)
def list_teams():
    """Collect team codes from the "/teams" index page.

    For each row of the element with id "active" that has cells and whose
    first cell contains a link, the code is the third "/"-separated piece
    of that link's href.

    Returns:
        A list of code strings in page order.
    """
    active_table = get_page_body("/teams").find(id="active")
    team_codes = []
    for tr in active_table.findAll("tr"):
        cells = tr.findAll("td")
        # Rows with no cells, or no link in the first cell, are skipped.
        anchors = cells[0].findAll("a") if cells else []
        if anchors:
            team_codes.append(anchors[0]["href"].split("/")[2])
    return team_codes
def main():
    """Run get_seniority() for every team returned by list_teams()."""
    for code in list_teams():
        # Use the replacement code when team_mapping has one for this team.
        get_seniority(team_mapping.get(code, code))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment