Created
July 16, 2016 03:36
-
-
Save phil-lopreiato/8105c1370d2af3c0f269a5ac337cd590 to your computer and use it in GitHub Desktop.
OPR Stats Data Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
import argparse | |
import csv | |
import json | |
import urllib2 | |
from collections import defaultdict | |
""" | |
A script to try and correlate district participation with team performance. Computes: | |
1) Average OPR of District teams and Regional teams qualifying for CMP, compared year to year | |
2) Average ranking of District and Regional teams attending CMP | |
3) Number of teams attending CMP that played in elims at least once at a regional/district event
Methodology: | |
- For the 2007 - 2016 FRC seasons, load each event and store: | |
- a mapping of {team_key: district} | |
- a mapping of {team_key: [oprs for REGIONAL/DISTRICT/DCMP]} | |
- a mapping of {team_key: cmp opr}, if team attended CMP | |
- a mapping of {team_key: [absolute ranking at REGIONAL/DISTRICT/DCMP]} | |
- a mapping of {team_key: cmp rank}, if team attended | |
- a mapping of {team_key: [ranking percent (rank/total #teams) at REGIONAL/DISTRICT/DCMP]} | |
- a mapping of {team_key: [alliance number at REGIONAL/DISTRICT/DCMP]} | |
- a mapping of {team_key: cmp alliance number}, if qualified | |
- Generate tables: | |
- [team_key, year, district, in-season OPR, cmp OPR] | |
- [team_key, year, district, normalized in-season ranking, normalized cmp ranking] | |
- [team_key, year, district, % in-season events in elims, in cmp alliance?] | |
""" | |
# URL template for The Blue Alliance API v2; '{}' is filled with the endpoint path.
BASE_URL = 'https://www.thebluealliance.com/api/v2/{}'

# Header name / value pair sent with every request to identify this script.
APP_HEADER = 'X-TBA-App-Id'
APP_ID = 'plnyyanks:oprandcmp:v0.1'

# Event types counted as "in-season", in the order [REGIONAL, DISTRICT, DCMP].
# See https://github.com/the-blue-alliance/the-blue-alliance/blob/master/consts/event_type.py
VALID_EVENT_TYPES = [0, 1, 2]

# District abbreviations whose team lists are fetched for each season.
# See https://github.com/the-blue-alliance/the-blue-alliance/blob/master/consts/district_type.py
VALID_DISTRICT_SHORTS = [
    'chs',
    'pch',
    'in',
    'fim',
    'mar',
    'nc',
    'ne',
    'pnw',
]
def fetch_endpoint(endpoint):
    """Fetch one API endpoint and return its decoded JSON body.

    Args:
        endpoint: Path that is substituted into BASE_URL
            (for example "event/<event_key>/teams").

    Returns:
        Whatever json.loads produces for the response body.

    Raises:
        Any error raised by urllib2.urlopen is propagated unchanged, as is
        the ValueError json.loads raises for a non-JSON body.
    """
    full_url = BASE_URL.format(endpoint)
    # Parenthesised single-argument form prints the same text as the
    # Python 2 print statement.
    print("Fetching {}".format(full_url))
    request = urllib2.Request(full_url, headers={APP_HEADER: APP_ID, 'User-agent': 'Mozilla/5.0'})
    response = urllib2.urlopen(request)
    try:
        return json.loads(response.read())
    finally:
        # Close explicitly instead of relying on garbage collection; the
        # script issues many requests in a row.
        response.close()
def fetch_event_keys_in_year(year):
    """Split one season's event keys into in-season and championship lists.

    Args:
        year: Season to query; formatted into the "events/<year>" endpoint.

    Returns:
        A tuple (inseason_keys, cmp_keys). The first list holds keys of
        events whose "event_type" is in VALID_EVENT_TYPES, the second holds
        keys of events whose "event_type" equals 3.
    """
    inseason_keys = []
    cmp_keys = []
    for event in fetch_endpoint("events/{}".format(year)):
        event_type = event["event_type"]
        if event_type in VALID_EVENT_TYPES:
            inseason_keys.append(event["key"])
        if event_type == 3:  # CMP_DIVISION
            cmp_keys.append(event["key"])
    return (inseason_keys, cmp_keys)
def fetch_event_info(event_key):
    """Return the decoded response of the "event/<event_key>" endpoint."""
    endpoint = "event/{}".format(event_key)
    return fetch_endpoint(endpoint)
def fetch_event_teams(event_key):
    """Return the "key" of every team listed by "event/<event_key>/teams"."""
    team_keys = []
    for team in fetch_endpoint("event/{}/teams".format(event_key)):
        team_keys.append(team["key"])
    return team_keys
def fetch_event_rankings(event_key):
    """Return the decoded response of the "event/<event_key>/rankings" endpoint."""
    endpoint = "event/{}/rankings".format(event_key)
    return fetch_endpoint(endpoint)
def fetch_event_stats(event_key):
    """Return the decoded response of the "event/<event_key>/stats" endpoint."""
    endpoint = "event/{}/stats".format(event_key)
    return fetch_endpoint(endpoint)
def fetch_district_team_keys(year, short):
    """Return the "key" of every team listed for a district in a season.

    Args:
        year: Season to query.
        short: District abbreviation (the values in VALID_DISTRICT_SHORTS
            are what the script passes).

    Returns:
        A list of team keys from "district/<short>/<year>/teams".
    """
    endpoint = "district/{}/{}/teams".format(short, year)
    team_keys = []
    for team in fetch_endpoint(endpoint):
        team_keys.append(team["key"])
    return team_keys
def alliance_contains_team(alliances, team_key):
    """Report whether a team appears in any alliance's "picks" list.

    Args:
        alliances: Sequence of dicts, each with a "picks" entry; may be
            None or empty when no alliance data is available.
        team_key: Team key to look for.

    Returns:
        None when `alliances` is falsy (no data), otherwise True if
        `team_key` is in at least one alliance's "picks", else False.
    """
    if alliances:
        for alliance in alliances:
            if team_key in alliance["picks"]:
                return True
        return False
    # No alliance data at all: distinguishable from "not picked".
    return None
def mean(data):
    """Return the arithmetic mean of `data` as a float.

    Args:
        data: Sized collection of numbers.

    Returns:
        sum(data) divided by len(data), using float division.

    Raises:
        ZeroDivisionError: if `data` is empty.
    """
    # Force float division: with the Python 2 `/` operator an all-integer
    # input would otherwise be truncated (mean([1, 2]) == 1).
    return sum(data) / float(len(data))
# Returns the % of other teams
# that the target team seeded higher than
def normalized_ranking(rankings, target_team, team_count):
    """Return the fraction of other teams the target team out-seeded.

    Args:
        rankings: Sequence of rows where element 1 of each row is a team
            number. The row index is used as the rank, so row 0 is
            presumably a header row (the len == 1 guard suggests this) --
            TODO confirm against the rankings endpoint.
        target_team: Team key; its first three characters are stripped to
            obtain the team number that is compared with each row.
        team_count: Number of teams at the event.

    Returns:
        (team_count - row_index) / (team_count - 1) as a float for the row
        matching the team, or None when there are no usable rankings, when
        fewer than two teams make the ratio undefined, or when the team
        does not appear in `rankings`.
    """
    if not rankings or len(rankings) == 1 or team_count <= 1:
        # team_count <= 1 would divide by zero (or by a negative number).
        return None
    team_number = target_team[3:]
    for raw_rank, ranking in enumerate(rankings):
        # str() lets the match succeed whether the team cell is "254" or 254.
        if str(ranking[1]) == team_number:
            return (team_count - raw_rank) / float(team_count - 1)
    # Team not listed: previously this fell through and produced a
    # meaningless score based on len(rankings).
    return None
if __name__ == "__main__":
    # Script entry point: for every season in [--start, --end] download
    # district membership plus per-event alliances, rankings and OPRs, then
    # write three CSV files (alliances.csv, rankings.csv, opr.csv) with one
    # row per (year, team).
    parser = argparse.ArgumentParser()
    parser.add_argument("--start", help="First competition season to test", type=int, default=2016)
    parser.add_argument("--end", help="Last competition season to test, inclusive", type=int, default=2016)
    args = parser.parse_args()

    # Every table is keyed year -> team_key. In-season tables collect one
    # entry per event attended; championship tables hold a single value.
    team_districts = defaultdict(dict)
    inseason_oprs = defaultdict(lambda: defaultdict(list))
    cmp_oprs = defaultdict(dict)
    inseason_rankings = defaultdict(lambda: defaultdict(list))
    cmp_rankings = defaultdict(dict)
    inseason_alliances = defaultdict(lambda: defaultdict(list))
    cmp_alliances = defaultdict(dict)

    # For each year...
    for year in range(args.start, args.end + 1):
        # Get all district teams from that year
        for district_key in VALID_DISTRICT_SHORTS:
            for team_key in fetch_district_team_keys(year, district_key):
                team_districts[year][team_key] = district_key

        # Get event keys relevant for this year
        inseason_keys, cmp_keys = fetch_event_keys_in_year(year)

        for event_key in inseason_keys:
            teams = fetch_event_teams(event_key)
            alliances = fetch_event_info(event_key)["alliances"]
            rankings = fetch_event_rankings(event_key)
            # Tolerate a stats payload without an "oprs" entry instead of
            # aborting the whole run with a KeyError.
            oprs = fetch_event_stats(event_key).get("oprs") or {}
            for team in teams:
                team_num = team[3:]  # OPRs are looked up by the key minus its 3-char prefix
                inseason_alliances[year][team].append(alliance_contains_team(alliances, team))
                inseason_oprs[year][team].append(oprs.get(team_num))
                inseason_rankings[year][team].append(normalized_ranking(rankings, team, len(teams)))

        for event_key in cmp_keys:
            teams = fetch_event_teams(event_key)
            alliances = fetch_event_info(event_key)["alliances"]
            rankings = fetch_event_rankings(event_key)
            oprs = fetch_event_stats(event_key).get("oprs") or {}
            for team in teams:
                team_num = team[3:]
                cmp_alliances[year][team] = alliance_contains_team(alliances, team)
                cmp_oprs[year][team] = oprs.get(team_num)
                cmp_rankings[year][team] = normalized_ranking(rankings, team, len(teams))

    # Rows: [year, team, district, share of in-season events in an alliance,
    #        championship alliance flag (False if the team has no entry)]
    # 'wb' is the mode the Python 2 csv module expects.
    with open('alliances.csv', 'wb') as csvfile:
        alliance_writer = csv.writer(csvfile)
        for year, team_maps in inseason_alliances.iteritems():
            for team, results in team_maps.iteritems():
                district = team_districts[year].get(team, None)
                # None marks events with no alliance data; exclude them
                # from the denominator.
                clean_results = [result for result in results if result is not None]
                percent_in_alliances = results.count(True) / float(len(clean_results)) if clean_results else 0
                alliance_writer.writerow([year, team, district, percent_in_alliances, cmp_alliances[year].get(team, False)])

    # Rows: [year, team, district, mean normalized in-season rank,
    #        normalized championship rank (None if the team has no entry)]
    with open('rankings.csv', 'wb') as csvfile:
        rankings_writer = csv.writer(csvfile)
        for year, team_maps in inseason_rankings.iteritems():
            for team, results in team_maps.iteritems():
                district = team_districts[year].get(team, None)
                clean_results = [result for result in results if result is not None]
                avg_norm_rank = sum(clean_results) / float(len(clean_results)) if clean_results else 0
                rankings_writer.writerow([year, team, district, avg_norm_rank, cmp_rankings[year].get(team)])

    # Rows: [year, team, district, mean in-season OPR, max in-season OPR,
    #        championship OPR (None if the team has no entry)]
    with open('opr.csv', 'wb') as csvfile:
        opr_writer = csv.writer(csvfile)
        for year, team_maps in inseason_oprs.iteritems():
            for team, results in team_maps.iteritems():
                district = team_districts[year].get(team, None)
                # Only drop missing values: the previous filter
                # ("if result if result is not None") also discarded a
                # legitimate OPR of exactly 0.
                clean_results = [result for result in results if result is not None]
                avg_opr = sum(clean_results) / float(len(clean_results)) if clean_results else 0
                max_opr = max(clean_results) if clean_results else 0
                opr_writer.writerow([year, team, district, avg_opr, max_opr, cmp_oprs[year].get(team)])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment