Created
January 14, 2011 18:32
Revisions
-
JeffSackmann revised this gist
Jan 15, 2011 . 1 changed file with 12 additions and 15 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,7 +2,6 @@ from createTuple import createTuple ## gist: 778481 from writeMatrixCSV import writeMatrixCSV ## gist: 778484 def makeBatTable(r): for stat in ['AB', 'H', 'D', 'T', 'HR', 'SO', 'BB', 'SF', 'HP', 'CI']: @@ -14,22 +13,22 @@ def makeBatTable(r): slg = 0 else: avg = float(r['H'])/float(r['AB']) r['AVG'] = round(avg, 3) slg = float(r['H']+r['D']+(2*r['T'])+(3*r['HR']))/float(r['AB']) r['SLG'] = round(slg, 3) if (r['AB']+r['BB']+r['SF']+r['HP']) == 0: r['OBP'] = 0 r['OPS'] = 0 r['wOBA'] = 0 else: pa = float(r['AB']+r['BB']+r['SF']+r['HP']+r['CI']) obp = float(r['H']+r['BB']+r['HP']+r['CI'])/pa r['OBP'] = round(obp, 3) sing = int(r['H']) - int(r['HR']) - int(r['T']) - int(r['D']) num = (.72*int(r['BB'])) + (.75*int(r['HP'])) + (.9*sing) + (1.24*int(r['D'])) + (1.56*int(r['T'])) + (1.95*int(r['HR'])) den = int(r['BB']) + int(r['AB']) + int(r['HP']) woba = num/den r['wOBA'] = round(woba, 3) return r def marcelBattingSeason(yr): @@ -169,7 +168,7 @@ def marcelBattingSeason(yr): finalProj[stat] = prorateProj[stat]/ageAdj ## reliability reliab = 1 - (1200.0/compPa) finalProj['rel'] = round(reliab, 2) finalProj['Age'] = age ## add to master dict rawProjections[b] = finalProj @@ -200,9 +199,9 @@ def marcelBattingSeason(yr): if projRatios[stat] == 0: marcels[pl][stat] = rawProjections[pl][stat] else: marcels[pl][stat] = round((trueRatios[stat]/projRatios[stat])*rawProjections[pl][stat], 0) elif stat == 'PA': marcels[pl][stat] = round(rawProjections[pl][stat], 0) else: marcels[pl][stat] = rawProjections[pl][stat] @@ -245,19 +244,17 @@ def marcelBattingSeason(yr): pitchers = createTuple('bdb_pitching.csv') ## this is the pitcher seasons sheet from the lahman db. headers: ## playerID,yearID,stint,teamID,lgID,W,L,G,GS,CG,SHO,SV,Ipouts,H,ER,HR,BB,SO,Baopp,ERA,IBB,WP,HP,BK,BFP,GF,R batters = createTuple('bdb_batting.csv') ## this is the batting seasons sheet from the lahman db. headers: ## playerID,yearID,stint,teamID,lgID,G,G_batting,AB,R,H,D,T,HR,RBI,SB,CS,BB,SO,IBB,HP,SH,SF,GIDP,G_old ## master db for birthYear master = createTuple('bdb_master.csv') ## master biographical data sheet from lahman db. headers: ## lahmanID,playerID,managerID,hofID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameNote,nameGiven,nameNick,weight,height,bats,throws,debut,finalGame,college,lahman40ID,lahman birthYear = {} for pl in master: @@ -268,4 +265,4 @@ def marcelBattingSeason(yr): firstlast[pl[1]] = [pl[16], pl[17]] ## sample usage marcelBattingSeason(2005) -
JeffSackmann revised this gist
Jan 14, 2011 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -245,16 +245,16 @@ def marcelBattingSeason(yr): pitchers = createTuple('bdb_pitching.csv') ## this is the pitcher seasons sheet from the bb databank db. headers: ## playerID,yearID,stint,teamID,lgID,W,L,G,GS,CG,SHO,SV,Ipouts,H,ER,HR,BB,SO,Baopp,ERA,IBB,WP,HP,BK,BFP,GF,R batters = createTuple('bdb_batting.csv') ## this is the batting seasons sheet from the bb databank db. headers: ## playerID,yearID,stint,teamID,lgID,G,G_batting,AB,R,H,D,T,HR,RBI,SB,CS,BB,SO,IBB,HP,SH,SF,GIDP,G_old ## master db for birthYear master = createTuple('bdb_master.csv') ## master biographical data sheet from bb databank db. headers: ## lahmanID,playerID,managerID,hofID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity, ## deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameNote,nameGiven, ## nameNick,weight,height,bats,throws,debut,finalGame,college,lahman40ID,lahman -
JeffSackmann created this gist
Jan 14, 2011 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,271 @@ ## Generate a full season's worth of batting Marcel projections from past years' stats from createTuple import createTuple ## gist: 778481 from writeMatrixCSV import writeMatrixCSV ## gist: 778484 from convXdig import convXdig as cxd ## gist: 779951 def makeBatTable(r): for stat in ['AB', 'H', 'D', 'T', 'HR', 'SO', 'BB', 'SF', 'HP', 'CI']: if stat in r: pass else: r[stat] = 0 if r['AB'] == 0: r['SLG'] = 0 r['AVG'] = 0 slg = 0 else: avg = float(r['H'])/float(r['AB']) r['AVG'] = cxd(avg, 3) slg = float(r['H']+r['D']+(2*r['T'])+(3*r['HR']))/float(r['AB']) r['SLG'] = cxd(slg, 3) if (r['AB']+r['BB']+r['SF']+r['HP']) == 0: r['OBP'] = 0 r['OPS'] = 0 r['wOBA'] = 0 else: pa = float(r['AB']+r['BB']+r['SF']+r['HP']+r['CI']) obp = float(r['H']+r['BB']+r['HP']+r['CI'])/pa r['OBP'] = cxd(obp, 3) sing = int(r['H']) - int(r['HR']) - int(r['T']) - int(r['D']) num = (.72*int(r['BB'])) + (.75*int(r['HP'])) + (.9*sing) + (1.24*int(r['D'])) + (1.56*int(r['T'])) + (1.95*int(r['HR'])) den = int(r['BB']) + int(r['AB']) + int(r['HP']) woba = num/den r['wOBA'] = cxd(woba, 3) return r def marcelBattingSeason(yr): # yr = year being projected, input as int yr = str(yr) yr1 = str(int(yr) - 1) yr2 = str(int(yr) - 2) yr3 = str(int(yr) - 3) ## get list of pitchers; determine which batters are really ## 'batters' and throw out pitchers with at-bats projectBatters = [] for yr in [yr3, yr2, yr]: yearPitchers = {} for p in pitchers: pitchID = p[0] if p[1] == yr: yearPitchers[pitchID] = int(p[12]) else: pass for b in batters: batID = b[0] if b[1] == yr: pass else: continue abString = b[7] if abString == '': continue else: batAb = int(abString) if batID in yearPitchers: if yearPitchers[batID] > batAb: continue else: pass else: pass if batID in projectBatters: pass else: projectBatters.append(batID) ## find league average for previous year yearPitchers = {} for p in pitchers: pitchID = p[0] if p[1] == yr1: yearPitchers[pitchID] = int(p[12]) else: pass leagueAverage = {} for b in batters: batID = b[0] if b[1] == yr1: pass else: continue abString = b[7] if abString == '': continue else: batAb = int(abString) if batID in yearPitchers: if yearPitchers[batID] > batAb: continue else: pass else: pass if batID in projectBatters: pass else: projectBatters.append(batID) for stat in batHeaders: col = batHeaders[stat] try: playerStat = int(b[col]) except: continue else: pass if stat in leagueAverage: leagueAverage[stat] += playerStat else: leagueAverage[stat] = playerStat for stat in ['HP', 'SF', 'SH']: if stat in leagueAverage: pass else: leagueAverage[stat] = 0 totalPa = leagueAverage['AB'] + leagueAverage['BB'] + leagueAverage['HP'] + leagueAverage['SF'] + leagueAverage['SH'] regression = {} for stat in leagueAverage: regression[stat] =(1200.0/totalPa)*leagueAverage[stat] rawProjections = {} ## calculate projections for each player for b in projectBatters: components = {} y2pa = 0 y1pa = 0 for stat in batHeaders: components[stat] = 0 for row in batters: if row[0] == b: pass else: continue if row[1] == yr3: for stat in batHeaders: try: playerStat = int(row[batHeaders[stat]]) except: continue components[stat] += 3*playerStat elif row[1] == yr2: for stat in batHeaders: try: playerStat = int(row[batHeaders[stat]]) except: continue components[stat] += 4*playerStat for stat in ['AB', 'BB', 'HP', 'SF', 'SH']: try: y2pa += int(row[batHeaders[stat]]) except: continue elif row[1] == yr1: for stat in batHeaders: try: playerStat = int(row[batHeaders[stat]]) except: continue components[stat] += 5*playerStat for stat in ['AB', 'BB', 'HP', 'SF', 'SH']: try: y1pa += int(row[batHeaders[stat]]) except: continue else: continue ## add regression component for stat in regression: components[stat] += regression[stat] ## get projected PA projPa = 0.5*y1pa + 0.1*y2pa + 200 ## prorate into projected PA compPa = components['AB'] + components['BB'] + components['HP'] + components['SF'] + components['SH'] prorateProj = {} for stat in components: prorateProj[stat] = (projPa/compPa)*components[stat] prorateProj['PA'] = projPa try: age = int(yr) - int(birthYear[b]) except: age = 29 ## in case birthyear is missing or corrupted ## age adjust if age > 29: ageAdj = 1/(1 + ((age - 29)*0.003)) elif age < 29: ageAdj = 1 + ((29 - age)*0.006) else: ageAdj = 1 finalProj = {} for stat in prorateProj: if stat in ['PA', 'AB']: finalProj[stat] = prorateProj[stat] elif stat in ['R', 'H', 'D', 'T', 'HR', 'RBI', 'SB', 'BB', 'IBB', 'HP', 'SH', 'SF']: finalProj[stat] = prorateProj[stat]*ageAdj else: finalProj[stat] = prorateProj[stat]/ageAdj ## reliability reliab = 1 - (1200.0/compPa) finalProj['rel'] = cxd(reliab, 2) finalProj['Age'] = age ## add to master dict rawProjections[b] = finalProj ## re-baseline projTotal = {} for pl in rawProjections: for stat in rawProjections[pl]: if stat in projTotal: projTotal[stat] += rawProjections[pl][stat] else: projTotal[stat] = rawProjections[pl][stat] projTotalPa = projTotal['PA'] projRatios = {} for stat in ['AB', 'R', 'H', 'D', 'T', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'IBB', 'HP', 'SH', 'SF', 'GDP']: projRatios[stat] = projTotal[stat]/projTotalPa trueRatios = {} for stat in ['AB', 'R', 'H', 'D', 'T', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'IBB', 'HP', 'SH', 'SF', 'GDP']: try: trueRatios[stat] = leagueAverage[stat]/float(totalPa) except: trueRatios[stat] = 0 marcels = {} for pl in rawProjections: marcels[pl] = {} for stat in rawProjections[pl]: if stat in ['AB', 'R', 'H', 'D', 'T', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'IBB', 'HP', 'SH', 'SF', 'GDP']: if projRatios[stat] == 0: marcels[pl][stat] = rawProjections[pl][stat] else: marcels[pl][stat] = cxd((trueRatios[stat]/projRatios[stat])*rawProjections[pl][stat], 0) elif stat == 'PA': marcels[pl][stat] = cxd(rawProjections[pl][stat], 0) else: marcels[pl][stat] = rawProjections[pl][stat] header = ['bdbID', 'First', 'Last', 'Year', 'age', 'rel', 'wOBA', 'AVG', 'OBP', 'SLG', 'PA', 'AB', 'R', 'H', 'D', 'T', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'IBB', 'HP', 'SH', 'SF', 'GDP'] marcelSheet = [header] for pl in marcels: row = [pl] row += firstlast[pl] row.append(yr) marcels[pl] = makeBatTable(marcels[pl]) for stat in ['Age', 'rel', 'wOBA', 'AVG', 'OBP', 'SLG', 'PA', 'AB', 'R', 'H', 'D', 'T', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'IBB', 'HP', 'SH', 'SF', 'GDP']: row.append(marcels[pl][stat]) marcelSheet.append(row) filename = 'marcel_batters_' + yr + '.csv' writeMatrixCSV(marcelSheet, filename) batHeaders = {'AB': 7, 'R': 8, 'H': 9, 'D': 10, 'T': 11, 'HR': 12, 'RBI': 13, 'SB': 14, 'CS': 15, 'BB': 16, 'SO': 17, 'IBB': 18, 'HP': 19, 'SH': 20, 'SF': 21, 'GDP': 22 } pitchers = createTuple('bdb_pitching.csv') ## this is the pitcher seasons sheet from the lahman db. headers: ## playerID,yearID,stint,teamID,lgID,W,L,G,GS,CG,SHO,SV,Ipouts,H,ER,HR,BB,SO,Baopp,ERA,IBB,WP,HP,BK,BFP,GF,R batters = createTuple('bdb_batting.csv') ## this is the batting seasons sheet from the lahman db. headers: ## playerID,yearID,stint,teamID,lgID,G,G_batting,AB,R,H,D,T,HR,RBI,SB,CS,BB,SO,IBB,HP,SH,SF,GIDP,G_old ## master db for birthYear master = createTuple('bdb_master.csv') ## master biographical data sheet from lahman db. headers: ## lahmanID,playerID,managerID,hofID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity, ## deathYear,deathMonth,deathDay,deathCountry,deathState,deathCity,nameFirst,nameLast,nameNote,nameGiven, ## nameNick,weight,height,bats,throws,debut,finalGame,college,lahman40ID,lahman birthYear = {} for pl in master: birthYear[pl[1]] = pl[4] firstlast = {} for pl in master: firstlast[pl[1]] = [pl[16], pl[17]] ## sample usage ## marcelBattingSeason(2005)