Created April 24, 2013 01:53
Gets the data daily for the teams before and after the games.
getData.py was set up as a cron job and figured out which information to grab for each team; a bit of a hack job.
dailyData.py gets the data and stats for each team before the game; normally you would pass a list of teams via the command line.
dailyScores.py gets the scores of the…
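For reference, invocation looks roughly like this (a sketch; the team names and the TSN path below are illustrative placeholders, not values from the gist). Team names must match keys in dailyData.py's teamToAbrv dict, and each dailyScores.py argument is a URL path that gets appended verbatim to http://www.tsn.ca:

python dailyData.py Toronto Montreal "NY Rangers"
python dailyScores.py /some/tsn/boxscore/path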
# dailyData.py
from bs4 import BeautifulSoup
import urllib2
import csv
import datetime
import sys

"""teams = ['Washington', 'Boston', 'NY Rangers', 'Pittsburgh', 'Ottawa', 'Buffalo', 'Minnesota', 'Colorado', 'Winnipeg', 'Toronto', 'Montreal',
           'New Jersey', 'Carolina', 'Tampa Bay', 'Phoenix', 'Columbus', 'NY Islanders', 'Florida', 'Anaheim', 'St Louis', 'Chicago',
           'Dallas', 'Detroit', 'Vancouver', 'San Jose', 'Los Angeles']"""

teams = sys.argv[1:]
if len(teams) == 0:
    print "No teams supplied"
    sys.exit()

# so far doesn't like NY Islanders and CGY, but only on the behindthenet pages
finalData = [[None] * 11 for _ in range(len(teams))]

# behindthenet still files Winnipeg under Atlanta's old abbreviation, so map
# Winnipeg to ATL (the original dict listed 'Winnipeg' twice; the later 'ATL'
# entry is the one that took effect)
teamToAbrv = {'Phoenix': 'PHX', 'Philadelphia': 'PHI', 'Ottawa': 'OTT', 'NY Rangers': 'NYR', 'Nashville': 'NSH', 'NY Islanders': 'NYI',
              'Pittsburgh': 'PIT', 'San Jose': 'S.J', 'Washington': 'WSH', 'Vancouver': 'VAN', 'Toronto': 'TOR', 'St Louis': 'STL',
              'Tampa Bay': 'T.B', 'New Jersey': 'N.J', 'Montreal': 'MTL', 'Columbus': 'CBJ', 'Calgary': 'CGY', 'Carolina': 'CAR',
              'Buffalo': 'BUF', 'Boston': 'BOS', 'Chicago': 'CHI', 'Colorado': 'COL', 'Los Angeles': 'L.A',
              'Minnesota': 'MIN', 'Florida': 'FLA', 'Edmonton': 'EDM', 'Dallas': 'DAL', 'Detroit': 'DET', 'Anaheim': 'ANA',
              'Winnipeg': 'ATL'}
todaysTeams = [teamToAbrv[k] for k in teams if k in teamToAbrv]
teams = [t.lower() for t in teams]

# for each team we need to get their stats:
# [Team, FenwickClose, Goals For, Goals Against, PP%, PK%, sh%, sv%, winstreak, standings, 5-5 F/A]

# FenwickClose
fenwickURL = 'http://behindthenet.ca/fenwick_2012.php'
request = urllib2.Request(fenwickURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('tr')[2:]
# loop through each row; if a row's team is one we are looking for,
# store the appropriate value in the final data
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[0].text
    if t in todaysTeams:
        tKey = todaysTeams.index(t)
        finalData[tKey][1] = allTDs[6].text

"""# goals for (GF/GA now come from the TSN standings table below)
gfURL = 'http://www.nhl.com/ice/teamstats.htm?fetchKey=20132ALLSAAAll&sort=avgGoalsPerGame&viewName=goalsFor'
request = urllib2.Request(gfURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[1].text.lower()
    if t in teams:
        tKey = teams.index(t)
        finalData[tKey][2] = allTDs[14].text

# goals against
gaURL = 'http://www.nhl.com/ice/teamstats.htm?fetchKey=20132ALLSAAAll&sort=avgGoalsAgainstPerGame&viewName=goalsAgainst'
request = urllib2.Request(gaURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[1].text.lower()
    if t in teams:
        tKey = teams.index(t)
        finalData[tKey][3] = allTDs[14].text
"""

# pp%, pk%
ppURL = 'http://www.nhl.com/ice/teamstats.htm'
request = urllib2.Request(ppURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[1].text.lower()
    if t in teams:
        tKey = teams.index(t)
        finalData[tKey][4] = allTDs[11].text
        finalData[tKey][5] = allTDs[12].text
        # we can also get the team's 5-5 F/A ratio here
        finalData[tKey][10] = allTDs[10].text

# sh%, sv%
PDOurl = 'http://www.behindthenet.ca/2012/team_data3.php'
request = urllib2.Request(PDOurl)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[0].text
    if t in todaysTeams:
        tKey = todaysTeams.index(t)
        finalData[tKey][6] = allTDs[16].text.strip()
        finalData[tKey][7] = allTDs[19].text.strip()

# win streak & standings (plus GF/GA from the same table)
winURL = 'http://www.tsn.ca/nhl/standings/'
request = urllib2.Request(winURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table')[0]
rows2 = rows.findAll('tbody')[1]
rows = rows.findAll('tbody')[0]
# slice around the divider row in the middle of each conference table
rows = rows.findAll('tr')[0:8] + rows.findAll('tr')[9:16]
rows = rows + rows2.findAll('tr')[0:8] + rows2.findAll('tr')[9:16]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    allTHs = r.findAll('th')
    t = allTDs[0].findAll('a')[0].text.strip().lower()
    if t in teams:
        tKey = teams.index(t)
        # normalise the streak text, e.g. "Won 3" -> "3", "Lost 2" -> "-2"
        finalData[tKey][8] = allTDs[6].text
        finalData[tKey][8] = finalData[tKey][8].replace('Lost', '-').replace('OT', '-')
        finalData[tKey][8] = finalData[tKey][8].replace('Won', '').replace(' ', '').strip()
        finalData[tKey][9] = allTHs[0].text
        finalData[tKey][2] = allTHs[4].text  # GF
        finalData[tKey][3] = allTDs[4].text  # GA

for i, t in enumerate(teams):
    finalData[i][0] = t.title()
print finalData

# great, let's dump it all into a CSV
now = datetime.datetime.now()
fileName = "/Users/joshuaweissbock/Dropbox/CSI-5388/Project/dailydata/" + str(now.day) + "-" + str(now.month) + "-" + str(now.year) + ".csv"
myfile = open(fileName, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
for line in finalData:
    wr.writerow(line)
myfile.close()
print "Wrote file"
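Each stat source above repeats the same four lines of urllib2/BeautifulSoup boilerplate. A small helper in the same Python 2 style would collapse them; this is a sketch, not part of the original gist:

def get_soup(url):
    # fetch a page and return its parsed BeautifulSoup tree
    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    return BeautifulSoup(response.read())

# e.g. rows = get_soup('http://behindthenet.ca/fenwick_2012.php').findAll('tr')[2:]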
# dailyScores.py
from bs4 import BeautifulSoup
import urllib2
import csv
import datetime
import sys

if len(sys.argv[1:]) == 0:
    print "No URLs supplied"
    sys.exit()
gameIDs = sys.argv[1:]

# output rows:
# [TeamAway, Win/Loss, GF, GA, ShFor, ShA]
# [TeamHome, Loss/Win, GA, GF, ShA, ShFor]

now = datetime.datetime.now()
fileName = "/Users/joshuaweissbock/Dropbox/CSI-5388/Project/dailyscores/" + str(now.day) + "-" + str(now.month) + "-" + str(now.year) + ".csv"
myfile = open(fileName, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)

# loop through all gameIDs (TSN box-score URL paths)
for g in gameIDs:
    gameURL = "http://www.tsn.ca" + g
    request = urllib2.Request(gameURL)
    response = urllib2.urlopen(request)
    the_page = response.read()
    soup = BeautifulSoup(the_page)
    box = soup.findAll('table', 'boxScore')  # store the box-score tables
    col = len(box[0].findAll('tr')[1].findAll('td'))  # column count tells us whether the game went to OT
    # print col
    TeamAway = box[0].findAll('tr')[1].findAll('td')[0].text  # get the away/home team names
    TeamHome = box[0].findAll('tr')[2].findAll('td')[0].text
    GF = box[0].findAll('tr')[1].findAll('td')[col - 1].text  # get the teams' scores
    GA = box[0].findAll('tr')[2].findAll('td')[col - 1].text
    statusAway = "Win" if int(GF) > int(GA) else "Loss"  # determine who won (compare as ints, not strings)
    statusHome = "Loss" if statusAway == "Win" else "Win"
    try:
        # a shootout adds an extra column before the shot totals
        isSO = box[0].findAll('tr')[0].findAll('th')[5].text
        sub = 2 if isSO == 'SO' else 1
    except IndexError:
        sub = 1
    ShFor = box[1].findAll('tr')[1].findAll('td')[col - sub].text  # get shots for/against
    ShA = box[1].findAll('tr')[2].findAll('td')[col - sub].text
    Line1 = [TeamAway, statusAway, GF, GA, ShFor, ShA]  # store them in the right format
    Line2 = [TeamHome, statusHome, GA, GF, ShA, ShFor]
    # output the lines
    print Line1
    print Line2
    # write to CSV
    wr.writerow(Line1)
    wr.writerow(Line2)
myfile.close()
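Both scripts also build the date-stamped filename by hand from now.day, now.month, and now.year; datetime's strftime does the same job in one call. A sketch (note that %d and %m zero-pad, so the filenames would differ slightly from the original concatenation):

def csv_path(base):
    # base is a directory path ending in '/'; returns e.g. base + '24-04-2013.csv'
    return base + datetime.datetime.now().strftime("%d-%m-%Y") + ".csv"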