Created April 24, 2013 01:53
Gets the data daily for the teams before and after the games.
getData.py was set up as a cron job and figured out which information to grab for each team; a bit of a hack job.
dailyData.py gets the data and stats for each team before the game; normally you would pass a list of teams via the command line.
dailyScores.py gets the scores of the…
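For reference, invocation looks roughly like this (a sketch; the team names and the TSN path below are illustrative placeholders, not values from the gist). Team names must match keys in dailyData.py's teamToAbrv dict, and each dailyScores.py argument is a URL path that gets appended verbatim to http://www.tsn.ca:

python dailyData.py Toronto Montreal "NY Rangers"
python dailyScores.py /some/tsn/boxscore/path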
# dailyData.py
from bs4 import BeautifulSoup
import urllib2
import csv
import datetime
import sys

"""teams = ['Washington', 'Boston', 'NY Rangers', 'Pittsburgh', 'Ottawa', 'Buffalo', 'Minnesota', 'Colorado', 'Winnipeg', 'Toronto', 'Montreal',
           'New Jersey', 'Carolina', 'Tampa Bay', 'Phoenix', 'Columbus', 'NY Islanders', 'Florida', 'Anaheim', 'St Louis', 'Chicago',
           'Dallas', 'Detroit', 'Vancouver', 'San Jose', 'Los Angeles']"""

teams = sys.argv[1:]
if len(teams) == 0:
    print "No teams supplied"
    sys.exit()

# so far doesn't like NY Islanders and CGY, but only on the behindthenet pages
finalData = [[None] * 11 for _ in range(len(teams))]

# behindthenet still files Winnipeg under Atlanta's old abbreviation, so map
# Winnipeg to ATL (the original dict listed 'Winnipeg' twice; the later 'ATL'
# entry is the one that took effect)
teamToAbrv = {'Phoenix': 'PHX', 'Philadelphia': 'PHI', 'Ottawa': 'OTT', 'NY Rangers': 'NYR', 'Nashville': 'NSH', 'NY Islanders': 'NYI',
              'Pittsburgh': 'PIT', 'San Jose': 'S.J', 'Washington': 'WSH', 'Vancouver': 'VAN', 'Toronto': 'TOR', 'St Louis': 'STL',
              'Tampa Bay': 'T.B', 'New Jersey': 'N.J', 'Montreal': 'MTL', 'Columbus': 'CBJ', 'Calgary': 'CGY', 'Carolina': 'CAR',
              'Buffalo': 'BUF', 'Boston': 'BOS', 'Chicago': 'CHI', 'Colorado': 'COL', 'Los Angeles': 'L.A',
              'Minnesota': 'MIN', 'Florida': 'FLA', 'Edmonton': 'EDM', 'Dallas': 'DAL', 'Detroit': 'DET', 'Anaheim': 'ANA',
              'Winnipeg': 'ATL'}
todaysTeams = [teamToAbrv[k] for k in teams if k in teamToAbrv]
teams = [t.lower() for t in teams]

# for each team we need to get their stats:
# [Team, FenwickClose, Goals For, Goals Against, PP%, PK%, sh%, sv%, winstreak, standings, 5-5 F/A]

# FenwickClose
fenwickURL = 'http://behindthenet.ca/fenwick_2012.php'
request = urllib2.Request(fenwickURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('tr')[2:]
# loop through each row; if a row's team is one we are looking for,
# store the appropriate value in the final data
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[0].text
    if t in todaysTeams:
        tKey = todaysTeams.index(t)
        finalData[tKey][1] = allTDs[6].text

"""# goals for (GF/GA now come from the TSN standings table below)
gfURL = 'http://www.nhl.com/ice/teamstats.htm?fetchKey=20132ALLSAAAll&sort=avgGoalsPerGame&viewName=goalsFor'
request = urllib2.Request(gfURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[1].text.lower()
    if t in teams:
        tKey = teams.index(t)
        finalData[tKey][2] = allTDs[14].text

# goals against
gaURL = 'http://www.nhl.com/ice/teamstats.htm?fetchKey=20132ALLSAAAll&sort=avgGoalsAgainstPerGame&viewName=goalsAgainst'
request = urllib2.Request(gaURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[1].text.lower()
    if t in teams:
        tKey = teams.index(t)
        finalData[tKey][3] = allTDs[14].text
"""

# pp%, pk%
ppURL = 'http://www.nhl.com/ice/teamstats.htm'
request = urllib2.Request(ppURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[1].text.lower()
    if t in teams:
        tKey = teams.index(t)
        finalData[tKey][4] = allTDs[11].text
        finalData[tKey][5] = allTDs[12].text
        # we can also get the team's 5-5 F/A ratio here
        finalData[tKey][10] = allTDs[10].text

# sh%, sv%
PDOurl = 'http://www.behindthenet.ca/2012/team_data3.php'
request = urllib2.Request(PDOurl)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('tr')[2:]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    t = allTDs[0].text
    if t in todaysTeams:
        tKey = todaysTeams.index(t)
        finalData[tKey][6] = allTDs[16].text.strip()
        finalData[tKey][7] = allTDs[19].text.strip()

# win streak & standings (plus GF/GA from the same table)
winURL = 'http://www.tsn.ca/nhl/standings/'
request = urllib2.Request(winURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table')[0]
rows2 = rows.findAll('tbody')[1]
rows = rows.findAll('tbody')[0]
# slice around the divider row in the middle of each conference table
rows = rows.findAll('tr')[0:8] + rows.findAll('tr')[9:16]
rows = rows + rows2.findAll('tr')[0:8] + rows2.findAll('tr')[9:16]
# loop through each row
for r in rows:
    allTDs = r.findAll('td')
    allTHs = r.findAll('th')
    t = allTDs[0].findAll('a')[0].text.strip().lower()
    if t in teams:
        tKey = teams.index(t)
        # normalise the streak text, e.g. "Won 3" -> "3", "Lost 2" -> "-2"
        finalData[tKey][8] = allTDs[6].text
        finalData[tKey][8] = finalData[tKey][8].replace('Lost', '-').replace('OT', '-')
        finalData[tKey][8] = finalData[tKey][8].replace('Won', '').replace(' ', '').strip()
        finalData[tKey][9] = allTHs[0].text
        finalData[tKey][2] = allTHs[4].text  # GF
        finalData[tKey][3] = allTDs[4].text  # GA

for i, t in enumerate(teams):
    finalData[i][0] = t.title()
print finalData

# great, let's dump it all into a CSV
now = datetime.datetime.now()
fileName = "/Users/joshuaweissbock/Dropbox/CSI-5388/Project/dailydata/" + str(now.day) + "-" + str(now.month) + "-" + str(now.year) + ".csv"
myfile = open(fileName, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
for line in finalData:
    wr.writerow(line)
myfile.close()
print "Wrote file"
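Each stat source above repeats the same four lines of urllib2/BeautifulSoup boilerplate. A small helper in the same Python 2 style would collapse them; this is a sketch, not part of the original gist:

def get_soup(url):
    # fetch a page and return its parsed BeautifulSoup tree
    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    return BeautifulSoup(response.read())

# e.g. rows = get_soup('http://behindthenet.ca/fenwick_2012.php').findAll('tr')[2:]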
# dailyScores.py
from bs4 import BeautifulSoup
import urllib2
import csv
import datetime
import sys

if len(sys.argv[1:]) == 0:
    print "No URLs supplied"
    sys.exit()
gameIDs = sys.argv[1:]

# output rows:
# [TeamAway, Win/Loss, GF, GA, ShFor, ShA]
# [TeamHome, Loss/Win, GA, GF, ShA, ShFor]

now = datetime.datetime.now()
fileName = "/Users/joshuaweissbock/Dropbox/CSI-5388/Project/dailyscores/" + str(now.day) + "-" + str(now.month) + "-" + str(now.year) + ".csv"
myfile = open(fileName, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)

# loop through all gameIDs (TSN box-score URL paths)
for g in gameIDs:
    gameURL = "http://www.tsn.ca" + g
    request = urllib2.Request(gameURL)
    response = urllib2.urlopen(request)
    the_page = response.read()
    soup = BeautifulSoup(the_page)
    box = soup.findAll('table', 'boxScore')  # store the box-score tables
    col = len(box[0].findAll('tr')[1].findAll('td'))  # column count tells us whether the game went to OT
    # print col
    TeamAway = box[0].findAll('tr')[1].findAll('td')[0].text  # get the away/home team names
    TeamHome = box[0].findAll('tr')[2].findAll('td')[0].text
    GF = box[0].findAll('tr')[1].findAll('td')[col - 1].text  # get the teams' scores
    GA = box[0].findAll('tr')[2].findAll('td')[col - 1].text
    statusAway = "Win" if int(GF) > int(GA) else "Loss"  # determine who won (compare as ints, not strings)
    statusHome = "Loss" if statusAway == "Win" else "Win"
    try:
        # a shootout adds an extra column before the shot totals
        isSO = box[0].findAll('tr')[0].findAll('th')[5].text
        sub = 2 if isSO == 'SO' else 1
    except IndexError:
        sub = 1
    ShFor = box[1].findAll('tr')[1].findAll('td')[col - sub].text  # get shots for/against
    ShA = box[1].findAll('tr')[2].findAll('td')[col - sub].text
    Line1 = [TeamAway, statusAway, GF, GA, ShFor, ShA]  # store them in the right format
    Line2 = [TeamHome, statusHome, GA, GF, ShA, ShFor]
    # output the lines
    print Line1
    print Line2
    # write to CSV
    wr.writerow(Line1)
    wr.writerow(Line2)
myfile.close()
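Both scripts also build the date-stamped filename by hand from now.day, now.month, and now.year; datetime's strftime does the same job in one call. A sketch (note that %d and %m zero-pad, so the filenames would differ slightly from the original concatenation):

def csv_path(base):
    # base is a directory path ending in '/'; returns e.g. base + '24-04-2013.csv'
    return base + datetime.datetime.now().strftime("%d-%m-%Y") + ".csv"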