Skip to content

Instantly share code, notes, and snippets.

@hybridjosto
Last active August 6, 2018 11:22
Show Gist options
  • Select an option

  • Save hybridjosto/4573849 to your computer and use it in GitHub Desktop.

Select an option

Save hybridjosto/4573849 to your computer and use it in GitHub Desktop.
Scraping data from a web table using Python and Beautiful Soup
import urllib2
from bs4 import BeautifulSoup
# http://segfault.in/2010/07/parsing-html-table-in-python-with-beautifulsoup/
# Scrape the first <table> of each linked gunnercricket.com page into
# 'cricket-data.txt': one comma-separated line per table row, prefixed with
# the first four characters of the page title.
#
# NOTE: this script targets Python 2 (it uses urllib2 and writes encoded byte
# strings to a text-mode file).

with open("linksSource.txt") as linksFile:
    # Strip each entry so the trailing newline does not end up inside the URL.
    lines = [line.strip() for line in linksFile]

with open('cricket-data.txt', 'w') as f:
    for i in lines[12:108]:  # only entries 12..107 of the links file are used
        url = "http://www.gunnercricket.com/" + str(i)
        try:
            page = urllib2.urlopen(url)
        except Exception:
            # Best effort: a page that cannot be fetched is skipped.
            # `Exception` (rather than a bare except) keeps Ctrl-C working.
            continue
        try:
            soup = BeautifulSoup(page)
        finally:
            page.close()

        # Some pages may lack a usable <title>; fall back to an empty prefix
        # instead of crashing the whole run.
        title = soup.title
        title_text = title.string if title is not None else None
        date = (title_text or u'')[:4] + u','  # first 4 characters of the title

        table = soup.find('table')
        if table is None:
            # Nothing to extract from this page.
            continue

        for tr in table.findAll('tr'):
            # get_text() also handles cells that contain nested tags
            # (links, <b>, ...), which ''.join(td) could not.
            text_data = [td.get_text() for td in tr.findAll('td')]
            # Build the whole row as unicode and encode exactly once, so
            # non-ASCII cell text no longer triggers an implicit ASCII decode.
            text = date + u','.join(text_data)
            f.write(text.encode('utf-8') + '\n')
@sxb1649
Copy link
Copy Markdown

sxb1649 commented Jan 15, 2018

import pandas as pd
from pandas import Series, DataFrame

from bs4 import BeautifulSoup
import json
import csv

import requests

import lxml

# Download the ESPN BCS rankings page and write every row of its
# 'tablehead' table to ./Rankings.csv (one CSV record per <tr>).

# The trailing space that used to follow the URL has been removed.
url = "http://espn.go.com/college-football/bcs/_/year/2013"

result = requests.get(url)

c = result.content
soup = BeautifulSoup(c, "lxml")

# Reuse the soup parsed above; the original re-parsed an undefined `html`.
table = soup.find('table', attrs={'class': 'tablehead'})
if table is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise SystemExit("No table with class 'tablehead' found at " + url)

list_of_rows = []

for row in table.findAll('tr'):
    # Cells must be looked up on the current row, not via a bare findAll().
    # NOTE(review): the replaced character below is copied as-is from the
    # original; it may have been a non-breaking space (&nbsp;) before the
    # page was rendered - confirm, since stripping plain spaces also joins
    # multi-word names.
    list_of_cells = [cell.text.replace(' ', '') for cell in row.findAll('td')]
    list_of_rows.append(list_of_cells)

# On Python 3 the csv module needs a text-mode file opened with newline="";
# "wb" makes writer.writerows() raise TypeError.
with open("./Rankings.csv", "w", newline="", encoding="utf-8") as outfile:
    writer = csv.writer(outfile)
    writer.writerows(list_of_rows)

Can you please help me with this code? I am using Python 3.5.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment