ryanpitts · March 4, 2020 01:12 · ryanpitts · Mar 4, 2020
diff --git a/scraper.py b/scraper.py
 import requests
 import bs4
 import csv

 # Download the page at URL, take the first <table> on it and write its data
 # rows to warn-data.csv, one CSV row per table row.
 URL = 'http://www.dllr.state.md.us/employment/warn.shtml'

 # Bound the wait so an unresponsive server cannot hang the script forever.
 warn_page = requests.get(URL, timeout=30)
 # Stop on an HTTP error instead of parsing an error page as if it were data.
 warn_page.raise_for_status()

 soup = bs4.BeautifulSoup(warn_page.text, 'html.parser')
 table = soup.find('table')
 if table is None:
     # find() returns None when nothing matches; fail with a readable message
     # before the output file is opened (and truncated).
     raise ValueError('No <table> element found at ' + URL)
 rows = table.find_all('tr')

 HEADERS = ['warn_date', 'naics_code', 'biz', 'address', 'wia_code', 'total_employees', 'effective_date', 'type_code']

 # Positions (within HEADERS) of the free-text columns whose internal runs of
 # whitespace are collapsed to single spaces: 'biz' and 'address'.
 COLLAPSE_WHITESPACE = (2, 3)

 with open('warn-data.csv', 'w', newline='') as outfile:
     writer = csv.writer(outfile)
     writer.writerow(HEADERS)

     # The first <tr> is skipped: it is treated as the table's header row.
     for row in rows[1:]:
         cells = row.find_all('td')
         if len(cells) < len(HEADERS):
             # Rows without a full set of <td> cells cannot be mapped onto
             # HEADERS; skip them rather than crash with an IndexError and
             # leave a half-written file behind.
             continue

         # Only the first len(HEADERS) cells are used; extras are ignored.
         data_out = [cell.text.strip() for cell in cells[:len(HEADERS)]]
         for position in COLLAPSE_WHITESPACE:
             data_out[position] = ' '.join(data_out[position].split())

         writer.writerow(data_out)
	import requests
	import bs4
	import csv

	# Download the page at URL, take the first <table> on it and write its data
	# rows to warn-data.csv, one CSV row per table row.
	URL = 'http://www.dllr.state.md.us/employment/warn.shtml'

	# Bound the wait so an unresponsive server cannot hang the script forever.
	warn_page = requests.get(URL, timeout=30)
	# Stop on an HTTP error instead of parsing an error page as if it were data.
	warn_page.raise_for_status()

	soup = bs4.BeautifulSoup(warn_page.text, 'html.parser')
	table = soup.find('table')
	if table is None:
	    # find() returns None when nothing matches; fail with a readable message
	    # before the output file is opened (and truncated).
	    raise ValueError('No <table> element found at ' + URL)
	rows = table.find_all('tr')

	HEADERS = ['warn_date', 'naics_code', 'biz', 'address', 'wia_code', 'total_employees', 'effective_date', 'type_code']

	# Positions (within HEADERS) of the free-text columns whose internal runs of
	# whitespace are collapsed to single spaces: 'biz' and 'address'.
	COLLAPSE_WHITESPACE = (2, 3)

	with open('warn-data.csv', 'w', newline='') as outfile:
	    writer = csv.writer(outfile)
	    writer.writerow(HEADERS)

	    # The first <tr> is skipped: it is treated as the table's header row.
	    for row in rows[1:]:
	        cells = row.find_all('td')
	        if len(cells) < len(HEADERS):
	            # Rows without a full set of <td> cells cannot be mapped onto
	            # HEADERS; skip them rather than crash with an IndexError and
	            # leave a half-written file behind.
	            continue

	        # Only the first len(HEADERS) cells are used; extras are ignored.
	        data_out = [cell.text.strip() for cell in cells[:len(HEADERS)]]
	        for position in COLLAPSE_WHITESPACE:
	            data_out[position] = ' '.join(data_out[position].split())

	        writer.writerow(data_out)
No results found