Created
March 10, 2023 19:48
-
-
Save MoisesTedeschi/bc23747dcd0f5549297072fde3e6942c to your computer and use it in GitHub Desktop.
Raspagem de dados - Fundos Imobiliários do site "fundsexplorer".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import time
from datetime import datetime

import requests
from bs4 import BeautifulSoup
# Required third-party libraries:
#   pip install requests
#   pip install beautifulsoup4

# Page that holds the ranking table to scrape.
url = 'https://www.fundsexplorer.com.br/ranking'


def clean_cell(text):
    """Normalize one table cell from Brazilian number formatting.

    Cleansing steps, applied in this order:
        'R$', '%', 'N/A'  => ''   (markers and placeholders)
        trailing '.0'     => ''   (e.g. '12345.0' -> '12345')
        '.'               => ''   (thousands separator)
        ','               => '.'  (decimal separator)

    Args:
        text: Raw text content of a ``<td>`` cell.

    Returns:
        The cleaned string; empty when the cell held only markers or 'N/A'.
    """
    value = text.replace('R$', '').replace('%', '').replace('N/A', '').strip()
    # Only a *trailing* '.0' is float residue. Removing '.0' anywhere would
    # eat a thousands separator followed by a zero ('1.050,00' -> '150,00').
    if value.endswith('.0'):
        value = value[:-2]
    return value.replace('.', '').replace(',', '.')


def parse_ranking(html):
    """Extract the rows of the ``table-ranking`` table from *html*.

    Args:
        html: Full HTML source of the ranking page.

    Returns:
        A list of rows (each a list of strings): one row per ``<tr>`` in
        ``<thead>`` with the header texts, followed by one row per ``<tr>``
        in ``<tbody>`` with every cell passed through ``clean_cell``.

    Raises:
        RuntimeError: If no element with id ``table-ranking`` is present.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find(id="table-ranking")
    if table is None:
        raise RuntimeError('No element with id "table-ranking" found in the page')

    rows = []

    # A missing <thead>/<tbody> simply contributes no rows instead of
    # failing with an AttributeError on None.
    head = table.find('thead')
    if head is not None:
        rows.extend(
            [th.get_text(separator=" ").strip() for th in tr.find_all('th')]
            for tr in head.find_all('tr')
        )

    body = table.find('tbody')
    if body is not None:
        rows.extend(
            [clean_cell(td.text) for td in tr.find_all('td')]
            for tr in body.find_all('tr')
        )

    return rows


def write_csv(rows, path="fii.csv"):
    """Write *rows* to *path* as a ';'-delimited CSV with '\\n' line endings.

    Args:
        rows: Iterable of rows, each an iterable of cell values.
        path: Destination file; overwritten if it already exists.
    """
    # newline='' lets the csv writer control line endings itself (as the csv
    # module documentation recommends); the explicit encoding keeps accented
    # header text independent of the platform's default encoding.
    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle, delimiter=';', lineterminator='\n')
        writer.writerows(rows)


def main():
    """Download the ranking page, scrape the table and save it to fii.csv."""
    print("Starting...{}".format(datetime.now()))

    # A timeout prevents hanging forever; raise_for_status stops the script
    # from parsing an HTTP error page as if it were the ranking.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    write_csv(parse_ranking(response.text))

    print("Finish...{}".format(datetime.now()))
    # Short pause before exiting, kept from the original script
    # (purpose not evident from the code).
    time.sleep(1)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment