Created
March 7, 2022 09:49
-
-
Save phase7/1cd45759d0dbe65bee23fca606bbbd43 to your computer and use it in GitHub Desktop.
Data Wrangling Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import partial | |
from io import BytesIO | |
from zipfile import ZipFile | |
import pandas as pd | |
import requests | |
# Download the eCO2mix energy archive published by RTE and pull the single
# data file out of it, entirely in memory (nothing is written to disk).
zipurl = "https://eco2mix.rte-france.com/download/eco2mix/eCO2mix_RTE_energie_M.zip"

# A timeout keeps the script from hanging forever on a stalled connection.
resp = requests.get(zipurl, timeout=30)
# Fail here on an HTTP error instead of handing an error page to ZipFile,
# which would otherwise surface as a misleading BadZipFile exception.
resp.raise_for_status()

# The context manager closes the archive once the member has been read.
with ZipFile(BytesIO(resp.content)) as unzipped:
    # Raw bytes of the archive member; they are decoded when parsed below.
    xls_data = unzipped.read('eCO2mix_RTE_energie_M.xls')
# Columns to load from the file. 'Mois' becomes the index and 'Territoire'
# is only needed to filter rows; it is dropped afterwards.
cols = (
    'Mois',
    'Territoire',
    'Production totale',
    'Production nucléaire',
    'Production thermique totale',
    'Production thermique charbon',
    'Production thermique fioul',
    'Production thermique gaz',
    'Production hydraulique',
    'Production éolien',
    'Production solaire',
    'Production bioénergies',
    'Consommation totale',
    'Solde exportateur',
    'Echanges export',
    'Echanges import',
    'Echanges avec le Royaume-Uni',
    "Echanges avec l'Espagne",
    "Echanges avec l'Italie",
    'Echanges avec la Suisse',
    "Echanges avec l'Allemagne et la Belgique",
)

# Despite the .xls extension, the payload is parsed as tab-separated,
# latin-1 encoded text. Keep only the rows whose territory is "France",
# then discard the now-constant territory column.
df = (
    pd.read_csv(
        BytesIO(xls_data),
        sep="\t",
        encoding="latin-1",
        header=0,
        index_col="Mois",
        usecols=cols,
    )
    .loc[lambda frame: frame['Territoire'] == "France"]
    .drop(columns='Territoire')
)
# Parse the index labels (formatted as "%Y-%m") into timestamps.
dt_transform = partial(pd.to_datetime, format="%Y-%m")
df.index = df.index.map(dt_transform)

# Short keys that replace the verbose French column headers.
KEY_MAP = {
    'Production totale': 'pro-tot',
    'Production nucléaire': 'nuc',
    'Production thermique totale': 'thermal',
    'Production thermique charbon': 'coal',
    'Production thermique fioul': 'oil',
    'Production thermique gaz': 'gas',
    'Production hydraulique': 'hydro',
    'Production éolien': 'wind',
    'Production solaire': 'solar',
    'Production bioénergies': 'bioenergy',
    'Consommation totale': 'con',
    'Solde exportateur': 'export-net',
    'Echanges export': 'export',
    'Echanges import': 'import',
    'Echanges avec le Royaume-Uni': 'fr>uk',
    "Echanges avec l'Espagne": 'fr>es',
    "Echanges avec l'Italie": 'fr>it',
    'Echanges avec la Suisse': 'fr>ch',
    "Echanges avec l'Allemagne et la Belgique": 'fr>de-be',
}

# Rename every column through KEY_MAP; a header missing from the map
# would come back as None because dict.get is used for the lookup.
df.columns = df.columns.map(lambda header: KEY_MAP.get(header))
# Print the latest 5 months: one header line per column, followed by a
# tab-indented "date value" line for each of the last five rows.
# DataFrame.iteritems() was removed in pandas 2.0; items() is the
# equivalent that works on both old and new pandas versions.
for key, series in df.tail().items():
    print(key)
    for stamp, val in series.items():
        # Show only the calendar date of the timestamp, not the time part.
        print("\t", stamp.to_pydatetime().date(), val)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment