pierrelouisbescond · February 28, 2025 15:10
diff --git a/fPCA_data_preparation.py b/fPCA_data_preparation.py
 import pandas as pd
 import numpy as np

 # Build the inputs for the fPCA: `f`, a float matrix with one row per date
 # and one column per selected region, and `time`, a matching 0..1 axis.
 # source: https://www.data.gouv.fr/fr/datasets/temperature-quotidienne-departementale-depuis-janvier-2018/

 # Read only the three columns used below. The region code is forced to
 # `str` so that zero-padded codes such as "06" are never parsed as integers
 # (the column selection further down relies on the string form).
 df = pd.read_csv(
     "temperature-quotidienne-departementale.csv",
     sep=";",
     usecols=[0, 1, 4],
     dtype={"Code INSEE département": str},
 )

 # Shorter column names to simplify the syntax below
 df = df.rename(columns={"Code INSEE département": "Region", "TMax (°C)": "Temp"})

 # Keep 2019 records only (both bounds inclusive)
 in_2019 = df["Date"].between("2019-01-01", "2019-12-31")
 df = df[in_2019]

 # Reshape to wide format: "Date" as index, one column per region
 df = df.pivot(index="Date", columns="Region", values="Temp")

 # Keep a set of regions across France
 selected_regions = ["06", "25", "59", "62", "83", "85", "75"]
 df = df[selected_regions]

 # `display` is only injected by IPython/Jupyter; fall back to `print` so the
 # file also runs as a plain Python script.
 try:
     display(df)
 except NameError:
     print(df)

 # Convert the Pandas dataframe to a Numpy array with time-series only
 f = df.to_numpy().astype(float)

 # Evenly spaced floats from 0 to 1, one per row of `f`, used as time index
 time = np.linspace(0, 1, len(f))
	import pandas as pd
	import numpy as np

	# Build the inputs for the fPCA: `f`, a float matrix with one row per date
	# and one column per selected region, and `time`, a matching 0..1 axis.
	# source: https://www.data.gouv.fr/fr/datasets/temperature-quotidienne-departementale-depuis-janvier-2018/

	# Read only the three columns used below. The region code is forced to
	# `str` so that zero-padded codes such as "06" are never parsed as integers
	# (the column selection further down relies on the string form).
	df = pd.read_csv(
		"temperature-quotidienne-departementale.csv",
		sep=";",
		usecols=[0, 1, 4],
		dtype={"Code INSEE département": str},
	)

	# Shorter column names to simplify the syntax below
	df = df.rename(columns={"Code INSEE département": "Region", "TMax (°C)": "Temp"})

	# Keep 2019 records only (both bounds inclusive)
	in_2019 = df["Date"].between("2019-01-01", "2019-12-31")
	df = df[in_2019]

	# Reshape to wide format: "Date" as index, one column per region
	df = df.pivot(index="Date", columns="Region", values="Temp")

	# Keep a set of regions across France
	selected_regions = ["06", "25", "59", "62", "83", "85", "75"]
	df = df[selected_regions]

	# `display` is only injected by IPython/Jupyter; fall back to `print` so the
	# file also runs as a plain Python script.
	try:
		display(df)
	except NameError:
		print(df)

	# Convert the Pandas dataframe to a Numpy array with time-series only
	f = df.to_numpy().astype(float)

	# Evenly spaced floats from 0 to 1, one per row of `f`, used as time index
	time = np.linspace(0, 1, len(f))