Skip to content

Instantly share code, notes, and snippets.

@aasensio
Created April 7, 2020 13:19
Show Gist options
  • Save aasensio/a64e08e2f6785771bf4d37376ec7076d to your computer and use it in GitHub Desktop.
Save aasensio/a64e08e2f6785771bf4d37376ec7076d to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import matplotlib.pyplot as pl
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.gaussian_process.kernels import DotProduct
def calculate(which, ax):
print("Reading data...")
if (which == 0):
tmp = pd.read_csv('https://raw.githubusercontent.com/datadista/datasets/master/COVID%2019/ccaa_covid19_fallecidos.csv').values
label = 'Fallecidos'
left = 3
else:
tmp = pd.read_csv('https://raw.githubusercontent.com/datadista/datasets/master/COVID%2019/ccaa_covid19_casos.csv').values
label = 'Casos'
left = 8
print("Done")
tmp = tmp[-1,left:].astype('float')
y = tmp
n = len(y)
dy = np.sqrt(y)
logy = np.log(y)
dlogy = dy / y
x = np.atleast_2d(np.linspace(0,n,n)).T
kernel = C(1.0, (1e-3, 1e3)) * RBF(1, (1e-2, 1e2))
# kernel = DotProduct()
print("Computing Gaussian Process...")
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9, alpha=dlogy)
print("Fitting Gaussian Process...")
gp.fit(x, logy)
print("Computing derivatives and samples...")
xnew = np.atleast_2d(np.linspace(0,n+10,200)).T
logy_pred, sigma = gp.predict(xnew, return_std=True)
logy_samples = gp.sample_y(xnew, 1000)
y_samples = np.exp(logy_samples)
dy1 = np.gradient(logy_samples, 1, axis=0)
dy2 = np.gradient(dy1, 1, axis=0)
print("Doing plots...")
ax[0].semilogy(x, y, '.', color='C0')
ax[0].errorbar(x, y, linestyle='', yerr=dlogy, color='C0', capsize=2)
ax[0].semilogy(xnew, y_samples, color='C1', alpha=0.01)
ax[1].plot(xnew, dy1, color='C0', alpha=0.01)
ax[1].axvline(n, linestyle='--')
ax[2].plot(xnew, dy2, color='C0', alpha=0.01)
ax[2].axhline(0.0, linestyle='--')
ax[2].axvline(n, linestyle='--')
for i in range(4):
ax[i].set_xlabel('Días desde 4/03/20')
ax[0].set_ylabel(f'{label}')
ax[1].set_ylabel('Pendiente')
ax[1].set_ylim([0,0.07])
ax[2].set_ylabel('Pendiente de la pendiente')
print("Computing peaks...")
xnew = np.atleast_2d(np.linspace(0,n+40,300)).T
logy_samples = gp.sample_y(xnew, 2000)
dy1 = np.gradient(logy_samples, 1, axis=0)
zero = xnew[np.argmin(np.abs(dy1),axis=0)]
ax[3].hist(zero, bins=np.arange(n+20), density=True)
ax[3].axvline(n, linestyle='--')
ax[3].set_xlim([0, 50])
ax[3].set_ylabel('Probabilidad llegada al pico')
tmp = np.argmin(np.abs(dy1), axis=0)
y_samples = np.exp(logy_samples)
maxim = np.zeros(2000)
for i in range(2000):
maxim[i] = y_samples[tmp[i], i]
ax[4].hist(maxim, bins=30, density=True, range=[0,50000])
ax[4].set_xlabel('Fallecidos')
ax[4].set_ylabel('Probabilidad')
ax[4].set_xlim([0,40000])
if (__name__ == '__main__'):
f, ax = pl.subplots(nrows=1, ncols=5, figsize=(18,5))
calculate(0, ax[:])
# calculate(1, ax[1,:])
print("Finalizing plot")
pl.tight_layout()
pl.show()
pl.savefig('coronavirus.png')
pl.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment