Skip to content

Instantly share code, notes, and snippets.

@zhensongren
Created July 14, 2025 19:37
Show Gist options
  • Save zhensongren/2820add681ad76ee5c53e0a9f0daf4fb to your computer and use it in GitHub Desktop.
Save zhensongren/2820add681ad76ee5c53e0a9f0daf4fb to your computer and use it in GitHub Desktop.
ccf
# %% [markdown]
# A 4×4 grid of scatter plots
# Each showing the relationship between lagged ts1 (predictor) and ts2 (target)
# %%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from darts import TimeSeries
from sklearn.linear_model import LinearRegression
# Step 1: Simulate two Darts TimeSeries
np.random.seed(42)
n = 100
dates = pd.date_range(start="2020-01-01", periods=n, freq="D")
series_1 = pd.Series(np.random.normal(50, 10, size=n), index=dates)
series_2 = pd.Series(0.7 * series_1.shift(2).fillna(method='bfill') + np.random.normal(0, 5, size=n), index=dates)
ts_1 = TimeSeries.from_series(series_1.rename("ts1"))
ts_2 = TimeSeries.from_series(series_2.rename("ts2"))
# Step 2: Plotting function using Darts TimeSeries
def darts_cross_corr_plot(ts_x: TimeSeries, ts_y: TimeSeries, max_lag: int = 12):
df_x = ts_x.to_dataframe()
df_y = ts_y.to_dataframe()
target_col = df_y.columns[0]
input_col = df_x.columns[0]
fig, axes = plt.subplots(4, 4, figsize=(16, 12))
axes = axes.flatten()
for lag in range(max_lag + 1):
x_lagged = df_x.shift(lag)
df_comb = pd.concat([x_lagged, df_y], axis=1).dropna()
x_vals = df_comb[input_col].values.reshape(-1, 1)
y_vals = df_comb[target_col].values
ax = axes[lag]
sns.scatterplot(x=x_vals.flatten(), y=y_vals, ax=ax, s=20, color="teal")
model = LinearRegression().fit(x_vals, y_vals)
x_line = np.linspace(x_vals.min(), x_vals.max(), 100).reshape(-1, 1)
y_line = model.predict(x_line)
ax.plot(x_line, y_line, color='red', linewidth=1)
corr = np.corrcoef(x_vals.flatten(), y_vals)[0, 1]
ax.set_title(f"{input_col}(t-{lag}), r={corr:.2f}")
for i in range(max_lag + 1, len(axes)):
fig.delaxes(axes[i])
plt.tight_layout()
plt.show()
# Step 3: Use the function
darts_cross_corr_plot(ts_x=ts_1, ts_y=ts_2, max_lag=12)
# %% [markdown]
# Generate cross correlation plots from pandas dataframe series.
# %%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
# Step 1: Create example time series
np.random.seed(42)
n = 150
rain = np.random.gamma(shape=2.0, scale=15.0, size=n)
ndvi = 0.5 * np.roll(rain, 1) + np.random.normal(0, 10, size=n)
df = pd.DataFrame({'rain': rain, 'ndvi': ndvi})
# Step 2: Define plot function
def cross_corr_plot_matrix(df, x_col, y_col, max_lag=12):
fig, axes = plt.subplots(4, 4, figsize=(16, 12))
axes = axes.flatten()
for lag in range(max_lag + 1):
x_lagged = df[x_col].shift(lag)
mask = x_lagged.notna() & df[y_col].notna()
x_vals = x_lagged[mask].values.reshape(-1, 1)
y_vals = df[y_col][mask].values
ax = axes[lag]
sns.scatterplot(x=x_vals.flatten(), y=y_vals, ax=ax, s=20, color='black')
# Regression line
model = LinearRegression().fit(x_vals, y_vals)
x_line = np.linspace(x_vals.min(), x_vals.max(), 100).reshape(-1, 1)
y_line = model.predict(x_line)
ax.plot(x_line, y_line, color='red', linewidth=1)
# Pearson correlation
corr = np.corrcoef(x_vals.flatten(), y_vals)[0, 1]
ax.set_title(f"{x_col}(t-{lag}), r={corr:.2f}")
# Hide unused subplots
for i in range(max_lag + 1, len(axes)):
fig.delaxes(axes[i])
plt.tight_layout()
plt.show()
# Step 3: Run the plot
cross_corr_plot_matrix(df, x_col='rain', y_col='ndvi', max_lag=12)
# %%
# %%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import ccf
# Simulated example data
np.random.seed(42)
n = 100
# rain is the leading variable
rain = np.random.normal(10, 2, n)
# NDVI lags behind rain; create ndvi with roll and lag 2 and add noise
ndvi = np.roll(rain, 2) + np.random.normal(0, 1, n) # NDVI lags behind rain
data = pd.DataFrame({'rain': rain, 'ndvi': ndvi})
# Standardize the series (recommended before computing CCF)
rain_std = (data['rain'] - np.mean(data['rain'])) / np.std(data['rain'])
ndvi_std = (data['ndvi'] - np.mean(data['ndvi'])) / np.std(data['ndvi'])
# Compute cross-correlation for full range of lags
max_lag = 20
lags = np.arange(-max_lag, max_lag + 1)
ccf_full = []
for lag in lags:
if lag < 0:
x = rain_std[:lag]
y = ndvi_std[-lag:]
elif lag > 0:
x = rain_std[lag:]
y = ndvi_std[:-lag]
else:
x = rain_std
y = ndvi_std
corr = np.corrcoef(x, y)[0, 1]
ccf_full.append(corr)
# Plot
plt.figure(figsize=(10, 5))
plt.stem(lags, ccf_full)
plt.axhline(0, color='black')
conf_level = 2 / np.sqrt(n) # 95% confidence bound (approx)
plt.axhline(conf_level, color='blue', linestyle='--')
plt.axhline(-conf_level, color='blue', linestyle='--')
plt.title("Cross-Correlation Function (rain vs. NDVI)")
plt.xlabel("Lag")
plt.ylabel("Correlation")
plt.grid(True)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment