Created
July 14, 2025 19:37
-
-
Save zhensongren/2820add681ad76ee5c53e0a9f0daf4fb to your computer and use it in GitHub Desktop.
ccf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %% [markdown] | |
# A 4×4 grid of scatter plots | |
# Each showing the relationship between lagged ts1 (predictor) and ts2 (target) | |
# %% | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from darts import TimeSeries | |
from sklearn.linear_model import LinearRegression | |
# Step 1: Simulate two Darts TimeSeries | |
np.random.seed(42) | |
n = 100 | |
dates = pd.date_range(start="2020-01-01", periods=n, freq="D") | |
series_1 = pd.Series(np.random.normal(50, 10, size=n), index=dates) | |
series_2 = pd.Series(0.7 * series_1.shift(2).fillna(method='bfill') + np.random.normal(0, 5, size=n), index=dates) | |
ts_1 = TimeSeries.from_series(series_1.rename("ts1")) | |
ts_2 = TimeSeries.from_series(series_2.rename("ts2")) | |
# Step 2: Plotting function using Darts TimeSeries | |
def darts_cross_corr_plot(ts_x: TimeSeries, ts_y: TimeSeries, max_lag: int = 12): | |
df_x = ts_x.to_dataframe() | |
df_y = ts_y.to_dataframe() | |
target_col = df_y.columns[0] | |
input_col = df_x.columns[0] | |
fig, axes = plt.subplots(4, 4, figsize=(16, 12)) | |
axes = axes.flatten() | |
for lag in range(max_lag + 1): | |
x_lagged = df_x.shift(lag) | |
df_comb = pd.concat([x_lagged, df_y], axis=1).dropna() | |
x_vals = df_comb[input_col].values.reshape(-1, 1) | |
y_vals = df_comb[target_col].values | |
ax = axes[lag] | |
sns.scatterplot(x=x_vals.flatten(), y=y_vals, ax=ax, s=20, color="teal") | |
model = LinearRegression().fit(x_vals, y_vals) | |
x_line = np.linspace(x_vals.min(), x_vals.max(), 100).reshape(-1, 1) | |
y_line = model.predict(x_line) | |
ax.plot(x_line, y_line, color='red', linewidth=1) | |
corr = np.corrcoef(x_vals.flatten(), y_vals)[0, 1] | |
ax.set_title(f"{input_col}(t-{lag}), r={corr:.2f}") | |
for i in range(max_lag + 1, len(axes)): | |
fig.delaxes(axes[i]) | |
plt.tight_layout() | |
plt.show() | |
# Step 3: Use the function | |
darts_cross_corr_plot(ts_x=ts_1, ts_y=ts_2, max_lag=12) | |
# %% [markdown] | |
# Generate cross correlation plots from pandas dataframe series. | |
# %% | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.linear_model import LinearRegression | |
# Step 1: Create example time series | |
np.random.seed(42) | |
n = 150 | |
rain = np.random.gamma(shape=2.0, scale=15.0, size=n) | |
ndvi = 0.5 * np.roll(rain, 1) + np.random.normal(0, 10, size=n) | |
df = pd.DataFrame({'rain': rain, 'ndvi': ndvi}) | |
# Step 2: Define plot function | |
def cross_corr_plot_matrix(df, x_col, y_col, max_lag=12): | |
fig, axes = plt.subplots(4, 4, figsize=(16, 12)) | |
axes = axes.flatten() | |
for lag in range(max_lag + 1): | |
x_lagged = df[x_col].shift(lag) | |
mask = x_lagged.notna() & df[y_col].notna() | |
x_vals = x_lagged[mask].values.reshape(-1, 1) | |
y_vals = df[y_col][mask].values | |
ax = axes[lag] | |
sns.scatterplot(x=x_vals.flatten(), y=y_vals, ax=ax, s=20, color='black') | |
# Regression line | |
model = LinearRegression().fit(x_vals, y_vals) | |
x_line = np.linspace(x_vals.min(), x_vals.max(), 100).reshape(-1, 1) | |
y_line = model.predict(x_line) | |
ax.plot(x_line, y_line, color='red', linewidth=1) | |
# Pearson correlation | |
corr = np.corrcoef(x_vals.flatten(), y_vals)[0, 1] | |
ax.set_title(f"{x_col}(t-{lag}), r={corr:.2f}") | |
# Hide unused subplots | |
for i in range(max_lag + 1, len(axes)): | |
fig.delaxes(axes[i]) | |
plt.tight_layout() | |
plt.show() | |
# Step 3: Run the plot | |
cross_corr_plot_matrix(df, x_col='rain', y_col='ndvi', max_lag=12) | |
# %% | |
# %% | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from statsmodels.tsa.stattools import ccf | |
# Simulated example data | |
np.random.seed(42) | |
n = 100 | |
# rain is the leading variable | |
rain = np.random.normal(10, 2, n) | |
# NDVI lags behind rain; create ndvi with roll and lag 2 and add noise | |
ndvi = np.roll(rain, 2) + np.random.normal(0, 1, n) # NDVI lags behind rain | |
data = pd.DataFrame({'rain': rain, 'ndvi': ndvi}) | |
# Standardize the series (recommended before computing CCF) | |
rain_std = (data['rain'] - np.mean(data['rain'])) / np.std(data['rain']) | |
ndvi_std = (data['ndvi'] - np.mean(data['ndvi'])) / np.std(data['ndvi']) | |
# Compute cross-correlation for full range of lags | |
max_lag = 20 | |
lags = np.arange(-max_lag, max_lag + 1) | |
ccf_full = [] | |
for lag in lags: | |
if lag < 0: | |
x = rain_std[:lag] | |
y = ndvi_std[-lag:] | |
elif lag > 0: | |
x = rain_std[lag:] | |
y = ndvi_std[:-lag] | |
else: | |
x = rain_std | |
y = ndvi_std | |
corr = np.corrcoef(x, y)[0, 1] | |
ccf_full.append(corr) | |
# Plot | |
plt.figure(figsize=(10, 5)) | |
plt.stem(lags, ccf_full) | |
plt.axhline(0, color='black') | |
conf_level = 2 / np.sqrt(n) # 95% confidence bound (approx) | |
plt.axhline(conf_level, color='blue', linestyle='--') | |
plt.axhline(-conf_level, color='blue', linestyle='--') | |
plt.title("Cross-Correlation Function (rain vs. NDVI)") | |
plt.xlabel("Lag") | |
plt.ylabel("Correlation") | |
plt.grid(True) | |
plt.show() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment