Skip to content

Instantly share code, notes, and snippets.

@eliseomartelli
Created March 24, 2025 16:28
Show Gist options
  • Save eliseomartelli/58d288e41286b35198a91c37d54fa146 to your computer and use it in GitHub Desktop.
Save eliseomartelli/58d288e41286b35198a91c37d54fa146 to your computer and use it in GitHub Desktop.
Predict the next WWDC date
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score, LeaveOneOut
def preprocess_data(data):
    """Build the WWDC history frame with parsed dates and calendar features.

    Args:
        data: Records accepted by ``pd.DataFrame`` (the caller passes a list
            of dicts). Each record needs a ``year`` plus ``wwdc`` and
            ``announced`` values written as ``"month/day"`` strings.

    Returns:
        The DataFrame with two added datetime columns (``wwdc_date`` and
        ``announced_date``) and three integer columns derived from
        ``wwdc_date``: ``wwdc_month``, ``wwdc_day`` and ``wwdc_weekday``
        (Monday is 0).

    Raises:
        KeyError: If ``year``, ``wwdc`` or ``announced`` is missing.
        ValueError: If a combined date string does not match ``%m/%d/%Y``.
    """
    df = pd.DataFrame(data)
    year_text = df["year"].astype(str)

    # Parse each column in one vectorized call. This guarantees a datetime
    # dtype (required by the ``.dt`` accessors below) instead of relying on
    # dtype inference for a column filled one cell at a time.
    for source, target in (
        ("wwdc", "wwdc_date"),
        ("announced", "announced_date"),
    ):
        df[target] = pd.to_datetime(
            df[source].astype(str) + "/" + year_text,
            format="%m/%d/%Y",
        )

    df["wwdc_month"] = df["wwdc_date"].dt.month
    df["wwdc_day"] = df["wwdc_date"].dt.day
    df["wwdc_weekday"] = df["wwdc_date"].dt.weekday
    return df
def train_models(df):
    """Pick and fit the best regressor for each prediction target.

    For each of the four targets (``day``, ``month``, ``lead_time``,
    ``weekday``) a random forest and a ridge regression are scored on the
    single ``year`` feature using leave-one-out cross-validation and mean
    absolute error. The candidate with the lowest MAE (the first one on a
    tie) is refit on the full data. Scores are printed as they are computed.

    Args:
        df: Frame containing ``year``, ``wwdc_day``, ``wwdc_month``,
            ``lead_time`` and ``wwdc_weekday`` columns.

    Returns:
        A tuple ``(best_models, feature_columns)``: a dict mapping each
        target name to its fitted estimator, and the column index of the
        feature frame used for training.
    """
    features = df[["year"]].copy()
    targets = {
        "day": df["wwdc_day"].values,
        "month": df["wwdc_month"].values,
        "lead_time": df["lead_time"].values,
        "weekday": df["wwdc_weekday"].values,
    }

    def _fresh_candidates():
        # Every target gets its own unfitted estimator instances.
        return {
            "rf": RandomForestRegressor(
                n_estimators=100, random_state=42, min_samples_leaf=2
            ),
            "ridge": Ridge(alpha=1.0),
        }

    splitter = LeaveOneOut()
    separator = "-" * 50
    fitted = {}

    print("\nModel Evaluation:")
    print(separator)

    for target, values in targets.items():
        candidates = _fresh_candidates()
        lowest_mae = float("inf")
        winner = None

        for label, estimator in candidates.items():
            scores = cross_val_score(
                estimator,
                features,
                values,
                cv=splitter,
                scoring="neg_mean_absolute_error",
            )
            # The scorer returns negated errors; flip the sign back.
            mae = -np.mean(scores)
            if mae < lowest_mae:
                lowest_mae, winner = mae, label
            print(f"{target.capitalize()} - {label}: MAE = {mae:.2f}")

        fitted[target] = candidates[winner].fit(features, values)
        print(f"Best for {target}: {winner}", f"MAE = {lowest_mae:.2f}")
        print(separator)

    return fitted, features.columns
def predict_dates(models, year, feature_columns):
    """Predict the WWDC start date and its announcement date for a year.

    The month, day and lead-time models are each queried with the year and
    their outputs rounded to integers. Any month other than June or August
    is replaced by June. The resulting date is moved forward to the next
    Monday unless it already is one. If the predicted day is not valid for
    the month, the first Monday of that month is used instead and the error
    is printed.

    Args:
        models: Mapping with ``"month"``, ``"day"`` and ``"lead_time"``
            entries, each exposing ``predict``.
        year: Calendar year to predict for.
        feature_columns: Accepted for interface compatibility; not used.

    Returns:
        A tuple ``(wwdc_date, announced_date)`` of ``datetime`` objects,
        where the announcement precedes WWDC by the predicted lead time.
    """
    query = pd.DataFrame({"year": [year]})

    def _rounded(target):
        return int(round(models[target].predict(query)[0]))

    month = _rounded("month")
    day = _rounded("day")
    lead_days = _rounded("lead_time")

    if month not in (6, 8):
        month = 6

    monday = 0
    try:
        candidate = datetime(year, month, day)
    except ValueError as e:
        print(f"Error in initial date prediction: {e}")
        # Fall back to the first Monday of the month.
        first_of_month = datetime(year, month, 1)
        days_ahead = (monday - first_of_month.weekday()) % 7
        wwdc_date = first_of_month + timedelta(days=days_ahead)
    else:
        # Roll forward to Monday; a zero offset leaves the date unchanged.
        days_ahead = (monday - candidate.weekday()) % 7
        wwdc_date = candidate + timedelta(days=days_ahead)

    return wwdc_date, wwdc_date - timedelta(days=lead_days)
def validate_prediction(wwdc_date, announced_date):
    """Collect sanity-check warnings for a predicted pair of dates.

    Args:
        wwdc_date: Predicted conference start as a ``datetime``.
        announced_date: Predicted announcement as a ``datetime``.

    Returns:
        A list of message strings: one for each date that falls on a
        Saturday or Sunday, and one if fewer than 30 days separate the
        announcement from the conference. Empty when nothing is flagged.
    """
    saturday = 5
    problems = []

    for label, moment in (("WWDC", wwdc_date), ("Announcement", announced_date)):
        if moment.weekday() >= saturday:
            problems.append(f"{label} date is {moment.strftime('%A')}")

    lead_days = (wwdc_date - announced_date).days
    if lead_days < 30:
        problems.append(f"Lead time is {lead_days} days")

    return problems
def main():
    """Train on the historical WWDC table and print the 2025 prediction.

    Each history record holds the year, the WWDC start and announcement
    dates as ``"month/day"`` strings, and the number of days between them.
    The records are preprocessed, models are trained, and the predicted
    dates, lead time and any validation warnings are printed.
    """
    columns = ("year", "wwdc", "announced", "lead_time")
    history = (
        (2024, "6/10", "3/26", 76),
        (2023, "6/5", "3/29", 68),
        (2022, "6/6", "4/5", 62),
        (2021, "6/7", "3/30", 69),
        (2020, "6/22", "5/5", 48),
        (2019, "6/3", "3/14", 81),
        (2018, "6/4", "3/15", 81),
        (2017, "6/5", "2/16", 109),
        (2016, "6/13", "4/18", 56),
        (2015, "6/8", "4/14", 55),
        (2014, "6/2", "4/3", 60),
        (2013, "6/10", "4/24", 47),
        (2012, "6/11", "4/25", 47),
        (2011, "6/6", "3/28", 70),
        (2010, "6/7", "4/28", 40),
        (2009, "6/8", "3/26", 74),
        (2008, "6/9", "3/13", 88),
        (2007, "6/11", "2/6", 125),
        (2006, "8/7", "3/7", 153),
        (2005, "6/6", "2/15", 111),
        (2004, "6/28", "2/17", 132),
    )
    data = [dict(zip(columns, record)) for record in history]

    models, feature_columns = train_models(preprocess_data(data))

    year = 2025
    wwdc_date, announced_date = predict_dates(models, year, feature_columns)
    issues = validate_prediction(wwdc_date, announced_date)
    lead_days = (wwdc_date - announced_date).days

    print("\nPredictions:")
    print("-" * 50)
    print(f"WWDC {year} Date: {wwdc_date.strftime('%A, %B %d, %Y')}")
    print(f"Announcement Date: {announced_date.strftime('%A, %B %d, %Y')}")
    print(f"Lead Time: {lead_days} days")

    if issues:
        print("\nValidation Issues:")
        for problem in issues:
            print(f"- {problem}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment