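# --- Assumed setup (not part of the original gist) ---------------------------------
# plotFor5() below references `df`, `scaler`, `tscv`, `timeseries_train_test_split`,
# and `mean_absolute_percentage_error`, which are presumably defined in the
# surrounding notebook. The imports and the minimal sketches here are assumptions
# made so the snippet can run on its own; `df` is expected to be a sales DataFrame
# with 'TerritoryID', 'Date', and 'NetAmount' columns and must still be provided
# by the caller.
import numpy as np
import pandas as pd
import lightgbm as lgb
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

scaler = StandardScaler()            # assumed: feature scaler shared by the models
tscv = TimeSeriesSplit(n_splits=5)   # assumed: CV splitter passed to RidgeCV / LassoCV


def timeseries_train_test_split(X, y, test_size):
    """Assumed helper: chronological split, the most recent rows become the test set."""
    test_index = int(len(X) * (1 - test_size))
    return X.iloc[:test_index], X.iloc[test_index:], y.iloc[:test_index], y.iloc[test_index:]


def mean_absolute_percentage_error(y_pred, y_true):
    """Assumed helper: MAPE in percent, with the argument order used in the calls below."""
    y_pred, y_true = np.asarray(y_pred), np.asarray(y_true)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
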
def plotFor5(Territory):
    """Fit several models on one territory's monthly sales and plot actual vs. predicted values."""
    df_model = df[df['TerritoryID'] == Territory]
    df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0)
    data = pd.DataFrame(df_timeseries.NetAmount)
    data.columns = ["y"]
    # Drop the last month, i.e. data after 2019-06
    data = data.loc[data.index[:-1]]
    # Add lags of the target variable from 7 up to 47 months back
    for i in range(7, 48):
        data["lag_{}".format(i)] = data.y.shift(i)
    y = data.dropna().y
    X = data.dropna().drop(['y'], axis=1)
    # Skip territories with too few usable monthly records once the lag features are built
    if len(y) <= 6:
        return
    # Reserve 30% of the data (the most recent months) for testing
    X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)
    # Scaling
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Linear Regression
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    prediction1 = lr.predict(X_test)
    error_linear = mean_absolute_percentage_error(prediction1, y_test)
    # Ridge
    ridge = RidgeCV(cv=tscv)
    ridge.fit(X_train_scaled, y_train)
    prediction2 = ridge.predict(X_test_scaled)
    error_ridge = mean_absolute_percentage_error(prediction2, y_test)
    # Lasso
    lasso = LassoCV(cv=tscv)
    lasso.fit(X_train_scaled, y_train)
    prediction3 = lasso.predict(X_test_scaled)
    error_lasso = mean_absolute_percentage_error(prediction3, y_test)
    # XGB (only the non-default parameters are kept; 'reg:squarederror' is the
    # current name of the objective formerly called 'reg:linear')
    xgb = XGBRegressor(colsample_bytree=0.3, learning_rate=0.1, max_depth=4,
                       n_estimators=100, subsample=0.5,
                       objective='reg:squarederror', random_state=0)
    xgb.fit(X_train_scaled, y_train)
    prediction4 = xgb.predict(X_test_scaled)
    error_xgb = mean_absolute_percentage_error(prediction4, y_test)
    # LightGBM
    lgb_train = lgb.Dataset(X_train_scaled, y_train)
    lgb_eval = lgb.Dataset(X_test_scaled, y_test, reference=lgb_train)
    lightgbm_params = {'boosting_type': 'gbdt',
                       'colsample_bytree': 0.90,
                       'learning_rate': 0.005,
                       'n_estimators': 40,
                       'num_leaves': 6,
                       'reg_alpha': 1,
                       'reg_lambda': 1,
                       'subsample': 0.7}
    # Note: 'n_estimators' in the params dict is an alias for the number of boosting
    # rounds and takes precedence over the num_boost_round argument below.
    gbm = lgb.train(lightgbm_params, lgb_train, num_boost_round=10, valid_sets=[lgb_eval])
    prediction5 = gbm.predict(X_test_scaled)
    error_lightgbm = mean_absolute_percentage_error(prediction5, y_test)
    # Stacking
    # stack_error = mean_absolute_percentage_error(prediction1*0.1 + prediction2*0.075 + prediction3*0.075 + prediction4*0.35 + prediction5*0.4, y_test)
    # l1 = [error_linear, error_ridge, error_lasso, error_xgb, error_lightgbm, stack_error,
    #       (error_linear + error_ridge + error_lasso + error_xgb + error_lightgbm) / 5,
    #       min(error_linear, error_ridge, error_lasso, error_xgb, error_lightgbm)]
    # global df_final
    # df_final = pd.DataFrame({Territory: l1}).T
    # df_final.columns = ['linear', 'ridge', 'lasso', 'xgb', 'lightgbm', 'stack_error', 'avg_error', 'min_error']
    Prediction = prediction1*0.1 + prediction2*0.075 + prediction3*0.075 + prediction4*0.35 + prediction5*0.4
    max1 = max(max(Prediction), max(y_test))
    max2 = max(max1, max(y_train))
    layout = go.Layout(xaxis=dict(autorange=True), yaxis=dict(range=[0, max2 * 1.2]))
    fig = go.Figure(layout=layout)
    # fig.update_layout(autosize=False)
    # Plot; the hard-coded x positions assume roughly 19 usable months, split 13 train / 6 test
    fig.add_trace(go.Scatter(y=y_test, x=list(range(13, 19)),
                             mode='lines',
                             name='Actual', line=dict(width=4)))
    fig.add_trace(go.Scatter(y=prediction1, x=list(range(13, 19)),
                             mode='lines',
                             name='Linear', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction2, x=list(range(13, 19)),
                             mode='lines',
                             name='Ridge', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction3, x=list(range(13, 19)),
                             mode='lines',
                             name='Lasso', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction4, x=list(range(13, 19)),
                             mode='lines',
                             name='XGB', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction5, x=list(range(13, 19)),
                             mode='lines',
                             name='Lightgbm', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=Prediction, x=list(range(13, 19)),
                             mode='lines',
                             name='Prediction', line=dict(width=4)))
    # Training history, extended by the first test point so the lines connect
    y_trainPlot = list(y_train)
    y_trainPlot.append(y_test.iloc[0])  # positional access; y_test has a date index
    fig.add_trace(go.Scatter(y=y_trainPlot, x=list(range(0, 14)),
                             mode='lines',
                             name='History', line=dict(width=2, dash='dash')))
    # Short connector between the last training point and the first blended prediction
    y_fake = [y_train.iloc[-1], Prediction[0]]
    fig.add_trace(go.Scatter(y=y_fake, x=list(range(12, 14)),
                             mode='lines',
                             name='Fake', line=dict(width=2, dash='dash')))
    # fig.update_yaxes(tickvals=[max1/100, max1*1.2])
    fig.show()
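

# --- Example usage (assumption: territory IDs are the values found in df['TerritoryID']) ---
# A hypothetical call for a single territory; replace 1 with a real TerritoryID:
# plotFor5(1)
#
# Or plot every territory in the data:
# for territory in df['TerritoryID'].unique():
#     plotFor5(territory)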