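# --- Assumed setup (not part of the original gist) ---------------------------------
# plotFor5() below references `df`, `scaler`, `tscv`, `timeseries_train_test_split`,
# and `mean_absolute_percentage_error`, which are presumably defined in the
# surrounding notebook. The imports and the minimal sketches here are assumptions
# made so the snippet can run on its own; `df` is expected to be a sales DataFrame
# with 'TerritoryID', 'Date', and 'NetAmount' columns and must still be provided
# by the caller.
import numpy as np
import pandas as pd
import lightgbm as lgb
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

scaler = StandardScaler()            # assumed: feature scaler shared by the models
tscv = TimeSeriesSplit(n_splits=5)   # assumed: CV splitter passed to RidgeCV / LassoCV


def timeseries_train_test_split(X, y, test_size):
    """Assumed helper: chronological split, the most recent rows become the test set."""
    test_index = int(len(X) * (1 - test_size))
    return X.iloc[:test_index], X.iloc[test_index:], y.iloc[:test_index], y.iloc[test_index:]


def mean_absolute_percentage_error(y_pred, y_true):
    """Assumed helper: MAPE in percent, with the argument order used in the calls below."""
    y_pred, y_true = np.asarray(y_pred), np.asarray(y_true)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
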
def plotFor5(Territory):
    """Fit several models on one territory's monthly sales and plot actual vs. predicted values."""
    df_model = df[df['TerritoryID'] == Territory]
    df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0)
    data = pd.DataFrame(df_timeseries.NetAmount)
    data.columns = ["y"]
    # Drop the last month, i.e. data after 2019-06
    data = data.loc[data.index[:-1]]
    # Add lags of the target variable from 7 up to 47 months back
    for i in range(7, 48):
        data["lag_{}".format(i)] = data.y.shift(i)
    y = data.dropna().y
    X = data.dropna().drop(['y'], axis=1)
    # Skip territories with too few usable monthly records once the lag features are built
    if len(y) <= 6:
        return
    # Reserve 30% of the data (the most recent months) for testing
    X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)
    # Scaling
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Linear Regression
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    prediction1 = lr.predict(X_test)
    error_linear = mean_absolute_percentage_error(prediction1, y_test)
    # Ridge
    ridge = RidgeCV(cv=tscv)
    ridge.fit(X_train_scaled, y_train)
    prediction2 = ridge.predict(X_test_scaled)
    error_ridge = mean_absolute_percentage_error(prediction2, y_test)
    # Lasso
    lasso = LassoCV(cv=tscv)
    lasso.fit(X_train_scaled, y_train)
    prediction3 = lasso.predict(X_test_scaled)
    error_lasso = mean_absolute_percentage_error(prediction3, y_test)
    # XGB (only the non-default parameters are kept; 'reg:squarederror' is the
    # current name of the objective formerly called 'reg:linear')
    xgb = XGBRegressor(colsample_bytree=0.3, learning_rate=0.1, max_depth=4,
                       n_estimators=100, subsample=0.5,
                       objective='reg:squarederror', random_state=0)
    xgb.fit(X_train_scaled, y_train)
    prediction4 = xgb.predict(X_test_scaled)
    error_xgb = mean_absolute_percentage_error(prediction4, y_test)
    # LightGBM
    lgb_train = lgb.Dataset(X_train_scaled, y_train)
    lgb_eval = lgb.Dataset(X_test_scaled, y_test, reference=lgb_train)
    lightgbm_params = {'boosting_type': 'gbdt',
                       'colsample_bytree': 0.90,
                       'learning_rate': 0.005,
                       'n_estimators': 40,
                       'num_leaves': 6,
                       'reg_alpha': 1,
                       'reg_lambda': 1,
                       'subsample': 0.7}
    # Note: 'n_estimators' in the params dict is an alias for the number of boosting
    # rounds and takes precedence over the num_boost_round argument below.
    gbm = lgb.train(lightgbm_params, lgb_train, num_boost_round=10, valid_sets=[lgb_eval])
    prediction5 = gbm.predict(X_test_scaled)
    error_lightgbm = mean_absolute_percentage_error(prediction5, y_test)
    # Stacking
    # stack_error = mean_absolute_percentage_error(prediction1*0.1 + prediction2*0.075 + prediction3*0.075 + prediction4*0.35 + prediction5*0.4, y_test)
    # l1 = [error_linear, error_ridge, error_lasso, error_xgb, error_lightgbm, stack_error,
    #       (error_linear + error_ridge + error_lasso + error_xgb + error_lightgbm) / 5,
    #       min(error_linear, error_ridge, error_lasso, error_xgb, error_lightgbm)]
    # global df_final
    # df_final = pd.DataFrame({Territory: l1}).T
    # df_final.columns = ['linear', 'ridge', 'lasso', 'xgb', 'lightgbm', 'stack_error', 'avg_error', 'min_error']
    Prediction = prediction1*0.1 + prediction2*0.075 + prediction3*0.075 + prediction4*0.35 + prediction5*0.4
    max1 = max(max(Prediction), max(y_test))
    max2 = max(max1, max(y_train))
    layout = go.Layout(xaxis=dict(autorange=True), yaxis=dict(range=[0, max2 * 1.2]))
    fig = go.Figure(layout=layout)
    # fig.update_layout(autosize=False)
    # Plot; the hard-coded x positions assume roughly 19 usable months, split 13 train / 6 test
    fig.add_trace(go.Scatter(y=y_test, x=list(range(13, 19)),
                             mode='lines',
                             name='Actual', line=dict(width=4)))
    fig.add_trace(go.Scatter(y=prediction1, x=list(range(13, 19)),
                             mode='lines',
                             name='Linear', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction2, x=list(range(13, 19)),
                             mode='lines',
                             name='Ridge', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction3, x=list(range(13, 19)),
                             mode='lines',
                             name='Lasso', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction4, x=list(range(13, 19)),
                             mode='lines',
                             name='XGB', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=prediction5, x=list(range(13, 19)),
                             mode='lines',
                             name='Lightgbm', line=dict(width=2, dash='dash')))
    fig.add_trace(go.Scatter(y=Prediction, x=list(range(13, 19)),
                             mode='lines',
                             name='Prediction', line=dict(width=4)))
    # Training history, extended by the first test point so the lines connect
    y_trainPlot = list(y_train)
    y_trainPlot.append(y_test.iloc[0])  # positional access; y_test has a date index
    fig.add_trace(go.Scatter(y=y_trainPlot, x=list(range(0, 14)),
                             mode='lines',
                             name='History', line=dict(width=2, dash='dash')))
    # Short connector between the last training point and the first blended prediction
    y_fake = [y_train.iloc[-1], Prediction[0]]
    fig.add_trace(go.Scatter(y=y_fake, x=list(range(12, 14)),
                             mode='lines',
                             name='Fake', line=dict(width=2, dash='dash')))
    # fig.update_yaxes(tickvals=[max1/100, max1*1.2])
    fig.show()
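

# --- Example usage (assumption: territory IDs are the values found in df['TerritoryID']) ---
# A hypothetical call for a single territory; replace 1 with a real TerritoryID:
# plotFor5(1)
#
# Or plot every territory in the data:
# for territory in df['TerritoryID'].unique():
#     plotFor5(territory)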