peakBreaker · June 19, 2019 08:30 · JonNesvold · Jun 19, 2019
diff --git a/postproc_scikit_sample.py b/postproc_scikit_sample.py
 # Pred and prob arrays are numpy array outputs from a sklearn model:
 #  - pred_arr = model.predict(X).astype(int)
 #  - prob_arr = model.predict_proba(X)
 #
 # Here we run the initial data through multiple models and structure the
 # model output into a multilevel dataframe for probabilities and predictions.
 #
 # Typically the next stage would be to enhance the labels of numerical
 # results to strings/categories or similar based on whatever we want, as well
 # as providing the results to a database or something like that.

 prob_arr_m1 = model1.predict_proba(original_df)
 prob_arr_m2 = model2.predict_proba(original_df)
 prob_arr_m3 = model3.predict_proba(original_df)
 pred_arr_m1 = model1.predict(original_df).astype(int)
 pred_arr_m2 = model2.predict(original_df).astype(int)
 pred_arr_m3 = model3.predict(original_df).astype(int)

 # Stack the per-model outputs side by side, one row per input sample
 predictions = np.column_stack((pred_arr_m1, pred_arr_m2, pred_arr_m3))
 probabilities = np.column_stack((prob_arr_m1, prob_arr_m2, prob_arr_m3))

 # Create the multilevel index.
 # NOTE(review): the names below assume model1 and model3 each emit 2
 # probability columns and model2 emits 6 -- confirm against the fitted models.
 # Every name must be unique, otherwise the column index becomes ambiguous.
 probcols_raw = [
     'm1_prob1', 'm1_prob2',
     'm2_prob1', 'm2_prob2', 'm2_prob3', 'm2_prob4', 'm2_prob5', 'm2_prob6',
     'm3_prob1', 'm3_prob2',
 ]
 predcols_raw = ['m1_prediction', 'm2_prediction', 'm3_prediction']
 predcols = [('segments', name) for name in predcols_raw]
 probcols = [('probabilities', name) for name in probcols_raw]
 # from_tuples expects one flat list of (level0, level1) tuples
 cols = pd.MultiIndex.from_tuples([*predcols, *probcols])

 # Converting to dataframe with multiindex: create the empty frame first,
 # then fill each top-level group with its stacked array
 pred_df = pd.DataFrame(index=original_df.index, columns=cols)
 pred_df['segments'] = predictions
 pred_df['probabilities'] = probabilities
	# Pred and prob arrays are numpy array outputs from a sklearn model:
	# - pred_arr = model.predict(X).astype(int)
	# - prob_arr = model.predict_proba(X)
	#
	# Here we run the initial data through multiple models and structure the
	# model output into a multilevel dataframe for probabilities and predictions.
	#
	# Typically the next stage would be to enhance the labels of numerical
	# results to strings/categories or similar based on whatever we want, as well
	# as providing the results to a database or something like that.

	prob_arr_m1 = model1.predict_proba(original_df)
	prob_arr_m2 = model2.predict_proba(original_df)
	prob_arr_m3 = model3.predict_proba(original_df)
	pred_arr_m1 = model1.predict(original_df).astype(int)
	pred_arr_m2 = model2.predict(original_df).astype(int)
	pred_arr_m3 = model3.predict(original_df).astype(int)

	# Stack the per-model outputs side by side, one row per input sample
	predictions = np.column_stack((pred_arr_m1, pred_arr_m2, pred_arr_m3))
	probabilities = np.column_stack((prob_arr_m1, prob_arr_m2, prob_arr_m3))

	# Create the multilevel index.
	# NOTE(review): the names below assume model1 and model3 each emit 2
	# probability columns and model2 emits 6 -- confirm against the fitted models.
	# Every name must be unique, otherwise the column index becomes ambiguous.
	probcols_raw = [
		'm1_prob1', 'm1_prob2',
		'm2_prob1', 'm2_prob2', 'm2_prob3', 'm2_prob4', 'm2_prob5', 'm2_prob6',
		'm3_prob1', 'm3_prob2',
	]
	predcols_raw = ['m1_prediction', 'm2_prediction', 'm3_prediction']
	predcols = [('segments', name) for name in predcols_raw]
	probcols = [('probabilities', name) for name in probcols_raw]
	# from_tuples expects one flat list of (level0, level1) tuples, so the two
	# lists are unpacked into a single list rather than passed as nested lists
	cols = pd.MultiIndex.from_tuples([*predcols, *probcols])

	# Converting to dataframe with multiindex: create the empty frame first,
	# then fill each top-level group with its stacked array
	pred_df = pd.DataFrame(index=original_df.index, columns=cols)
	pred_df['segments'] = predictions
	pred_df['probabilities'] = probabilities