Created
March 23, 2022 12:50
-
-
Save Burntt/86d87ac2dd5047d138217069b0602026 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def back_test_paths_generator(t_span, n, k, verbose=True):
    """Build the test masks and backtest paths for combinatorial CV.

    The ``t_span`` observations are split into ``n`` contiguous groups.
    Every way of choosing ``k`` of those groups as the test set is one
    "simulation".  Each group is a test group in the same number of
    simulations, so the tested folds can be stitched together into
    ``C(n, k) * k // n`` complete backtest paths.

    Parameters
    ----------
    t_span : int
        Number of observations (time steps).  Must be at least ``n``.
    n : int
        Number of contiguous groups.  When ``t_span`` is not a multiple
        of ``n`` the leftover observations are placed in the last group.
    k : int
        Number of groups used as the test set in each simulation
        (``1 <= k <= n``).
    verbose : bool, default True
        If true, print the number of simulations and of paths.

    Returns
    -------
    is_test : numpy.ndarray, bool, shape (t_span, C(n, k))
        ``is_test[t, s]`` is True when observation ``t`` is in the test
        set of simulation ``s``.
    paths : numpy.ndarray, float, shape (t_span, n_paths)
        ``paths[t, p]`` is the index of the simulation that supplies the
        test fold containing observation ``t`` on path ``p``.
    path_folds : numpy.ndarray, float, shape (n, n_paths)
        The same mapping as ``paths`` but per group instead of per
        observation.

    Raises
    ------
    ValueError
        If ``n < 1``, ``k`` is outside ``[1, n]`` or ``t_span < n``.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    if not 1 <= k <= n:
        raise ValueError("k must satisfy 1 <= k <= n")
    if t_span < n:
        raise ValueError("t_span must be at least n")

    # Assign each observation index to a group.  The integer division
    # can produce indices >= n when t_span is not a multiple of n, so
    # every such leftover observation is folded into the last group.
    group_num = np.minimum(np.arange(t_span) // (t_span // n), n - 1)

    # One row per simulation, holding the k group indices used for testing.
    test_groups = np.array(
        list(itt.combinations(np.arange(n), k))
    ).reshape(-1, k)
    C_nk = len(test_groups)
    # Each group is tested in C(n - 1, k - 1) == C(n, k) * k / n simulations.
    n_paths = C_nk * k // n

    if verbose:
        print('n_sim:', C_nk)
        print('n_paths:', n_paths)

    # is_test_group[g, s]: group g belongs to the test set of simulation s.
    is_test_group = np.zeros((n, C_nk), dtype=bool)
    for sim_idx, groups in enumerate(test_groups):
        is_test_group[groups, sim_idx] = True

    # Expand the per-group mask to a per-observation (t_span x C(n, k))
    # mask; each column indicates which observations are in the test set.
    is_test = is_test_group[group_num, :]

    # For each path, take for every group the first simulation that tests
    # the group and has not already been used for it on an earlier path.
    unused = is_test_group.copy()
    path_folds = np.full((n, n_paths), fill_value=np.nan)
    for path_idx in range(n_paths):
        for group_idx in range(n):
            sim_idx = int(unused[group_idx, :].argmax())
            path_folds[group_idx, path_idx] = sim_idx
            unused[group_idx, sim_idx] = False

    # Finally, expand the per-group path assignment to per-observation.
    paths = path_folds[group_num, :]

    return (is_test, paths, path_folds)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment