Created April 15, 2021 08:41
Pseudocode for cross-validation with embedding models
given labeled_training_indices (e.g., maybe there are 20 labeled training instances)
given labeled_test_indices (there are always ~3000 of these due to the split created by Harutyunyan et al.)

train_fold, val_fold <- stratified_split(labeled_training_indices, train=70%, "test"=30%) # "test" is really the validation set here
# for example, if we have 20 labeled training instances, then we have 14 instances for training and 6 for validation
# ... so we really don't have a lot when the number of labeled training instances is small

hp_grid = ParameterGrid({
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, ...],
    'embedding_epoch': [1, 11, 21, ...],
    ... other hyperparameters ...
})

best_model <- None
for each hp in hp_grid:
    load embeddings for hp's 'embedding_epoch'
    train logistic regression model on train_fold using those embeddings and the other hps
    evaluate model on val_fold
    if model is better than best_model: # "model" includes the embedding epoch
        best_model <- model

evaluate best_model (including its embedding epoch) on labeled_test_indices
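For concreteness, here is a minimal runnable sketch of the pseudocode above in scikit-learn, using synthetic data. The `embeddings_by_epoch` dict, the index ranges, and the AUROC selection metric are all assumptions standing in for the real MIMIC-III embeddings and benchmark splits.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import ParameterGrid, train_test_split

rng = np.random.default_rng(0)

# Stand-in for embeddings saved at several epochs of embedding-model training:
# each epoch maps every instance index to a feature vector.
n_instances, n_features = 3020, 16
embeddings_by_epoch = {
    epoch: rng.normal(size=(n_instances, n_features)) for epoch in (1, 11, 21)
}
labels = rng.integers(0, 2, size=n_instances)

labeled_training_indices = np.arange(20)           # e.g., 20 labeled training instances
labeled_test_indices = np.arange(20, n_instances)  # the fixed benchmark test split

# Stratified 70/30 split of the labeled training instances into train/validation.
train_fold, val_fold = train_test_split(
    labeled_training_indices,
    test_size=0.3,
    stratify=labels[labeled_training_indices],
    random_state=0,
)

hp_grid = ParameterGrid({
    "penalty": ["l1", "l2"],
    "C": [0.001, 0.01, 0.1],
    "embedding_epoch": [1, 11, 21],
})

best_score, best_model, best_epoch = -np.inf, None, None
for hp in hp_grid:
    # "Load" the embeddings for this hyperparameter point's epoch.
    X = embeddings_by_epoch[hp["embedding_epoch"]]
    model = LogisticRegression(penalty=hp["penalty"], C=hp["C"], solver="liblinear")
    model.fit(X[train_fold], labels[train_fold])
    # Evaluate on the validation fold; the embedding epoch is part of the "model".
    score = roc_auc_score(labels[val_fold], model.predict_proba(X[val_fold])[:, 1])
    if score > best_score:
        best_score, best_model, best_epoch = score, model, hp["embedding_epoch"]

# Final evaluation on the held-out test split, using the winning epoch's embeddings.
X_test = embeddings_by_epoch[best_epoch][labeled_test_indices]
test_score = roc_auc_score(
    labels[labeled_test_indices], best_model.predict_proba(X_test)[:, 1]
)
print(f"best epoch: {best_epoch}, test AUROC: {test_score:.3f}")
```

Note that `solver="liblinear"` is chosen because it supports both `l1` and `l2` penalties; with random features the test AUROC will hover near 0.5, which is expected for this toy data.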
This comment is specifically related to the MIMIC-III dataset splits defined in this paper: Multitask learning and benchmarking with clinical time series data by Harutyunyan et al.