tmbo · May 17, 2016 13:03
diff --git a/tangmao.py b/tangmao.py
 INPUT: 
  U # underlying set of features
  EXPECTED_FEATURE_SET_SIZE # how many features should be selected 

 OUTPUT: 
  S # selected set of features (assumed to start out empty; one feature is added per iteration)

 while(not is_good_enough(S)):
  Ux, Uz = split_into_continous_nominal_features(U)
  
  # select the best continuous feature
  X´ = None
  # Whether the measure needs to be maximized or minimized depends on the evaluation measure.
  # Let's assume a lower value indicates a better feature set.
  best_J = +Inf
  
  measure = if(S contains nominal feature): Jm else: Jc 
  
  for ux in Ux:
    j = calculate_subset_evaluation_measure(ux, measure)
    if j < best_J:
      best_J = j
      X´ = ux
  
  
  # select the best nominal feature
  Z´ = None
  # Whether the measure needs to be maximized or minimized depends on the evaluation measure.
  # Let's assume a lower value indicates a better feature set.
  best_J = +Inf
  
  measure = if(S contains continuous feature): Jm else: Jn
  
  for uz in Uz:
    j = calculate_subset_evaluation_measure(uz, measure)
    if j < best_J:
      best_J = j
      Z´ = uz
      
  classifier_performance_ux = train_classifier_on(S + X´).error
  
  classifier_performance_uz = train_classifier_on(S + Z´).error
  
  if classifier_performance_ux > classifier_performance_uz:
    U = U \ Z´
    S = S + Z´
  else:
    U = U \ X´
    S = S + X´
    
 return S


 # example of an abort condition
 def is_good_enough(S):
  return S.size >= EXPECTED_FEATURE_SET_SIZE
	INPUT:
	U # underlying set of features
	EXPECTED_FEATURE_SET_SIZE # how many features should be selected

	OUTPUT:
	S # selected set of features (assumed to start out empty; one feature is added per iteration)

	while(not is_good_enough(S)):
	Ux, Uz = split_into_continous_nominal_features(U)

	# select the best continuous feature
	X´ = None
	# Whether the measure needs to be maximized or minimized depends on the evaluation measure.
	# Let's assume a lower value indicates a better feature set.
	best_J = +Inf

	measure = if(S contains nominal feature): Jm else: Jc

	for ux in Ux:
	j = calculate_subset_evaluation_measure(ux, measure)
	if j < best_J:
	best_J = j
	X´ = ux


	# select the best nominal feature
	Z´ = None
	# Whether the measure needs to be maximized or minimized depends on the evaluation measure.
	# Let's assume a lower value indicates a better feature set.
	best_J = +Inf

	measure = if(S contains continuous feature): Jm else: Jn

	for uz in Uz:
	j = calculate_subset_evaluation_measure(uz, measure)
	if j < best_J:
	best_J = j
	Z´ = uz

	classifier_performance_ux = train_classifier_on(S + X´).error

	classifier_performance_uz = train_classifier_on(S + Z´).error

	if classifier_performance_ux > classifier_performance_uz:
	U = U \ Z´
	S = S + Z´
	else:
	U = U \ X´
	S = S + X´

	return S


	# example of an abort condition
	def is_good_enough(S):
	return S.size >= EXPECTED_FEATURE_SET_SIZE