A linear transition kernel akin to continuous voting for a multi-agent MDP
import numpy as np

def return_linear_pdf(n):
    #n is the number of dimensions/players in the joint action
    #this describes an n-dimensional linear surface defined on [0,1] that integrates to 1
    P = lambda x: (2.0/float(n))*np.sum(x)
    return(P)
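#a quick Monte Carlo sanity check of the claim above (an illustrative sketch,
#with P_check and samples as hypothetical names): the unit cube has volume 1,
#so the average density at uniform samples should come out near 1
P_check = return_linear_pdf(3)
samples = np.random.uniform(0.0, 1.0, size=(100000, 3))
print(np.mean([P_check(x) for x in samples])) #should be close to 1.0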
def return_linear_cdf(n):
    #cumulative distribution function for the linear PDF
    #C(X <= z) is the probability of a multivariate linear RV X being less than or equal to z
    C = lambda x: (1.0/float(n))*np.sum(np.power(x, 2))
    return(C)
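#a quick check of the CDF's boundary behavior (C_check is an illustrative name):
#the all-zeros vector should give probability 0 and the all-ones vector probability 1
C_check = return_linear_cdf(4)
print(C_check(np.zeros(4))) #0.0
print(C_check(np.ones(4)))  #1.0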
def scale_action(action, max_action=1.0, min_action=-1.0):
    #rescale the joint n x 1 action vector to the [0,1] interval to keep the cdf and pdf simple
    return((action - min_action)/(max_action - min_action))
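#illustrative usage: with the default bounds, -1 maps to 0, 0 maps to 0.5, and 1 maps to 1
print(scale_action(np.array([-1.0, 0.0, 1.0]))) #[0.  0.5 1. ]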
#example
#we will say that a random variable X is linearly distributed; this is like
#continuous voting, where all 0's implies a 0 probability of X being true
#and all 1's implies a probability of 1 that X is true
#we'll use this in an MDP context, where if X is true, the MDP transitions out
#of the current state with uniform probability across the other states,
#and if X is false, the MDP does not transition

n = 5 #number of agents

#these are lambda functions on action vectors in [0,1] of size n
PDF = return_linear_pdf(n) #might want this to plot
CDF = return_linear_cdf(n) #this gives probability of X <= z for some fixed z
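#a minimal plotting sketch for the comment above (matplotlib here is an
#assumption about the environment, not part of the kernel): evaluating the PDF
#along the diagonal x = (t, ..., t) gives (2/n)*n*t = 2t, a straight line
import matplotlib.pyplot as plt
ts = np.linspace(0, 1, 50)
plt.plot(ts, [PDF(np.full(n, t)) for t in ts])
plt.xlabel("t (identical action for every agent)")
plt.ylabel("PDF along the diagonal")
plt.show()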
#actions here are defined over the [-1,1] interval, but are normalized to [0,1]
#for this transition kernel

#suppose the learned policies choose the following joint action, for example
action = np.array([1.0, -0.25, 0.1, -0.89, 0.03]) #actions here defined from -1 to 1
scaled_action = scale_action(action) #default min and max are -1 and 1
print(scaled_action)

cdf = CDF(scaled_action)
#cdf gives the bias of a coin flip, i.e. the probability of the transition event being true
#we can simulate the flip with a uniform draw
#let the current state be s=1, with M total states (M is an example value here)
M = 10
states = list(range(M))
current_state = 1

unif_sample = np.random.uniform(0, 1)
if unif_sample <= cdf:
    #transition to a new state
    transition = True
    #transition uniformly amongst the other states
    new_state = np.random.choice([s for s in states if s != current_state])
else:
    #remain at the current state
    transition = False
    new_state = current_state
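#a sketch of how one might check the kernel empirically (sample_transition and
#draws are hypothetical names, not part of the gist): repeating the coin flip
#many times, the fraction of draws that leave the current state should approach
#the cdf value computed above
def sample_transition(cdf_value, states, current_state):
    #returns the next state under the kernel described above
    if np.random.uniform(0, 1) <= cdf_value:
        return np.random.choice([s for s in states if s != current_state])
    return current_state

draws = [sample_transition(cdf, states, current_state) for _ in range(10000)]
print(np.mean([d != current_state for d in draws])) #approximately equal to cdf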