A linear transition kernel akin to continuous voting for a multi-agent MDP
import numpy as np

def return_linear_pdf(n):
    #n is the number of dimensions/players in the joint action
    #this describes an n-dimensional linear surface defined on [0,1] that integrates to 1
    P = lambda x: (2.0/float(n))*np.sum(x)
    return(P)
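#a quick Monte Carlo sanity check of the claim above (an illustrative sketch,
#with P_check and samples as hypothetical names): the unit cube has volume 1,
#so the average density at uniform samples should come out near 1
P_check = return_linear_pdf(3)
samples = np.random.uniform(0.0, 1.0, size=(100000, 3))
print(np.mean([P_check(x) for x in samples])) #should be close to 1.0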
def return_linear_cdf(n):
    #cumulative distribution function for the linear PDF
    #C(X <= z) is the probability of a multivariate linear RV X being less than or equal to z
    C = lambda x: (1.0/float(n))*np.sum(np.power(x, 2))
    return(C)
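#a quick check of the CDF's boundary behavior (C_check is an illustrative name):
#the all-zeros vector should give probability 0 and the all-ones vector probability 1
C_check = return_linear_cdf(4)
print(C_check(np.zeros(4))) #0.0
print(C_check(np.ones(4)))  #1.0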
def scale_action(action, max_action=1.0, min_action=-1.0):
    #rescale the joint n x 1 action vector to the [0,1] interval to keep the cdf and pdf simple
    return((action - min_action)/(max_action - min_action))
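#illustrative usage: with the default bounds, -1 maps to 0, 0 maps to 0.5, and 1 maps to 1
print(scale_action(np.array([-1.0, 0.0, 1.0]))) #[0.  0.5 1. ]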
#example
#we will say that a random variable X is linearly distributed; this is like
#continuous voting, where all 0's implies a 0 probability of X being true
#and all 1's implies a probability of 1 that X is true
#we'll use this in an MDP context, where if X is true, the MDP transitions out
#of the current state with uniform probability across the other states,
#and if X is false, the MDP does not transition

n = 5 #number of agents

#these are lambda functions on action vectors in [0,1] of size n
PDF = return_linear_pdf(n) #might want this to plot
CDF = return_linear_cdf(n) #this gives probability of X <= z for some fixed z
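#a minimal plotting sketch for the comment above (matplotlib here is an
#assumption about the environment, not part of the kernel): evaluating the PDF
#along the diagonal x = (t, ..., t) gives (2/n)*n*t = 2t, a straight line
import matplotlib.pyplot as plt
ts = np.linspace(0, 1, 50)
plt.plot(ts, [PDF(np.full(n, t)) for t in ts])
plt.xlabel("t (identical action for every agent)")
plt.ylabel("PDF along the diagonal")
plt.show()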
#actions here are defined over the [-1,1] interval, but are normalized to [0,1]
#for this transition kernel

#suppose the learned policies choose the following joint action, for example
action = np.array([1.0, -0.25, 0.1, -0.89, 0.03]) #actions here defined from -1 to 1
scaled_action = scale_action(action) #default min and max are -1 and 1
print(scaled_action)

cdf = CDF(scaled_action)
#cdf gives the bias of a coin flip, i.e. the probability of the transition event being true
#we can simulate the flip with a uniform draw
#let the current state be s=1, with M total states (M is an example value here)
M = 10
states = list(range(M))
current_state = 1

unif_sample = np.random.uniform(0, 1)
if unif_sample <= cdf:
    #transition to a new state
    transition = True
    #transition uniformly amongst the other states
    new_state = np.random.choice([s for s in states if s != current_state])
else:
    #remain at the current state
    transition = False
    new_state = current_state
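#a sketch of how one might check the kernel empirically (sample_transition and
#draws are hypothetical names, not part of the gist): repeating the coin flip
#many times, the fraction of draws that leave the current state should approach
#the cdf value computed above
def sample_transition(cdf_value, states, current_state):
    #returns the next state under the kernel described above
    if np.random.uniform(0, 1) <= cdf_value:
        return np.random.choice([s for s in states if s != current_state])
    return current_state

draws = [sample_transition(cdf, states, current_state) for _ in range(10000)]
print(np.mean([d != current_state for d in draws])) #approximately equal to cdf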