Evolutionary resolution of lunar lander v2
## Evolutionary resolution of lunar lander v2
import gym
from gym import wrappers
import numpy as np
import concurrent.futures

gym.undo_logger_setup()

EXPERIMENTPATH = '/tmp/LunarLander-experiment-1'

# main environment, wrapped in a Monitor so episodes are recorded
main_env = gym.make('LunarLander-v2')
main_env = wrappers.Monitor(main_env, EXPERIMENTPATH, force=True)

print(main_env.observation_space.shape[0])  # state size: 8
print(main_env.action_space.n)              # number of discrete actions: 4

MAX_EPISODES = 1000
MAX_STEPS = 250
DO_NOTHING_ACTION = 0              # action taken once an episode runs past MAX_STEPS
POPULATION = 100                   # number of mutations evaluated per generation
LEARNING_RATE_EXPLORING = 0.0002   # learning rate while rewards are still low
LEARNING_RATE_MATURE = 0.0002      # learning rate once the solved threshold (200) is reached
SIGMA = 0.1                        # standard deviation of the mutation noise

# one extra environment per population member, so mutations can be evaluated independently
mutation_environments = []
for i in range(POPULATION):
    mutation_environments.append(gym.make('LunarLander-v2'))
class EvolutionaryNetWork:
    def __init__(self, sigma=0.01, state_size=8,
                 action_size=4, population_size=100):
        self.population_size = population_size
        self.action_size = action_size
        self.state_size = state_size
        self.sigma = sigma
        # the "genes": a single linear policy mapping state -> action scores
        self.weight = np.random.rand(state_size, action_size)

    def generate_mutations(self):
        # sample one Gaussian perturbation per population member
        noise = np.random.randn(self.population_size, self.state_size, self.action_size)
        mutations = []
        for i in range(self.population_size):
            mutations.append(self.weight + self.sigma * noise[i])
        return np.array(mutations), noise

    def update_genes(self, total_rewards, noise, learning_rate):
        # reward-weighted sum of the noise: the evolution-strategies gradient estimate
        weighted_noise = np.matmul(noise.T, total_rewards).T
        self.weight = self.weight + learning_rate / (self.population_size * self.sigma) * weighted_noise
def run_episode(weight, env, show=False):
    state = env.reset()
    total_reward = 0
    done = False
    step = 0
    while not done:
        if show:
            env.render()
        if step < MAX_STEPS:
            # linear policy: pick the action with the highest score for this state
            action = np.matmul(weight.T, state)
            move = np.argmax(action)
        else:
            # past the step limit, stop firing engines so the episode ends
            move = DO_NOTHING_ACTION
        state, reward, done, _ = env.step(move)
        step += 1
        total_reward += reward
    return total_reward
genes = EvolutionaryNetWork(population_size=POPULATION, sigma=SIGMA)

# run episodes
for ep in range(MAX_EPISODES):
    # render every 100th episode
    show = ep % 100 == 0

    # run an episode with the current genes
    current_gen_eval = run_episode(genes.weight, main_env, show)

    mutations, noise = genes.generate_mutations()

    # evaluate the mutations in parallel: submit all episodes first, then collect
    # the results (calling result() inside the submit loop would serialize the work)
    total_rewards = np.zeros(POPULATION)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(run_episode, mutations[i], mutation_environments[i], False)
                   for i in range(POPULATION)]
        for i, future in enumerate(futures):
            total_rewards[i] = future.result()

    # select the learning rate
    learning_rate = LEARNING_RATE_EXPLORING
    if current_gen_eval > 200:
        learning_rate = LEARNING_RATE_MATURE

    # update the genes with the reward-weighted noise
    genes.update_genes(total_rewards, noise, learning_rate)

    gen_mean = np.mean(total_rewards)
    # print(genes.weight)
    print(ep, ': ', current_gen_eval, ' ', gen_mean)

main_env.close()
for i in range(POPULATION):
    mutation_environments[i].close()
Hi Pablo!
I am trying to implement an evolutionary learning strategy to solve Lunar Lander. I see you have done something similar. Can you comment on what state_size and population_size are?
Thanks!

Hi!
state_size is the size of the state that the environment gives you at each step: position, speed, etc. In this case it has 8 parameters.
population_size is the number of mutations evaluated at each step.
Any time!
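For anyone with the same question, here is a minimal sketch (not part of the gist, assuming the same classic gym API used above) of where those two numbers come from: state_size and action_size can be read straight off the environment, and population_size just fixes how many perturbed copies of the weight matrix are sampled each generation.

import gym
import numpy as np

env = gym.make('LunarLander-v2')
state_size = env.observation_space.shape[0]   # 8 values per observation (position, velocity, angle, ...)
action_size = env.action_space.n              # 4 discrete actions
population_size = 100                         # how many mutations to evaluate per generation (example value)
sigma = 0.1                                   # mutation noise scale

weight = np.random.rand(state_size, action_size)
noise = np.random.randn(population_size, state_size, action_size)
mutations = weight + sigma * noise            # one perturbed weight matrix per population member
print(mutations.shape)                        # (100, 8, 4)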