Created December 4, 2016 20:24
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
import gym_catch

ENV_NAME = 'catch-v0'

# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
print("Actions: " + str(nb_actions))
print("ObSpace: " + str(env.observation_space.shape))

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=500, window_length=1)
policy = BoltzmannQPolicy(tau=0.05)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=10000, policy=policy)
dqn.compile(Adam(lr=0.2), metrics=['mae'])

# Okay, now it's time to learn something! Visualizing the training slows it down quite a lot,
# so it is disabled here. You can always safely abort the training prematurely using Ctrl + C.
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
# dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))

# Finally, evaluate our algorithm for 500 episodes.
dqn.test(env, nb_episodes=500, visualize=True)
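
The gym.make(ENV_NAME) call above only works because importing gym_catch registers the 'catch-v0' environment with gym as a side effect. That registration presumably looks roughly like the sketch below; the entry_point module path and class name are assumptions for illustration, not the actual gym_catch source.

from gym.envs.registration import register

register(
    id='catch-v0',                           # the id later passed to gym.make
    entry_point='gym_catch.envs:CatchEnv',   # assumed module:class path, not verified
)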
I've played around with this quite a bit, but I couldn't get it to converge, no matter how I tweak things.
On this issue you commented that you were able to make it work: https://github.com/matthiasplappert/keras-rl/issues/51
Could you explain what exactly you changed?
Thanks in advance.
@codiphy Hey, I just randomly stumbled on this comment. The solution was posted in keras-rl/keras-rl#51.
tl;dr: In my environment's implementation of the step function, when I returned the state, I returned a reference to the current state rather than a deep copy. That messed things up completely, of course.
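
For anyone hitting the same convergence problem, the essence of that fix is to copy the observation before returning it from step, so the transitions stored in the replay memory don't all point at the same mutated array. Below is a minimal sketch of the idea, assuming the state is a plain NumPy array (for nested state you would use copy.deepcopy instead); the CatchLikeEnv class and its state layout are illustrative, not the actual gym_catch code.

import numpy as np
import gym

class CatchLikeEnv(gym.Env):
    """Illustrative environment; only the parts relevant to the bug are shown."""

    def __init__(self):
        self.state = np.zeros((10, 10))

    def step(self, action):
        # ... update self.state in place according to the action ...
        reward, done, info = 0.0, False, {}
        # Buggy version: returning self.state hands the agent a reference, so every
        # observation later stored in SequentialMemory points at the same array and
        # silently changes as the episode progresses.
        # return self.state, reward, done, info
        # Fixed version: return a copy so each stored observation stays frozen.
        return np.copy(self.state), reward, done, info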