Created
May 25, 2017 07:07
-
-
Save gabrielgarza/7a549311e644c3f6c4f9abf920303706 to your computer and use it in GitHub Desktop.
CartPole-v0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym
from gym import wrappers
import random
import numpy as np
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import mean, median
from collections import Counter

# Training hyperparameters.
LR = 1e-3  # learning rate for the Adam optimizer

# Build the environment and record episodes for upload/review.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1')
env.reset()

goal_steps = 500         # hard cap on steps per episode
score_requirement = 110  # minimum episode score to keep as training data
initial_games = 50000    # number of random episodes to sample

# Memoized trained model (populated lazily by loadModel).
cached_model = None
def random_games():
    """Play 20 episodes with a uniformly random policy, rendering each step.

    Sanity check for the environment only; produces no data. Resets the
    environment whenever an episode terminates.
    """
    for _ in range(20):
        env.reset()
        for _ in range(goal_steps):
            env.render()
            observation, reward, done, info = env.step(env.action_space.sample())
            if done:
                env.reset()
                break

# random_games()
def loadModel():
    """Return the trained DNN, loading it from disk on first use.

    The model is memoized in the module-level `cached_model`, so the
    TensorFlow graph is rebuilt and the weights read only once.
    """
    global cached_model
    if cached_model is None:
        tf.reset_default_graph()
        cached_model = tflearn.DNN(neural_network(4), tensorboard_dir='log')
        cached_model.load("modelNew.tflearn")
    return cached_model
def initial_population():
    """Generate training data by playing `initial_games` random episodes.

    Each episode is played with uniformly random actions. Episodes scoring
    at least `score_requirement` are kept; every (previous observation,
    action) pair from a kept episode becomes one training sample, with the
    action one-hot encoded as [left, right].

    Side effects: saves the samples to 'saved.npy' and prints score
    statistics for the accepted episodes.

    Returns:
        list of [observation, one_hot_action] pairs.
    """
    training_data = []
    scores = []
    accepted_scores = []
    for _ in range(initial_games):
        score = 0
        game_memory = []
        prev_observation = []
        for _ in range(goal_steps):
            # Random policy: sample an action uniformly from {0, 1}.
            # (Alternatively, a previously trained model via loadModel()
            # could be used here to bootstrap better training data.)
            action = random.randrange(0, 2)
            observation, reward, done, info = env.step(action)
            # Pair the *previous* observation with the action taken from it,
            # so each sample maps "state seen" -> "action chosen".
            if len(prev_observation) > 0:
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done:
                break
        if score >= score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                # One-hot encode the action: 0 -> [1, 0], 1 -> [0, 1].
                # Fix: the original used [not action, action], which mixed
                # Python bools into the numeric training targets.
                output = [0, 1] if data[1] == 1 else [1, 0]
                training_data.append([data[0], output])
        env.reset()
        scores.append(score)

    np.save('saved.npy', np.array(training_data))

    # Guard: mean()/median() raise StatisticsError on an empty sequence,
    # which would happen if no episode reached the threshold.
    if accepted_scores:
        print("Average accepted score:", mean(accepted_scores))
        print("Median accepted score:", median(accepted_scores))
        print(Counter(accepted_scores))
    else:
        print("No episodes reached score_requirement =", score_requirement)
    return training_data
def neural_network(input_size):
    """Build the policy-network graph.

    Three ReLU hidden layers (300 / 400 / 300 units) each followed by
    dropout (keep prob 0.8), ending in a 2-way softmax trained with
    categorical cross-entropy via Adam.

    Args:
        input_size: length of one observation vector (4 for CartPole).

    Returns:
        The tflearn regression layer wrapping the full network.
    """
    net = input_data(shape=[None, input_size, 1], name='input')
    for width in (300, 400, 300):
        net = fully_connected(net, width, activation='relu')
        net = dropout(net, 0.8)
    net = fully_connected(net, 2, activation='softmax')
    return regression(net, optimizer='adam', learning_rate=LR,
                      loss='categorical_crossentropy', name='targets')
def neural_network_model(input_size):
    """Wrap the policy-network graph in a trainable tflearn DNN."""
    network = neural_network(input_size)
    return tflearn.DNN(network, tensorboard_dir='log')
def train_model(training_data, model=False):
    """Fit a model on (observation, one-hot action) training pairs.

    Args:
        training_data: list of [observation, one_hot_action] pairs.
        model: an existing tflearn DNN to continue training, or a falsy
            value to build a fresh network sized to the observations.

    Returns:
        The trained model (weights are also saved to 'modelNew.tflearn').
    """
    obs_len = len(training_data[0][0])
    X = np.array([sample[0] for sample in training_data],
                 dtype=np.float64).reshape(-1, obs_len, 1)
    y = np.array([sample[1] for sample in training_data], dtype=np.float64)
    if not model:
        model = neural_network_model(input_size=obs_len)
    model.fit({'input': X}, {'targets': y},
              n_epoch=50, snapshot_step=500,
              show_metric=True, run_id='openai_cartpole')
    model.save("modelNew.tflearn")
    return model
#### Train a new model
# training_data = initial_population()
# model = train_model(training_data)

#### Use existing model: rebuild the graph and load saved weights.
tf.reset_default_graph()
model = tflearn.DNN(neural_network(4), tensorboard_dir='log')
model.load("modelNew.tflearn")

# Evaluate the trained policy over 100 episodes.
scores = []
choices = []
for each_game in range(100):
    score = 0
    game_memory = []
    prev_obs = []
    env.reset()
    for _ in range(goal_steps):
        # env.render()
        if len(prev_obs) == 0:
            # First step of an episode: no observation yet, act randomly.
            action = random.randrange(0, 2)
        else:
            # Pick the action with the highest predicted probability.
            prediction = model.predict(prev_obs.reshape(-1, len(prev_obs), 1))
            action = np.argmax(prediction[0])
        choices.append(action)
        new_observation, reward, done, info = env.step(action)
        game_memory.append([prev_obs, action])
        prev_obs = new_observation
        score += reward
        if done:
            break
    scores.append(score)

print('Average Score', sum(scores)/len(scores))
print('Choice 1: {}, Choice 0: {}'.format(choices.count(1)/len(choices), choices.count(0)/len(choices) ))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am facing this error when I try to execute your code:
print(model.predict(prev_obs.reshape(-1, len(prev_obs), 1)))
AttributeError: 'NoneType' object has no attribute 'predict'
How do I resolve this issue?