Skip to content

Instantly share code, notes, and snippets.

@rickhenderson
Last active May 5, 2016 03:29
Show Gist options
  • Save rickhenderson/026ee6a4722c9cc9f3e9c97ebb4a7991 to your computer and use it in GitHub Desktop.
Save rickhenderson/026ee6a4722c9cc9f3e9c97ebb4a7991 to your computer and use it in GitHub Desktop.
Problem with frozen and random.
# Basic Agent Class from Code samples on OpenGym
# Modified by Rick Henderson
# Modified on May 4, 2016
# May the 4th be with you!
import logging
import os
import gym
# The world's simplest agent!
class CodedAgent(object):
def __init__(self, action_space):
self.action_space = action_space
def act(self, observation, reward, done):
# Set some sort of action
# UP = 0, RIGHT = 1, DOWN = 2, LEFT = 3
move = 2
# Return the action as an integer in the required range.
return move
if __name__ == '__main__':
# You can optionally set up the logger. Also fine to set the level
# to logging.DEBUG or logging.WARN if you want to change the
# amount of outut.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
env = gym.make('FrozenLake-v0')
agent = CodedAgent(env.action_space)
# Action space is Discrete(4), values 0 - 3
# UP = 0, RIGHT = 1, DOWN = 2, LEFT = 3
# You provide the directory to write to (can be an existing
# directory, but can't contain previous monitor results. You can
# also dump to a tempdir if you'd like: tempfile.mkdtemp().
outdir = "tmp/c2"
env.monitor.start(outdir, force=True)
episode_count = 100
max_steps = 200
reward = 0
done = False
for i in xrange(episode_count):
ob = env.reset()
reward = done = None
for j in xrange(max_steps):
action = agent.act(ob, reward, done)
ob, reward, done, _ = env.step(action)
env.render()
if done:
break
# Dump result info to disk
env.monitor.close()
# Upload to the scoreboard. We could also do this from another
# process if we wanted.
logger.info("Successfully ran CodedAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
gym.upload(outdir, algorithm_id='coded')

Running "random agent" code but replaced the random move with a single action.

@rickhenderson
Copy link
Author

Regardless of what action value I give, the output render appears to show the random walk through the array.
Action 2 should Down, but the output from the render always displays [RIGHT].

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment