Created
April 13, 2016 21:41
import os
import importlib
import logging

import numpy as np
from pastalog import Log
from sacred import Experiment
from sacred.observers import MongoObserver

logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

from PLE_env import MyEnv as PLE_env
from deeprl.q_networks.q_net_theano import MyQNetwork
from deeprl.agent_ale import ALEAgent as PLEAgent

#could use a * import, but better to know exactly what you're getting
from deeprl.experiment.base_controllers import (
    VerboseController,
    LearningRateController,
    DiscountFactorController,
    EpsilonController,
    InterleavedTestEpochController,
    Controller
)
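
#Flips the PLE display on for one epoch out of every `periodicity`, so
#training runs headless most of the time but we can still watch the agent.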
class PLEDisplayToggleController(Controller):
    def __init__(self, periodicity=2):
        super(self.__class__, self).__init__()
        self._epochCount = 0
        self._periodicity = periodicity

    def OnStart(self, agent):
        if not self._active:
            return
        self._epochCount = 0

    def OnEpochEnd(self, agent):
        if not self._active:
            return
        self._epochCount += 1
        if (self._epochCount % self._periodicity) == 0:
            #reaching into private attributes is fragile, but it is the
            #simplest way to flip the display from a controller
            agent._environment._ple.display_screen = True
        else:
            agent._environment._ple.display_screen = False
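
#Streams training statistics to a pastalog server for live graphs:
#hyperparameters at the start and end of every epoch, loss statistics
#every `loss_periodicity` actions.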
class PastaLogController(Controller):
    def __init__(self, model_name=None, host="http://localhost:9000", loss_periodicity=100):
        super(self.__class__, self).__init__()
        self._loss_periodicity = loss_periodicity
        self._action_count = 0
        self._log = Log(host, model_name)

    def OnStart(self, agent):
        if not self._active:
            return
        self._action_count = 0
        self._log.post("learning_rate", value=agent.learningRate(), step=self._action_count)
        self._log.post("discount", value=agent.discountFactor(), step=self._action_count)
        self._log.post("epsilon", value=agent.epsilon(), step=self._action_count)

    def OnActionTaken(self, agent):
        if not self._active:
            return
        if self._action_count % self._loss_periodicity == 0:
            bell_res = float(agent.avgBellmanResidual())
            ep_v = float(agent.avgEpisodeVValue())
            self._log.post("avg_bellman_residual", value=bell_res, step=self._action_count)
            self._log.post("avg_episode_value", value=ep_v, step=self._action_count)
        self._action_count += 1

    def OnEpochEnd(self, agent):
        if not self._active:
            return
        self._log.post("learning_rate", value=agent.learningRate(), step=self._action_count)
        self._log.post("discount", value=agent.discountFactor(), step=self._action_count)
        self._log.post("epsilon", value=agent.epsilon(), step=self._action_count)
ex = Experiment("dqn_trn")

@ex.config
def config():
    #game specific
    game_cfg = {
        "width": 64,
        "height": 64,
        "name": "WaterWorld",
        "other": {
            "num_creeps": 8
        },
        "allowed": ["WaterWorld", "PuckWorld", "Snake", "RaycastMaze"]
    }
    assert game_cfg["name"] in game_cfg["allowed"], ("Game must be one of game_cfg['allowed']:", game_cfg["allowed"])

    #ple specific
    ple_cfg = {
        "fps": 30,
        "frame_skip": 3,
        "force_fps": True,
        "display_screen": False,
        "add_noop_action": True
    }

    #deeprl specific
    deeprl_cfg = {
        "steps_per_epoch": 90000,
        "epochs": 30,
        "steps_per_test": 30000,
        "discount_inc_periodicity": 1,
        "learning_decay_periodicity": 1
    }

    dqn_agent_cfg = {
        "update_rule": "adam",
        "batch_accumulator": "sum",
        "learning_rate_init": 0.01,
        "learning_rate_final": 1e-4,
        "discount_init": 0.1,
        "discount_max_at": 15,
        "discount_max": 0.99,
        "clip_delta": 1.0,
        "epsilon_init": 1.0,
        "epsilon_min": 0.1,
        "epsilon_decay": deeprl_cfg["steps_per_epoch"]*5,
        "update_frequency": 1,
        "replay_memory_size": 1000000,
        "batch_size": 32,
        "freeze_interval": 10000,
        "deterministic": True
    }

    #live in the moment at the start, then go for longer-horizon predictions later
    dqn_agent_cfg["discount_inc"] = (dqn_agent_cfg["discount_max"] - dqn_agent_cfg["discount_init"]) / dqn_agent_cfg["discount_max_at"]
    dqn_agent_cfg["learning_rate_decay"] = np.exp(
        np.log(dqn_agent_cfg["learning_rate_final"] / dqn_agent_cfg["learning_rate_init"]) / (deeprl_cfg["epochs"]/deeprl_cfg["learning_decay_periodicity"])
    )
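    #with these settings: discount_inc = (0.99 - 0.1) / 15 = 0.059.., a linear
    #ramp that reaches discount_max after discount_max_at epochs (assuming the
    #controller applies the increment additively), and learning_rate_decay =
    #exp(ln(1e-4 / 1e-2) / 30) = 0.858.., the per-epoch factor that takes the
    #learning rate from 1e-2 down to 1e-4 over the 30 epochs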

def init_game(game_cfg):
    """Looks up the game class inside ple.games by name and instantiates it."""
    game = getattr(importlib.import_module("ple.games.%s" % game_cfg["name"].lower()), game_cfg["name"])
    if "num_creeps" in game_cfg["other"] and game_cfg["name"] == "WaterWorld":
        game = game(
            width=game_cfg["width"],
            height=game_cfg["height"],
            num_creeps=game_cfg["other"]["num_creeps"])
    else:
        game = game(width=game_cfg["width"], height=game_cfg["height"])
    return game

@ex.automain
def run(game_cfg, ple_cfg, deeprl_cfg, dqn_agent_cfg, _rnd):
    rng = _rnd
    game = init_game(game_cfg)
    env = PLE_env(rng, game=game, **ple_cfg)

    #the three magic numbers are (going by deeprl's q_net_theano signature)
    #rho, rms_epsilon and momentum for RMSprop; note that
    #dqn_agent_cfg["update_rule"] ("adam") is never used, the update rule is
    #hardcoded to "deepmind_rmsprop" here
    q_network = MyQNetwork(
        env,
        0.9,
        0.0001,
        0.0,
        dqn_agent_cfg["clip_delta"],
        dqn_agent_cfg["freeze_interval"],
        dqn_agent_cfg["batch_size"],
        "General_DQN_0",
        "deepmind_rmsprop",
        dqn_agent_cfg["batch_accumulator"],
        rng
    )

    agent = PLEAgent(
        env,
        q_network,
        dqn_agent_cfg["replay_memory_size"],
        max(dim[0] for dim in env.inputDimensions()),  #longest input history the network expects
        dqn_agent_cfg["batch_size"],
        rng
    )
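
    #controllers run in the order they are attached; the indices in
    #InterleavedTestEpochController's controllersToDisable below refer to
    #this attach order (0=verbose, 1=learning rate, 2=discount, 3=epsilon,
    #4=pastalog)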
    #prints a summary of performance before each epoch
    agent.attach(VerboseController(
        evaluateOn="epoch",
        periodicity=1
    ))

    #decays the learning rate over time
    agent.attach(LearningRateController(
        initialLearningRate=dqn_agent_cfg["learning_rate_init"],
        learningRateDecay=dqn_agent_cfg["learning_rate_decay"],
        periodicity=deeprl_cfg["learning_decay_periodicity"]
    ))

    #increases the discount factor over time
    agent.attach(DiscountFactorController(
        initialDiscountFactor=dqn_agent_cfg["discount_init"],
        discountFactorGrowth=dqn_agent_cfg["discount_inc"],
        discountFactorMax=dqn_agent_cfg["discount_max"],
        periodicity=deeprl_cfg["discount_inc_periodicity"]
    ))

    #anneals epsilon from epsilon_init down to epsilon_min over epsilon_decay actions
    agent.attach(EpsilonController(
        initialE=dqn_agent_cfg["epsilon_init"],
        eDecays=dqn_agent_cfg["epsilon_decay"],
        eMin=dqn_agent_cfg["epsilon_min"],
        evaluateOn="action",
        periodicity=1,
        resetEvery="none"
    ))

    #pretty live graphs
    agent.attach(PastaLogController(
        model_name="DQN_%s" % game_cfg['name'],
        host="http://localhost:8000",
        loss_periodicity=500
    ))

    #every 2nd epoch, run a test epoch with the five controllers above disabled
    agent.attach(InterleavedTestEpochController(
        id=PLE_env.VALIDATION_MODE,
        epochLength=deeprl_cfg["steps_per_test"],
        controllersToDisable=[0, 1, 2, 3, 4],
        periodicity=2,
        showScore=True,
        summarizeEvery=1
    ))

    #just toggles the display on and off so we can watch
    agent.attach(PLEDisplayToggleController(
        periodicity=2
    ))

    agent.run(deeprl_cfg["epochs"], deeprl_cfg["steps_per_epoch"])
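
#sacred's automain also gives the script a CLI, so any config value can be
#overridden at launch, e.g. (assuming this file is saved as run_PLE.py):
#  python run_PLE.py with game_cfg.name=Snake ple_cfg.display_screen=True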
PLE_env.py
""" Interface with the PLE environment | |
Authors: Vincent Francois-Lavet, David Taralla | |
Modified by: Norman Tasfi | |
""" | |
import numpy as np | |
import cv2 | |
from ple import PLE | |
from deeprl.base_classes import Environment | |
from mpl_toolkits.axes_grid1 import host_subplot | |
import mpl_toolkits.axisartist as AA | |
import matplotlib.pyplot as plt | |
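
#Wraps a PLE game as a deeprl Environment: grayscale screen grabs,
#downsampled to 48x48, a 4-frame history (see inputDimensions) and rewards
#clipped to their sign.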
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, game=None, frame_skip=4,
                 display_screen=True, add_noop_action=True, force_fps=True, fps=30):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0
        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        if game is None:
            raise ValueError("Game must be provided")

        #always give ple a frame_skip of 1 and let the env do the skipping
        self._ple = PLE(game, frame_skip=1, display_screen=display_screen,
                        add_noop_action=add_noop_action, force_fps=force_fps, fps=fps)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()
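
    #deeprl resets with mode -1 for training epochs and with
    #MyEnv.VALIDATION_MODE (0) for the interleaved test epochs attached in
    #the training script; the bookkeeping below keeps a per-mode score and
    #episode count for summarizePerformance()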
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  #mode is a training epoch: leave validation mode
            self._mode = -1

        self._ple.reset_game()
        #take a random number of no-ops so episodes don't all start identically
        for _ in range(self._randomState.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)

        #initial observation: a stack of 4 blank 48x48 frames
        return [4 * [48 * [48 * [0]]]]
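
    #called once per agent step: repeats the chosen action for _frameSkip
    #frames (stopping early on game over) and accumulates the raw reward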
    def act(self, action):
        action = self._actions[action]
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        #clip the reward to {-1, 0, 1}, as in the DQN paper
        return np.sign(reward)
    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._modeEpisodeCount += 1  #count the episode still in progress
        print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        #4 stacked 48x48 grayscale frames
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ple.game_over()

if __name__ == "__main__":
    pass