pyliaorachel · June 15, 2018 05:12
diff --git a/dqn_modify_reward.py b/dqn_modify_reward.py
 ...
 # Advance the environment one step with the chosen action.
 next_state, reward, done, info = env.step(action)

 # Reshape the reward to speed up training. The reward returned by
 # env.step() above is discarded and replaced by the shaped value below.
 # NOTE(review): presumably the observation is (cart position, cart velocity,
 # pole angle, pole angular velocity) — confirm against the environment.
 # v and omega are unpacked but not used in the lines visible here.
 x, v, theta, omega = next_state
 # r1 shrinks linearly as |x| grows; after the 0.8 offset it is positive only
 # while |x| is within roughly 20% of env.x_threshold.
 r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 # the closer the cart is to the center, the better
 # r2 shrinks linearly as |theta| grows; after the 0.5 offset it is positive
 # only while |theta| is within roughly half of env.theta_threshold_radians.
 r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 # the more upright the pole, the better
 reward = r1 + r2

 # Store the transition with the shaped reward, not the environment's own.
 # NOTE(review): `done` is not passed to store_transition here — confirm
 # that this is intended.
 dqn.store_transition(state, action, reward, next_state)
 ...
	...
	# Advance the environment one step with the chosen action.
	next_state, reward, done, info = env.step(action)

	# Reshape the reward to speed up training. The reward returned by
	# env.step() above is discarded and replaced by the shaped value below.
	# NOTE(review): presumably the observation is (cart position, cart velocity,
	# pole angle, pole angular velocity) — confirm against the environment.
	# v and omega are unpacked but not used in the lines visible here.
	x, v, theta, omega = next_state
	# r1 shrinks linearly as |x| grows; after the 0.8 offset it is positive only
	# while |x| is within roughly 20% of env.x_threshold.
	r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 # the closer the cart is to the center, the better
	# r2 shrinks linearly as |theta| grows; after the 0.5 offset it is positive
	# only while |theta| is within roughly half of env.theta_threshold_radians.
	r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 # the more upright the pole, the better
	reward = r1 + r2

	# Store the transition with the shaped reward, not the environment's own.
	# NOTE(review): `done` is not passed to store_transition here — confirm
	# that this is intended.
	dqn.store_transition(state, action, reward, next_state)
	...