Defining and solving a small POMDP using pomdp-py
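The script below defines the crying baby POMDP with tabular transition, observation, and reward models, writes it out in Cassandra's .POMDP file format, solves it with the pomdp-solve value iteration binary, and then simulates the resulting policy for a few steps. Running it requires pomdp-py with the tabular models and external-solver interface (see the linked documentation), plus a compiled pomdp-solve binary (Anthony Cassandra's solver, distributed via pomdp.org); adjust pomdp_solve_path below to point at your own installation.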
""" | |
Example of defining a small, tabular POMDP and solving | |
it using Cassandra's pomdp-solve value iteration solver. | |
Refer to documentation: | |
https://h2r.github.io/pomdp-py/html/examples.external_solvers.html | |
""" | |
import pomdp_py | |
def cryingbaby(): | |
"""This is a POMDP defined in the Algorithms for Decision Making book | |
by M. J. Kochenderfer et al. in section F.7""" | |
S = ['hungry', 'sated'] | |
A = ['feed', 'sing', 'ignore'] | |
Z = ['crying', 'quiet'] | |
T = pomdp_py.TabularTransitionModel({ | |
# state, action, next state | |
('hungry', 'feed', 'sated'): 1.0, | |
('hungry', 'feed', 'hungry'): 0.0, | |
('hungry', 'sing', 'hungry'): 1.0, | |
('hungry', 'sing', 'sated'): 0.0, | |
('hungry', 'ignore', 'hungry'): 1.0, | |
('hungry', 'ignore', 'sated'): 0.0, | |
('sated', 'feed', 'sated'): 1.0, | |
('sated', 'feed', 'hungry'): 0.0, | |
('sated', 'sing', 'hungry'): 0.1, | |
('sated', 'sing', 'sated'): 0.9, | |
('sated', 'ignore', 'hungry'): 0.1, | |
('sated', 'ignore', 'sated'): 0.9 | |
}) | |
    O = pomdp_py.TabularObservationModel({
        # next state, action, observation
        ('hungry', 'feed', 'crying'): 0.8,
        ('hungry', 'feed', 'quiet'): 0.2,
        ('hungry', 'sing', 'crying'): 0.9,
        ('hungry', 'sing', 'quiet'): 0.1,
        ('hungry', 'ignore', 'crying'): 0.8,
        ('hungry', 'ignore', 'quiet'): 0.2,
        ('sated', 'feed', 'crying'): 0.1,
        ('sated', 'feed', 'quiet'): 0.9,
        ('sated', 'sing', 'crying'): 0.1,
        ('sated', 'sing', 'quiet'): 0.9,
        ('sated', 'ignore', 'crying'): 0.1,
        ('sated', 'ignore', 'quiet'): 0.9,
    })
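    # R(s, a): a -10 penalty whenever the baby is hungry, plus an action
    # cost of -5 for feeding and -0.5 for singing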
    R = pomdp_py.TabularRewardModel({
        # state, action
        ('hungry', 'feed'): -10 - 5,
        ('hungry', 'sing'): -10 - 0.5,
        ('hungry', 'ignore'): -10,
        ('sated', 'feed'): -5,
        ('sated', 'sing'): -0.5,
        ('sated', 'ignore'): 0
    })
    gamma = 0.9
    return S, A, Z, T, O, R, gamma
if __name__ == "__main__":
    S, A, Z, T, O, R, gamma = cryingbaby()
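    # the Agent needs a policy model; a uniform random one suffices here
    # because planning is delegated to pomdp-solve, not an online planner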
    pi = pomdp_py.UniformPolicyModel(A)
    b0 = pomdp_py.Histogram({"hungry": 0.22,
                             "sated": 0.78})
    agent = pomdp_py.Agent(b0, pi, T, O, R)
    horizon = 5
    filename = "cryingbaby.POMDP"
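    # serialize the agent's models into Cassandra's .POMDP file format,
    # which is the input format pomdp-solve expects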
    pomdp_py.to_pomdp_file(agent, filename, discount_factor=gamma)
    # path to the pomdp-solve binary
    pomdp_solve_path = "/home/kaiyuzh/software/pomdp-solve-5.4/src/pomdp-solve"
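    # run value iteration through the pomdp-solve binary; with
    # return_policy_graph=False the solution comes back as an
    # alpha-vector policy, which maps the agent's belief to an action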
    policy = pomdp_py.vi_pruning(agent, pomdp_solve_path,
                                 discount_factor=gamma,
                                 options=["-horizon", horizon],
                                 remove_generated_files=False,
                                 return_policy_graph=False)
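    # sanity check: exactly evaluate the initial belief by expanding
    # the belief tree out to the given horizon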
    print(pomdp_py.value(agent.belief, S, A, Z, T, O, R, gamma, horizon=horizon))

    state = "hungry"  # true initial state
    for step in range(10):
        action = policy.plan(agent)
        next_state = T.sample(state, action)
        reward = R.sample(state, action, next_state)
        observation = O.sample(next_state, action)
        print(f"step = {step+1}"
              f"\t|\taction: {action}"
              f"\t|\tobservation: {observation}"
              f"\t|\tstate: {state}"
              f"\t|\treward: {reward}"
              f"\t|\tbelief: {agent.belief}")
        # update agent belief
        next_belief = pomdp_py.belief_update(agent.belief, action, observation, T, O)
        agent.set_belief(pomdp_py.Histogram(next_belief))
        # apply state transition to the environment
        state = next_state
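
For reference, the belief update in the loop above is a discrete Bayes filter over the two states. Below is a minimal sketch of the same computation written against plain dictionaries rather than pomdp-py's model classes; the function and argument names are illustrative, not part of the pomdp-py API.

def bayes_filter(b, a, o, S, T_probs, O_probs):
    """Discrete Bayes filter: b'(s') is proportional to
    O(o | s', a) * sum over s of T(s' | s, a) * b(s).

    b:        dict mapping state -> probability
    T_probs:  dict mapping (state, action, next state) -> probability
    O_probs:  dict mapping (next state, action, observation) -> probability
    """
    new_b = {}
    for sp in S:
        pred = sum(T_probs[(s, a, sp)] * b[s] for s in S)  # prediction step
        new_b[sp] = O_probs[(sp, a, o)] * pred             # correction step
    total = sum(new_b.values())                            # normalizing constant
    return {s: p / total for s, p in new_b.items()}

For example, starting from the initial belief (hungry: 0.22, sated: 0.78), taking 'ignore' and observing 'crying' gives predicted probabilities of 0.298 for hungry and 0.702 for sated, hence an unnormalized posterior of 0.8 * 0.298 = 0.2384 versus 0.1 * 0.702 = 0.0702; after normalizing, the belief that the baby is hungry jumps to about 0.77.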