Created
October 4, 2019 09:21
Revisions
-
codefever created this gist
Oct 4, 2019 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,146 @@ #!/usr/bin/env python from enum import IntEnum import math import random import logging logging.basicConfig(level=logging.INFO) GRID = [ [0,0,0,0,0], [1,1,1,0,1], [1,0,0,0,1], [0,0,1,1,1], [1,0,0,0,0], ] class Direction(IntEnum): LEFT = 0 RIGHT = 1 TOP = 2 DOWN = 3 DIRS = { Direction.LEFT: (0,-1), Direction.RIGHT: (0,1), Direction.TOP: (-1,0), Direction.DOWN: (1,0), } def _get_dir(src, dst): if src[1] != dst[1]: return Direction.LEFT if src[1] - dst[1] == 1 else Direction.RIGHT else: return Direction.TOP if src[0] - dst[0] == 1 else Direction.DOWN def neighbors(grid, y, x): for d in DIRS.values(): yy = y + d[0] xx = x + d[1] if yy >= 0 and xx >= 0 and yy < len(grid) and xx < len(grid[0]): if grid[yy][xx] > 0: continue yield yy, xx def bfs(grid): start = (0, 0) end = (len(grid)-1, len(grid[0])-1) visited = [[False for _ in range(len(grid[0]))] for _ in range(len(grid))] visited[start[0]][start[1]] = True q = [start] dist = 0 while q: new_q = [] while q: origin, q = q[0], q[1:] if origin == end: return dist for nn in neighbors(grid, origin[0], origin[1]): if visited[nn[0]][nn[1]]: continue visited[nn[0]][nn[1]] = True new_q.append(nn) dist += 1 q = new_q return -1 def qlearning(grid): m, n = len(grid), len(grid[0]) end = (m-1, n-1) start = (0, 0) qtable = [[[0.0]*len(Direction) for _ in range(n)] for _ in range(m)] def reward(node, todir): d = DIRS[todir] if (node[0] + d[0], node[1] + d[1]) == end: # if it could reach the end, it would gain a greate bounus. return m*n return 1.0 def get_path(): node = start path = [] steps = 0 max_steps = m*n while node != end: val = -float('infinity') candidate = None todir = None for nn in neighbors(grid, node[0], node[1]): d = _get_dir(node, nn) if qtable[node[0]][node[1]][d.value] > val: val = qtable[node[0]][node[1]][d.value] candidate = nn todir = d path.append(todir.name[0]) node = candidate steps += 1 if steps >= max_steps: return None return ''.join(path) gramma = 0.777 alpha = 0.5 num_episodes = 20 for i in range(num_episodes): node = start steps = 0 eprate = math.exp(-i*0.1) # exploit if > while steps < 100 and node != end: valid_nn = list(neighbors(grid, node[0], node[1])) if random.random() > eprate: # exploitation val = -float('infinity') todir = None for tmp in valid_nn: tmp_dir = _get_dir(node, tmp) if qtable[node[0]][node[1]][tmp_dir] > val: nn = tmp todir = tmp_dir else: # explorartion nn = random.choice(valid_nn) todir = _get_dir(node, nn) # update table valid_tmps = [qtable[nn[0]][nn[1]][_get_dir(nn, tmp).value] for tmp in neighbors(grid, nn[0], nn[1])] qtable[node[0]][node[1]][todir.value] += alpha * (reward(node, todir) + gramma * max(valid_tmps) - qtable[node[0]][node[1]][todir.value]) # assert nn != node node = nn steps += 1 path = get_path() logging.debug('episode[{}] exploit=[{}] reach_end=[{}], path=[{}], steps=[{}]'.format(i, eprate, node==end, path, len(path) if path else 'INF')) path = get_path() return len(path) if path else -1 if __name__ == '__main__': print(bfs(GRID)) print(qlearning(GRID))