import math
import random

import mdp


class Qlearner:
    """Tabular Q-learning agent that explores with a Boltzmann (softmax) policy.

    Attributes:
        qtable: dict mapping (state, action) -> estimated value; every pair
            from ``states`` x ``actions`` starts at 0.0.
        states: the states handed to the constructor.
        actions: the actions handed to the constructor.
        world: the environment object handed to the constructor.
        temp: Boltzmann temperature, initialised to 2.0.
        alpha: learning rate used by ``update``.
        gamma: discount factor used by ``update``.
    """

    def __init__(self, states, actions, alpha, gamma, world):
        self.qtable = {}
        self.states = states
        self.actions = actions
        for s in states:
            for a in actions:
                self.qtable[(s, a)] = 0.0
        self.world = world
        self.temp = 2.0
        self.alpha = alpha
        self.gamma = gamma

    def selectAction(self, state):
        """Pick an action for ``state`` using Boltzmann exploration.

        Each action ``a`` is drawn with probability proportional to
        ``exp(Q(state, a) / self.temp)``.  Pairs missing from the table are
        treated as 0.0, the same value ``__init__`` assigns.  When
        ``self.temp`` is not positive the choice is greedy (first action
        with the highest value).

        Raises:
            ValueError: if the learner has no actions to choose from.
        """
        actions = list(self.actions)
        if not actions:
            raise ValueError("cannot select an action: no actions available")

        values = [self.qtable.get((state, a), 0.0) for a in actions]
        best = max(values)

        if self.temp <= 0:
            # A zero/negative temperature would divide by zero or invert the
            # preference order, so fall back to pure exploitation.
            return actions[values.index(best)]

        # Subtracting the maximum leaves the distribution unchanged but keeps
        # every exponent <= 0, so math.exp cannot overflow.
        weights = [math.exp((v - best) / self.temp) for v in values]

        threshold = random.random() * sum(weights)
        cumulative = 0.0
        for action, weight in zip(actions, weights):
            cumulative += weight
            if threshold < cumulative:
                return action
        # Only reachable through floating-point rounding in the running sum.
        return actions[-1]

    def update(self, oldstate, action, newstate, reward):
        """Apply the one-step Q-learning update for a single transition.

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))

        Pairs missing from the table are treated as 0.0.
        """
        next_values = [self.qtable.get((newstate, a), 0.0) for a in self.actions]
        best_next = max(next_values) if next_values else 0.0
        current = self.qtable.get((oldstate, action), 0.0)
        target = reward + self.gamma * best_next
        self.qtable[(oldstate, action)] = current + self.alpha * (target - current)


class World:
    """Thin wrapper around a map object providing transitions and rewards."""

    # Reward returned by ``reward`` for every state that is not a goal.
    STEP_REWARD = -0.04

    def __init__(self, map):
        # NOTE: the parameter shadows the builtin ``map``; the name is kept so
        # existing keyword callers keep working.
        self.map = map

    def newState(self, statenum, action):
        """Move the agent from one state to another according to the map's
        transition function.

        ``self.map.states[statenum].transitions[action]`` is read as a
        sequence of entries whose element 0 is a probability and whose
        element 1 is a state object exposing ``coords``.  One entry is
        sampled according to those probabilities and its ``coords`` is
        returned.

        Raises:
            IndexError: if the transition table is empty.
        """
        ttable = self.map.states[statenum].transitions[action]
        val = random.random()
        total = 0.0
        for entry in ttable:
            total += entry[0]
            # Strict comparison: random.random() can return exactly 0.0, and
            # a zero-probability entry must never be selected.
            if val < total:
                return entry[1].coords
        # Probabilities that sum to slightly less than 1.0 because of
        # rounding would otherwise run off the end of the table.
        return ttable[-1][1].coords

    def atGoal(self, state):
        """Return the ``isGoal`` flag of ``state`` in the map."""
        return self.map.states[state].isGoal

    def reward(self, state):
        """Return the state's ``utility`` if it is a goal, else STEP_REWARD."""
        cell = self.map.states[state]
        if cell.isGoal:
            return cell.utility
        return self.STEP_REWARD


def learn(world=None, states=None, actions=None, alpha=0.1, gamma=0.9,
          episodes=1000, max_steps=1000):
    """Train a Qlearner in ``world`` and return it.

    Each episode starts in a randomly chosen non-goal state from ``states``
    and runs until ``world.atGoal`` reports a goal or ``max_steps``
    transitions have been taken.  The reward for a transition is
    ``world.reward`` of the state that was reached.

    Args:
        world: object providing ``newState``, ``atGoal`` and ``reward``
            (e.g. a ``World``).
        states: iterable of states usable as arguments to ``world``.
        actions: iterable of actions usable as arguments to ``world.newState``.
        alpha: learning rate passed to ``Qlearner``.
        gamma: discount factor passed to ``Qlearner``.
        episodes: number of training episodes.
        max_steps: cap on transitions per episode, so an episode always ends.

    Returns:
        The trained ``Qlearner``.

    Raises:
        NotImplementedError: if ``world``, ``states`` or ``actions`` is
            omitted; building a default problem from the ``mdp`` module is
            not implemented here.
    """
    if world is None or states is None or actions is None:
        raise NotImplementedError(
            "learn() requires world, states and actions; constructing a "
            "default problem from the mdp module is not implemented"
        )

    states = list(states)
    actions = list(actions)
    learner = Qlearner(states, actions, alpha, gamma, world)

    starts = [s for s in states if not world.atGoal(s)]
    if not starts:
        # Every state is a goal: there is nothing to learn.
        return learner

    for _ in range(episodes):
        state = random.choice(starts)
        for _ in range(max_steps):
            action = learner.selectAction(state)
            reached = world.newState(state, action)
            learner.update(state, action, reached, world.reward(reached))
            if world.atGoal(reached):
                break
            state = reached
    return learner