import random
# Placeholder — to be replaced with the submitting student's full name.
student_name = "Type your full name here."
# 1. Q-Learning
class QLearningAgent:
    """Tabular Q-learning agent.

    Maintains a Q-table mapping (state, action) pairs to estimated
    returns, updated online with the running-average Q-learning rule,
    and acts epsilon-greedily with respect to the current table.
    """

    def __init__(self, game, discount, learning_rate, explore_prob):
        """Store the learning parameters and initialize an empty Q-table.

        game          -- environment; NOTE(review): assumed to expose
                         game.get_actions(state) returning the legal
                         actions of a state — confirm against the
                         game/harness API.
        discount      -- gamma, the discount factor.
        learning_rate -- alpha, the running-average step size.
        explore_prob  -- epsilon, the probability of acting randomly.
        """
        self.game = game
        self.discount = discount
        self.learning_rate = learning_rate
        self.explore_prob = explore_prob
        # (state, action) -> Q-value; unseen pairs implicitly have value 0.
        self.q_table = {}

    def get_q_value(self, state, action):
        """Retrieve Q(s,a) from the Q-table.

        A never-seen (s,a) pair has a Q-value of 0 by default.
        """
        return self.q_table.get((state, action), 0)

    def get_value(self, state):
        """Compute the state value via the Bellman equation:
        V(s) = max_a Q(s,a).

        Terminal states (no legal actions) have value 0.
        """
        actions = self.game.get_actions(state)
        if not actions:
            return 0
        return max(self.get_q_value(state, action) for action in actions)

    def get_best_policy(self, state):
        """Compute the best action via policy extraction:
        pi(s) = argmax_a Q(s,a).

        Ties are broken uniformly at random (better early-training
        performance than always picking the first maximizer).
        Returns None for terminal states.
        """
        actions = self.game.get_actions(state)
        if not actions:
            return None
        best_value = self.get_value(state)
        best_actions = [action for action in actions
                        if self.get_q_value(state, action) == best_value]
        return random.choice(best_actions)

    def update(self, state, action, next_state, reward):
        """Update Q-values using a running average:
        Q(s,a) = (1 - alpha) Q(s,a) + alpha (R + gamma V(s'))
        where alpha is the learning rate and gamma is the discount.

        Note: called by the training harness; agent code should not
        call this itself.
        """
        sample = reward + self.discount * self.get_value(next_state)
        old_value = self.get_q_value(state, action)
        self.q_table[(state, action)] = (
            (1 - self.learning_rate) * old_value
            + self.learning_rate * sample
        )

    # 2. Epsilon Greedy
    def get_action(self, state):
        """Choose the action to take, incorporating exploration.

        With probability epsilon act uniformly at random; otherwise
        follow the best policy.
        """
        if random.random() < self.explore_prob:
            return random.choice(list(self.game.get_actions(state)))
        return self.get_best_policy(state)
# 3. Bridge Crossing Revisited
def question3():
    """Bridge Crossing Revisited.

    Returns the (epsilon, learning_rate) pair that makes the agent
    learn the optimal bridge-crossing policy within the allotted
    episodes — or the sentinel 'NOT POSSIBLE' if no such pair exists.

    With so few episodes, a high epsilon makes the agent fall off the
    bridge before propagating the far reward, while a low epsilon never
    discovers it; no setting reliably learns the optimal policy, so the
    answer is the sentinel.
    """
    return 'NOT POSSIBLE'
# 5. Approximate Q-Learning
class ApproximateQAgent(QLearningAgent):
    """Approximate Q-learning agent using a linear function of features.

    Instead of a Q-table, maintains one weight per feature; Q(s,a) is
    the dot product of the feature vector and the weight vector.
    """

    def __init__(self, *args, extractor):
        """Forward base parameters and store the feature extractor.

        extractor -- NOTE(review): assumed to be a callable
                     extractor(state, action) -> {feature: value};
                     confirm against the harness.
        """
        super().__init__(*args)
        self.extractor = extractor
        # feature -> weight; unseen features implicitly have weight 0.
        self.weights = {}

    def get_weight(self, feature):
        """Weight of a feature; never-seen features default to 0."""
        return self.weights.get(feature, 0)

    def get_q_value(self, state, action):
        """Linear Q-value:
        Q(s,a) = w_1 f_1(s,a) + w_2 f_2(s,a) + ... + w_n f_n(s,a).
        """
        features = self.extractor(state, action)
        return sum(self.get_weight(feature) * value
                   for feature, value in features.items())

    def update(self, state, action, next_state, reward):
        """Least-squares weight update:
        delta = R + gamma V(s') - Q(s,a)
        w_i  += alpha * delta * f_i(s,a)

        NOTE(review): relies on QLearningAgent.__init__ storing the
        discount and learning rate as self.discount / self.learning_rate
        — confirm the base-class attribute names.
        """
        # Compute delta with the OLD weights before mutating any of them.
        delta = (reward
                 + self.discount * self.get_value(next_state)
                 - self.get_q_value(state, action))
        for feature, value in self.extractor(state, action).items():
            self.weights[feature] = (self.get_weight(feature)
                                     + self.learning_rate * delta * value)
# 6. Feedback
# Just an approximation is fine.
# Placeholder numeric answer — presumably an estimate (e.g., hours
# spent); confirm against the assignment handout.
feedback_question_1 = 0

# Placeholder free-text response; to be replaced by the student.
feedback_question_2 = """
Type your response here.
Your response may span multiple lines.
Do not include these instructions in your response.
"""

# Placeholder free-text response; to be replaced by the student.
feedback_question_3 = """
Type your response here.
Your response may span multiple lines.
Do not include these instructions in your response.
"""