Qlearning

𝔸𝕝𝕘𝕖𝕣

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "GAMMA = 0.9\n",
    "LEARNING_RATE = 0.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "if __name__ == \"__main__\":\n",
    "    with open(\"Qtable.txt\", \"rb\") as f:\n",
    "        \"\"\"\n",
    "        This code is responsible for loading Qtable.txt if already present\n",
    "        \"\"\"\n",
    "        Q_table = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def next_best_action(state: int, Q_table: np.ndarray) -> int:\n",
    "    \"\"\"\n",
    "    Return the most suitable action value from Q_table or a random action\n",
    "    if there is no np.argmax(Q_table[state]).\n",
    "    >>> next_best_action(1, []) in [0, 1, 2, 3]\n",
    "    True\n",
    "    \"\"\"\n",
    "    action = np.argmax(Q_table[state])\n",
    "    return action if action is not None else np.random.choice([0, 1, 2, 3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def state_action_reward(player: object, x_food: int, y_food: int) -> tuple:\n",
    "    \"\"\"\n",
    "    This function returns state, action and reward to update the Qtable.\n",
    "    \"\"\"\n",
    "    x_agent, y_agent = player.body[0].pos\n",
    "    current_state = state(player, x_food, y_food)\n",
    "    current_action = next_best_action(state, Q_table)\n",
    "    current_reward = reward(player, x_food, y_food)\n",
    "    return (current_state, current_action, current_reward)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# States to consider:\n",
    "#   * Food's relative positioning wrt the head\n",
    "# Checking for food in nine directions\n",
    "###\n",
    "# #\n",
    "###\n",
    "#   * obstruction Ahead, Right, Left\n",
    "\n",
    "def state(player: object, x_food: int, y_food: int) -> int:\n",
    "    \"\"\"\n",
    "    This function Checks for the food in nine directions and returns state.\n",
    "    \"\"\"\n",
    "    x_agent, y_agent = player.body[0].pos\n",
    "    states = []\n",
    "\n",
    "    # Code to check for obstacles in front of the agent\n",
    "    DangerAhead = (\n",
    "        (player.dirnx == -1 and x_agent + player.delx < 0)\n",
    "        or (player.dirnx == -1 and ((x_agent + player.delx, y_agent) in player.body))\n",
    "        or (player.dirnx == 1 and x_agent + player.delx > 14)\n",
    "        or (player.dirnx == 1 and ((x_agent + player.delx, y_agent) in player.body))\n",
    "        or (player.dirny == 1 and y_agent + player.dely > 14)\n",
    "        or (player.dirny == 1 and ((x_agent, y_agent + player.dely) in player.body))\n",
    "        or (player.dirny == -1 and y_agent + player.dely < 0)\n",
    "        or (player.dirny == -1 and ((x_agent, y_agent + player.dely) in player.body))\n",
    "    )\n",
    "    # Code to check for obstacles to the left of the agent\n",
    "    DangerLeft = (\n",
    "        (player.dirnx == -1 and y_agent + 1 > 14)\n",
    "        or (player.dirnx == -1 and ((x_agent, y_agent + 1) in player.body))\n",
    "        or (player.dirnx == 1 and y_agent - 1 < 0)\n",
    "        or (player.dirnx == 1 and ((x_agent, y_agent - 1) in player.body))\n",
    "        or (player.dirny == 1 and x_agent + 1 > 14)\n",
    "        or (player.dirny == 1 and ((x_agent + 1, y_agent) in player.body))\n",
    "        or (player.dirny == -1 and x_agent - 1 < 0)\n",
    "        or (player.dirny == -1 and ((x_agent - 1, y_agent) in player.body))\n",
    "    )\n",
    "    # Code to check for obstacles to the right of the agent\n",
    "    DangerRight = (\n",
    "        (player.dirnx == -1 and y_agent - 1 < 0)\n",
    "        or (player.dirnx == -1 and ((x_agent, y_agent - 1) in player.body))\n",
    "        or (player.dirnx == 1 and y_agent + 1 > 14)\n",
    "        or (player.dirnx == 1 and ((x_agent, y_agent + 1) in player.body))\n",
    "        or (player.dirny == 1 and x_agent - 1 < 0)\n",
    "        or (player.dirny == 1 and ((x_agent - 1, y_agent) in player.body))\n",
    "        or (player.dirny == -1 and x_agent + 1 > 14)\n",
    "        or (player.dirny == -1 and ((x_agent + 1, y_agent) in player.body))\n",
    "    )\n",
    "    # Code for food straight wrt head\n",
    "    FoodStraightAhead = (\n",
    "        (player.dirnx == 1 and y_agent == y_food and x_food > x_agent)\n",
    "        or (player.dirnx == -1 and y_agent == y_food and x_food < x_agent)\n",
    "        or (player.dirny == 1 and y_agent < y_food and x_food == x_agent)\n",
    "        or (player.dirny == -1 and y_agent > y_food and x_food == x_agent)\n",
    "    )\n",
    "    # Code for food which is ahead and right wrt head\n",
    "    FoodAheadRight = (\n",
    "        (player.dirnx == 1 and y_agent < y_food and x_food > x_agent)\n",
    "        or (player.dirnx == -1 and y_agent > y_food and x_food < x_agent)\n",
    "        or (player.dirny == 1 and y_agent < y_food and x_food < x_agent)\n",
    "        or (player.dirny == -1 and y_agent > y_food and x_food > x_agent)\n",
    "    )\n",
    "    # Code for food which is ahead and right wrt head\n",
    "    FoodAheadLeft = (\n",
    "        (player.dirnx == 1 and y_agent > y_food and x_food > x_agent)\n",
    "        or (player.dirnx == -1 and y_agent < y_food and x_food < x_agent)\n",
    "        or (player.dirny == 1 and y_agent < y_food and x_food > x_agent)\n",
    "        or (player.dirny == -1 and y_agent > y_food and x_food < x_agent)\n",
    "    )\n",
    "    # Code for food which is ahead and right wrt head\n",
    "    FoodBehindRight = (\n",
    "        (player.dirnx == 1 and y_agent < y_food and x_food < x_agent)\n",
    "        or (player.dirnx == -1 and y_agent > y_food and x_food > x_agent)\n",
    "        or (player.dirny == 1 and y_agent > y_food and x_food < x_agent)\n",
    "        or (player.dirny == -1 and y_agent < y_food and x_food > x_agent)\n",
    "    )\n",
    "    # Code for food which is ahead and right wrt head\n",
    "    FoodBehindLeft = (\n",
    "        (player.dirnx == 1 and y_agent > y_food and x_food < x_agent)\n",
    "        or (player.dirnx == -1 and y_agent < y_food and x_food > x_agent)\n",
    "        or (player.dirny == 1 and y_agent > y_food and x_food > x_agent)\n",
    "        or (player.dirny == -1 and y_agent < y_food and x_food < x_agent)\n",
    "    )\n",
    "    # Code for food exactly behind\n",
    "    FoodBehind = (\n",
    "        (player.dirnx == 1 and y_agent == y_food and x_food < x_agent)\n",
    "        or (player.dirnx == -1 and y_agent == y_food and x_food > x_agent)\n",
    "        or (player.dirny == 1 and y_agent > y_food and x_food == x_agent)\n",
    "        or (player.dirny == -1 and y_agent < y_food and x_food == x_agent)\n",
    "    )\n",
    "    # Code for food left\n",
    "    FoodLeft = (\n",
    "        (player.dirnx == 1 and y_agent > y_food and x_food == x_agent)\n",
    "        or (player.dirnx == -1 and y_agent < y_food and x_food == x_agent)\n",
    "        or (player.dirny == 1 and y_agent == y_food and x_food > x_agent)\n",
    "        or (player.dirny == -1 and y_agent == y_food and x_food < x_agent)\n",
    "    )\n",
    "    # Code for food right\n",
    "    FoodRight = (\n",
    "        (player.dirnx == 1 and y_agent < y_food and x_food == x_agent)\n",
    "        or (player.dirnx == -1 and y_agent > y_food and x_food == x_agent)\n",
    "        or (player.dirny == 1 and y_agent == y_food and x_food < x_agent)\n",
    "        or (player.dirny == -1 and y_agent == y_food and x_food > x_agent)\n",
    "    )\n",
    "    # Adding to states list while priortizing danger over eating food\n",
    "    states = [int(x) for x in (DangerAhead, DangerLeft, DangerRight)]\n",
    "    if sum(states) == 0:\n",
    "        states += [int(x) for x in (\n",
    "            FoodStraightAhead, FoodAheadRight, FoodAheadLeft, FoodBehindRight,\n",
    "            FoodBehindLeft, FoodBehind, FoodLeft, FoodRight\n",
    "        )]\n",
    "    else:\n",
    "        states += [0] * 8\n",
    "    state = 0\n",
    "    for i in range(11):\n",
    "        if states[i] == 1:\n",
    "            state = i\n",
    "    return state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def euler_dist(x1: int, y1: int, x2: int, y2: int) -> int:\n",
    "    \"\"\"\n",
    "    For calculation of Euler Distance.\n",
    "    \"\"\"\n",
    "    dist = (x1 - x2) ** 2 + (y1 - y2) ** 2\n",
    "    return dist ** 0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reward conditions:\n",
    "#   * +10 for eating food\n",
    "#   * -12 for dying\n",
    "#   * -2 for getting closer\n",
    "#   * -7 for going away from the fruit\n",
    "\n",
    "\n",
    "def reward(player: object, x_food: int, y_food: int) -> int:\n",
    "    \"\"\"\n",
    "    This function assigns the reward to the agent according to the action taken\n",
    "    \"\"\"\n",
    "    x_agent, y_agent = player.body[0].pos\n",
    "    if x_agent == x_food and y_agent == y_food:\n",
    "        return +10\n",
    "    elif (\n",
    "        (player.dirnx == -1 and x_agent + player.delx <= 0)\n",
    "        or (\n",
    "            player.dirnx == -1\n",
    "            and ((x_agent + player.delx, y_agent + player.dely) in player.body)\n",
    "        )\n",
    "        or (player.dirnx == 1 and x_agent + player.delx >= 14)\n",
    "        or (\n",
    "            player.dirnx == 1\n",
    "            and ((x_agent + player.dirnx, y_agent + player.dely) in player.body)\n",
    "        )\n",
    "        or (player.dirny == 1 and y_agent + player.dely >= 14)\n",
    "        or (\n",
    "            player.dirny == 1\n",
    "            and ((x_agent + player.delx, y_agent + player.dely) in player.body)\n",
    "        )\n",
    "        or (player.dirny == -1 and y_agent + player.dely <= 0)\n",
    "        or (\n",
    "            player.dirny == -1\n",
    "            and ((x_agent + player.delx, y_agent + player.dely) in player.body)\n",
    "        )\n",
    "        or (player.resetDone is True)\n",
    "    ):\n",
    "        return -12\n",
    "    elif (\n",
    "        euler_dist(x_agent + player.delx, y_agent + player.dely, x_food, y_food)\n",
    "        - euler_dist(x_agent, y_agent, x_food, y_food)\n",
    "        > 0\n",
    "    ):\n",
    "        return -2\n",
    "    elif (\n",
    "        euler_dist(x_agent + player.delx, y_agent + player.dely, x_food, y_food)\n",
    "        - euler_dist(x_agent, y_agent, x_food, y_food)\n",
    "        < 0\n",
    "    ):\n",
    "        return -7\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def learn(\n",
    "        state: int, action: int, reward: int, next_state: int,\n",
    "        next_action: int, i: int, trialNumber: int, epsilon: float) -> type(None):\n",
    "    \"\"\"\n",
    "    This function is for iteratively updating the Qtable.\n",
    "    \"\"\"\n",
    "    currentQ = Q_table[state][action]\n",
    "    nextQ = Q_table[next_state][next_action]\n",
    "    # Qlearning Algorithm to get new q value for the q table.\n",
    "    newQ = (1 - LEARNING_RATE) * currentQ + LEARNING_RATE * (reward + GAMMA * nextQ)\n",
    "    Q_table[state][action] = newQ\n",
    "    state = next_state\n",
    "    currentQ = nextQ\n",
    "    if trialNumber % 100 == 0:\n",
    "        print(\"Printing Q_table: \")\n",
    "        print(Q_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

Try this Code

About this Algorithm

import numpy as np
import pickle

GAMMA = 0.9
LEARNING_RATE = 0.6

if __name__ == "__main__":
    with open("Qtable.txt", "rb") as f:
        """
        This code is responsible for loading Qtable.txt if already present
        """
        Q_table = pickle.load(f)

def next_best_action(state: int, Q_table: np.ndarray) -&gt; int:
    """
    Return the most suitable action value from Q_table or a random action
    if there is no np.argmax(Q_table[state]).
    &gt;&gt;&gt; next_best_action(1, []) in [0, 1, 2, 3]
    True
    """
    action = np.argmax(Q_table[state])
    return action if action is not None else np.random.choice([0, 1, 2, 3])

def state_action_reward(player: object, x_food: int, y_food: int) -&gt; tuple:
    """
    This function returns state, action and reward to update the Qtable.
    """
    x_agent, y_agent = player.body[0].pos
    current_state = state(player, x_food, y_food)
    current_action = next_best_action(state, Q_table)
    current_reward = reward(player, x_food, y_food)
    return (current_state, current_action, current_reward)

# States to consider:
#   * Food's relative positioning wrt the head
# Checking for food in nine directions
###
# #
###
#   * obstruction Ahead, Right, Left

def state(player: object, x_food: int, y_food: int) -&gt; int:
    """
    This function Checks for the food in nine directions and returns state.
    """
    x_agent, y_agent = player.body[0].pos
    states = []

    # Code to check for obstacles in front of the agent
    DangerAhead = (
        (player.dirnx == -1 and x_agent + player.delx &lt; 0)
        or (player.dirnx == -1 and ((x_agent + player.delx, y_agent) in player.body))
        or (player.dirnx == 1 and x_agent + player.delx &gt; 14)
        or (player.dirnx == 1 and ((x_agent + player.delx, y_agent) in player.body))
        or (player.dirny == 1 and y_agent + player.dely &gt; 14)
        or (player.dirny == 1 and ((x_agent, y_agent + player.dely) in player.body))
        or (player.dirny == -1 and y_agent + player.dely &lt; 0)
        or (player.dirny == -1 and ((x_agent, y_agent + player.dely) in player.body))
    )
    # Code to check for obstacles to the left of the agent
    DangerLeft = (
        (player.dirnx == -1 and y_agent + 1 &gt; 14)
        or (player.dirnx == -1 and ((x_agent, y_agent + 1) in player.body))
        or (player.dirnx == 1 and y_agent - 1 &lt; 0)
        or (player.dirnx == 1 and ((x_agent, y_agent - 1) in player.body))
        or (player.dirny == 1 and x_agent + 1 &gt; 14)
        or (player.dirny == 1 and ((x_agent + 1, y_agent) in player.body))
        or (player.dirny == -1 and x_agent - 1 &lt; 0)
        or (player.dirny == -1 and ((x_agent - 1, y_agent) in player.body))
    )
    # Code to check for obstacles to the right of the agent
    DangerRight = (
        (player.dirnx == -1 and y_agent - 1 &lt; 0)
        or (player.dirnx == -1 and ((x_agent, y_agent - 1) in player.body))
        or (player.dirnx == 1 and y_agent + 1 &gt; 14)
        or (player.dirnx == 1 and ((x_agent, y_agent + 1) in player.body))
        or (player.dirny == 1 and x_agent - 1 &lt; 0)
        or (player.dirny == 1 and ((x_agent - 1, y_agent) in player.body))
        or (player.dirny == -1 and x_agent + 1 &gt; 14)
        or (player.dirny == -1 and ((x_agent + 1, y_agent) in player.body))
    )
    # Code for food straight wrt head
    FoodStraightAhead = (
        (player.dirnx == 1 and y_agent == y_food and x_food &gt; x_agent)
        or (player.dirnx == -1 and y_agent == y_food and x_food &lt; x_agent)
        or (player.dirny == 1 and y_agent &lt; y_food and x_food == x_agent)
        or (player.dirny == -1 and y_agent &gt; y_food and x_food == x_agent)
    )
    # Code for food which is ahead and right wrt head
    FoodAheadRight = (
        (player.dirnx == 1 and y_agent &lt; y_food and x_food &gt; x_agent)
        or (player.dirnx == -1 and y_agent &gt; y_food and x_food &lt; x_agent)
        or (player.dirny == 1 and y_agent &lt; y_food and x_food &lt; x_agent)
        or (player.dirny == -1 and y_agent &gt; y_food and x_food &gt; x_agent)
    )
    # Code for food which is ahead and right wrt head
    FoodAheadLeft = (
        (player.dirnx == 1 and y_agent &gt; y_food and x_food &gt; x_agent)
        or (player.dirnx == -1 and y_agent &lt; y_food and x_food &lt; x_agent)
        or (player.dirny == 1 and y_agent &lt; y_food and x_food &gt; x_agent)
        or (player.dirny == -1 and y_agent &gt; y_food and x_food &lt; x_agent)
    )
    # Code for food which is ahead and right wrt head
    FoodBehindRight = (
        (player.dirnx == 1 and y_agent &lt; y_food and x_food &lt; x_agent)
        or (player.dirnx == -1 and y_agent &gt; y_food and x_food &gt; x_agent)
        or (player.dirny == 1 and y_agent &gt; y_food and x_food &lt; x_agent)
        or (player.dirny == -1 and y_agent &lt; y_food and x_food &gt; x_agent)
    )
    # Code for food which is ahead and right wrt head
    FoodBehindLeft = (
        (player.dirnx == 1 and y_agent &gt; y_food and x_food &lt; x_agent)
        or (player.dirnx == -1 and y_agent &lt; y_food and x_food &gt; x_agent)
        or (player.dirny == 1 and y_agent &gt; y_food and x_food &gt; x_agent)
        or (player.dirny == -1 and y_agent &lt; y_food and x_food &lt; x_agent)
    )
    # Code for food exactly behind
    FoodBehind = (
        (player.dirnx == 1 and y_agent == y_food and x_food &lt; x_agent)
        or (player.dirnx == -1 and y_agent == y_food and x_food &gt; x_agent)
        or (player.dirny == 1 and y_agent &gt; y_food and x_food == x_agent)
        or (player.dirny == -1 and y_agent &lt; y_food and x_food == x_agent)
    )
    # Code for food left
    FoodLeft = (
        (player.dirnx == 1 and y_agent &gt; y_food and x_food == x_agent)
        or (player.dirnx == -1 and y_agent &lt; y_food and x_food == x_agent)
        or (player.dirny == 1 and y_agent == y_food and x_food &gt; x_agent)
        or (player.dirny == -1 and y_agent == y_food and x_food &lt; x_agent)
    )
    # Code for food right
    FoodRight = (
        (player.dirnx == 1 and y_agent &lt; y_food and x_food == x_agent)
        or (player.dirnx == -1 and y_agent &gt; y_food and x_food == x_agent)
        or (player.dirny == 1 and y_agent == y_food and x_food &lt; x_agent)
        or (player.dirny == -1 and y_agent == y_food and x_food &gt; x_agent)
    )
    # Adding to states list while priortizing danger over eating food
    states = [int(x) for x in (DangerAhead, DangerLeft, DangerRight)]
    if sum(states) == 0:
        states += [int(x) for x in (
            FoodStraightAhead, FoodAheadRight, FoodAheadLeft, FoodBehindRight,
            FoodBehindLeft, FoodBehind, FoodLeft, FoodRight
        )]
    else:
        states += [0] * 8
    state = 0
    for i in range(11):
        if states[i] == 1:
            state = i
    return state

def euler_dist(x1: int, y1: int, x2: int, y2: int) -&gt; int:
    """
    For calculation of Euler Distance.
    """
    dist = (x1 - x2) ** 2 + (y1 - y2) ** 2
    return dist ** 0.5

# Reward conditions:
#   * +10 for eating food
#   * -12 for dying
#   * -2 for getting closer
#   * -7 for going away from the fruit


def reward(player: object, x_food: int, y_food: int) -&gt; int:
    """
    This function assigns the reward to the agent according to the action taken
    """
    x_agent, y_agent = player.body[0].pos
    if x_agent == x_food and y_agent == y_food:
        return +10
    elif (
        (player.dirnx == -1 and x_agent + player.delx &lt;= 0)
        or (
            player.dirnx == -1
            and ((x_agent + player.delx, y_agent + player.dely) in player.body)
        )
        or (player.dirnx == 1 and x_agent + player.delx &gt;= 14)
        or (
            player.dirnx == 1
            and ((x_agent + player.dirnx, y_agent + player.dely) in player.body)
        )
        or (player.dirny == 1 and y_agent + player.dely &gt;= 14)
        or (
            player.dirny == 1
            and ((x_agent + player.delx, y_agent + player.dely) in player.body)
        )
        or (player.dirny == -1 and y_agent + player.dely &lt;= 0)
        or (
            player.dirny == -1
            and ((x_agent + player.delx, y_agent + player.dely) in player.body)
        )
        or (player.resetDone is True)
    ):
        return -12
    elif (
        euler_dist(x_agent + player.delx, y_agent + player.dely, x_food, y_food)
        - euler_dist(x_agent, y_agent, x_food, y_food)
        &gt; 0
    ):
        return -2
    elif (
        euler_dist(x_agent + player.delx, y_agent + player.dely, x_food, y_food)
        - euler_dist(x_agent, y_agent, x_food, y_food)
        &lt; 0
    ):
        return -7

def learn(
        state: int, action: int, reward: int, next_state: int,
        next_action: int, i: int, trialNumber: int, epsilon: float) -&gt; type(None):
    """
    This function is for iteratively updating the Qtable.
    """
    currentQ = Q_table[state][action]
    nextQ = Q_table[next_state][next_action]
    # Qlearning Algorithm to get new q value for the q table.
    newQ = (1 - LEARNING_RATE) * currentQ + LEARNING_RATE * (reward + GAMMA * nextQ)
    Q_table[state][action] = newQ
    state = next_state
    currentQ = nextQ
    if trialNumber % 100 == 0:
        print("Printing Q_table: ")
        print(Q_table)

About us

We are a group of programmers helping each other build new things, whether it be writing complex encryption programs, or simple ciphers. Our goal is to work together to document and model beautiful, helpful and interesting algorithms using code. We are an open-source community - anyone can contribute. We check each other's work, communicate and collaborate to solve problems. We strive to be welcoming, respectful, yet make sure that our code follows the latest programming guidelines.