From 9109281774d3fb0796f6b8d053774ec4d3ec3ef0 Mon Sep 17 00:00:00 2001
From: nkorra
Date: Mon, 28 Mar 2022 21:17:22 +0200
Subject: [PATCH 1/2] changes updated

---
 Network_Routing_Env_update.ipynb | 621 +++++++++++++++++++++++++++++++
 1 file changed, 621 insertions(+)
 create mode 100644 Network_Routing_Env_update.ipynb

diff --git a/Network_Routing_Env_update.ipynb b/Network_Routing_Env_update.ipynb
new file mode 100644
index 0000000..bade1d5
--- /dev/null
+++ b/Network_Routing_Env_update.ipynb
@@ -0,0 +1,621 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "bc34563a",
+   "metadata": {},
+   "source": [
+    "## Environment with all functions and one-hot encoding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 305,
+   "id": "b508c735-59ea-4955-b110-f5c10fd88408",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 377,
+   "id": "b8b3144c-1d05-4e33-afb0-edf7d6fdc395",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class NetworkEnv():\n",
+    "    # metadata = {'render.modes': ['human']}\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        super(NetworkEnv, self).__init__()\n",
+    "        self.Node_1Q = np.zeros(10)\n",
+    "        self.Node_2Q = np.zeros(10)\n",
+    "        self.Node_3Q = np.zeros(10)\n",
+    "        self.Node_4Q = np.zeros(10)\n",
+    "        self.Node_5Q = np.zeros(10)\n",
+    "        self.state_observation = np.zeros([10, 5])\n",
+    "        self.action = np.zeros(5)\n",
+    "        self.N1_max_actions = 2\n",
+    "        self.N2_max_actions = 3\n",
+    "        self.N3_max_actions = 3\n",
+    "        self.N4_max_actions = 3\n",
+    "        self.N5_max_actions = 3\n",
+    "        self.done = False\n",
+    "        self.episode_length = 0\n",
+    "\n",
+    "    def reset(self, df):\n",
+    "        # Reset the state of the environment to an initial state\n",
+    "        self.Node_1Q = np.array(df.loc[:, \"N1Q\"])\n",
+    "        self.Node_2Q = np.array(df.loc[:, \"N2Q\"])\n",
+    "        self.Node_3Q = np.array(df.loc[:, \"N3Q\"])\n",
+    "        self.Node_4Q = np.array(df.loc[:, \"N4Q\"])\n",
+    "        self.Node_5Q = np.array(df.loc[:, \"N5Q\"])\n",
+    "        self.state_observation = np.array([self.Node_1Q, self.Node_2Q, self.Node_3Q, self.Node_4Q, self.Node_5Q]).T\n",
+    "        self.reward = 0\n",
+    "        self.done = False\n",
+    "\n",
+    "    def random_action(self):\n",
+    "        raction_1 = np.random.randint(self.N1_max_actions + 1)\n",
+    "        raction_2 = np.random.randint(self.N2_max_actions + 1)\n",
+    "        raction_3 = np.random.randint(self.N3_max_actions + 1)\n",
+    "        raction_4 = np.random.randint(self.N4_max_actions + 1)\n",
+    "        raction_5 = np.random.randint(self.N5_max_actions + 1)\n",
+    "        random_action = np.array([raction_1, raction_2, raction_3, raction_4, raction_5])\n",
+    "\n",
+    "        return random_action\n",
+    "\n",
+    "    def step(self, action):\n",
+    "        # Execute one time step within the environment\n",
+    "        if not self.state_observation.any():  # every queue is empty\n",
+    "            self.done = True\n",
+    "            return self.state_observation, self.reward, self.done, self.episode_length\n",
+    "        elif self.episode_length > 200:\n",
+    "            self.done = True\n",
+    "            return self.state_observation, self.reward, self.done, self.episode_length\n",
+    "\n",
+    "        self.action = action\n",
+    "        self.state_observation, self.reward = self.take_action()\n",
+    "        self.episode_length += 1\n",
+    "        if self.episode_length >= 200:\n",
+    "            self.done = True\n",
+    "\n",
+    "        return self.state_observation, self.reward, self.done, self.episode_length\n",
+    "\n",
+    "    def random_step(self):\n",
+    "        self.action = self.random_action()\n",
+    "        print(self.action)\n",
+    "        self.state_observation, self.reward = self.take_action()\n",
+    "        return self.state_observation, self.reward, self.done  # self.episode_length\n",
+    "\n",
+    "    def qcontrol(self, l, size, filler):  # used after each time step to keep a constant queue size\n",
+    "        length = len(l)\n",
+    "        if length > size:\n",
+    "            return l[:size]\n",
+    "        elif length < size:\n",
+    "            return l + [filler] * (size - length)\n",
+    "        else:\n",
+    "            return l\n",
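A minimal smoke test for the NetworkEnv above -- an editorial sketch, not part of either patch. The rest of this copy of the notebook, including the take_action() method that step() calls, is cut off, so this assumes take_action() is restored and returns the (state, reward) pair that step() unpacks; it reuses the five queue columns that reset() expects.

    import numpy as np
    import pandas as pd

    # Ten queue slots per node, depths 0..5, matching what reset() reads.
    df = pd.DataFrame(np.random.randint(0, 6, size=(10, 5)),
                      columns=["N1Q", "N2Q", "N3Q", "N4Q", "N5Q"])
    env = NetworkEnv()
    env.reset(df)
    done = False
    while not done:
        # step() ends the episode when all queues are empty or after 200 steps
        state, reward, done, t = env.step(env.random_action())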
From: nkorra
Date: Mon, 28 Mar 2022 21:28:01 +0200
Subject: [PATCH 2/2] changes to update

---
 NetRouting                     |   1 +
 Network_Routing_Env_1.ipynb    | 153 ++++++++++++++++++++-------
 Network_Routing_Env_1.py       | 301 ++++++++++++++++++++++++++++
 RL_Network_Routing_agent.ipynb | 171 +++++++++++++++++
 Untitled.ipynb                 |  33 +++++
 5 files changed, 628 insertions(+), 31 deletions(-)
 create mode 160000 NetRouting
 create mode 100644 Network_Routing_Env_1.py
 create mode 100644 RL_Network_Routing_agent.ipynb
 create mode 100644 Untitled.ipynb
diff --git a/NetRouting b/NetRouting
new file mode 160000
index 0000000..f28e5cb
--- /dev/null
+++ b/NetRouting
@@ -0,0 +1 @@
+Subproject commit f28e5cb9a6b13f81aacc88e38e8fea0c7c075f7c
diff --git a/Network_Routing_Env_1.ipynb b/Network_Routing_Env_1.ipynb
index 739136d..a1afe49 100644
--- a/Network_Routing_Env_1.ipynb
+++ b/Network_Routing_Env_1.ipynb
@@ -1,8 +1,16 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "323b62d7",
+   "metadata": {},
+   "source": [
+    "### This is the updated file with one-hot encoding"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 1,
    "id": "b508c735-59ea-4955-b110-f5c10fd88408",
    "metadata": {},
    "outputs": [],
@@ -10,12 +18,14 @@
     "import gym\n",
     "from gym import spaces\n",
     "import pandas as pd\n",
-    "import numpy as np"
+    "import numpy as np\n",
+    "from sklearn.preprocessing import OneHotEncoder\n",
+    "import os, sys"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 2,
    "id": "b8b3144c-1d05-4e33-afb0-edf7d6fdc395",
    "metadata": {},
    "outputs": [],
@@ -78,7 +88,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 3,
    "id": "9fcafdb0-96bb-4d2f-8a81-3cb28df4eabc",
    "metadata": {},
    "outputs": [
@@ -88,21 +98,21 @@
      "text": [
       "   N1Q  N2Q  N3Q  N4Q  N5Q\n",
       "0    4    4    1    2    3\n",
-      "1    2    5    1    5    1\n",
-      "2    0    0    0    0    0\n",
+      "1    2    5    1    3    1\n",
+      "2    0    0    0    0    4\n",
       "3    0    0    0    0    0\n",
-      "4    0    0    0    0    0\n",
+      "4    0    0    0    0    5\n",
       "[[4 4 1 2 3]\n",
-      " [2 5 1 5 1]\n",
-      " [0 0 0 0 0]\n",
+      " [2 5 1 3 1]\n",
+      " [0 0 0 0 4]\n",
       " [0 0 0 0 0]\n",
-      " [0 0 0 0 0]]\n",
+      " [0 0 0 0 5]]\n",
       "[2, 1, 0, 3, 0]\n"
      ]
     }
    ],
    "source": [
-    "data = np.array([[4,4,1,2,3],[2,5,1,5,1],[0,0,0,0,0],[0,0,0,0,0],[0,0,0,0,0]])\n",
+    "data = np.array([[4,4,1,2,3],[2,5,1,3,1],[0,0,0,0,4],[0,0,0,0,0],[0,0,0,0,5]])\n",
     "action = [2,1,0,3,0]\n",
     "df = pd.DataFrame(data, columns=[\"N1Q\",\"N2Q\",\"N3Q\",\"N4Q\",\"N5Q\"])\n",
     "print(df)\n",
     "print(data)\n",
     "print(action)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 96,
+   "execution_count": 4,
    "id": "11906017-26b6-4518-a68e-b4ca28be98fa",
    "metadata": {},
    "outputs": [],
@@ -131,7 +141,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 5,
    "id": "365940b7-97cb-48bd-9d11-df414159a0f2",
    "metadata": {},
    "outputs": [],
@@ -272,13 
+282,44 @@
     "    q4=qcontrol(q4,5,0)\n",
     "    q5=qcontrol(q5,5,0)\n",
     "    new_state = np.array([q1,q2,q3,q4,q5]).T  ## reassembling the observation\n",
+    "\n",
+    "    q1 = np.array(q1)\n",
+    "    ohq1 = np.zeros((q1.shape[0], q1.shape[0]+1))\n",
+    "    ohq1[np.arange(q1.size), q1] = 1\n",
+    "#    print(ohq1)\n",
+    "\n",
+    "    q2 = np.array(q2)\n",
+    "    ohq2 = np.zeros((q2.shape[0], q2.shape[0]+1))\n",
+    "    ohq2[np.arange(q2.size), q2] = 1\n",
+    "#    print(ohq2)\n",
+    "\n",
+    "    q3 = np.array(q3)\n",
+    "    ohq3 = np.zeros((q3.shape[0], q3.shape[0]+1))\n",
+    "    ohq3[np.arange(q3.size), q3] = 1\n",
+    "#    print(ohq3)\n",
+    "\n",
+    "    q4 = np.array(q4)\n",
+    "    ohq4 = np.zeros((q4.shape[0], q4.shape[0]+1))\n",
+    "    ohq4[np.arange(q4.size), q4] = 1\n",
+    "#    print(ohq4)\n",
+    "\n",
+    "    q5 = np.array(q5)\n",
+    "    ohq5 = np.zeros((q5.shape[0], q5.shape[0]+1))\n",
+    "    ohq5[np.arange(q5.size), q5] = 1\n",
+    "\n",
+    "    ns = np.dstack((ohq1, ohq2, ohq3, ohq4, ohq5))\n",
+    "\n",
     "    new_state_pandas = pd.DataFrame(new_state, columns=[\"NQ1\",\"NQ2\",\"NQ3\",\"NQ4\",\"NQ5\"])\n",
-    "    return new_state_pandas, reward\n"
+    "    return new_state_pandas, reward, ns\n"
@@ -288,30 +329,65 @@
      "text": [
       "   N1Q  N2Q  N3Q  N4Q  N5Q\n",
       "0    4    4    1    2    3\n",
-      "1    2    5    1    5    1\n",
-      "2    0    0    0    0    0\n",
+      "1    2    5    1    3    1\n",
+      "2    0    0    0    0    4\n",
       "3    0    0    0    0    0\n",
-      "4    0    0    0    0    0\n",
-      "[2, 1, 0, 3, 0]\n"
+      "4    0    0    0    0    5\n",
+      "[2, 1, 0, 3, 0]\n",
       "   N1Q  N2Q  N3Q  N4Q  N5Q\n",
       "0    4    4    1    2    3\n",
-      "1    2    5    1    5    1\n",
-      "2    0    0    0    0    0\n",
+      "1    2    5    1    3    1\n",
+      "2    0    0    0    0    4\n",
       "3    0    0    0    0    0\n",
-      "4    0    0    0    0    0\n",
-      "[2, 1, 0, 3, 0]\n"
+      "4    0    0    0    0    5\n",
+      "[2, 1, 0, 3, 0]\n",
+      "[[[0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 1. 0. 0.]\n",
+      "  [1. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 1. 1.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 1. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 0. 1. 0.]\n",
+      "  [0. 0. 1. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 1.]\n",
+      "  [0. 0. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 1. 1. 0.]\n",
+      "  [0. 0. 0. 0. 1.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 1. 1. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 1.]\n",
+      "  [0. 0. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 1. 1. 1.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]]]\n"
      ]
     }
    ],
    "source": [
     "print(df)\n",
     "print(action)\n",
-    "new_state, reward = take_action(df,action)"
+    "new_state, reward, ns = take_action(df,action)\n",
+    "print(ns)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 99,
+   "execution_count": 7,
    "id": "9e97ffdf-ab2c-48bd-bf06-adb8f8f878d4",
    "metadata": {},
    "outputs": [
@@ -320,10 +396,10 @@
      "output_type": "stream",
      "text": [
       "   NQ1  NQ2  NQ3  NQ4  NQ5\n",
-      "0    2    5    1    5    3\n",
+      "0    2    5    1    3    3\n",
       "1    0    0    1    0    4\n",
       "2    0    0    0    0    1\n",
-      "3    0    0    0    0    0\n",
+      "3    0    0    0    0    4\n",
       "4    0    0    0    0    0\n",
       "19\n"
      ]
     }
@@ -336,16 +412,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "e392aa28",
+   "execution_count": 8,
+   "id": "c3237310",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0. 0. 0. 0. 0.]\n",
+      " [0. 0. 1. 0. 0.]\n",
+      " [1. 0. 0. 0. 0.]\n",
+      " [0. 0. 0. 1. 1.]\n",
+      " [0. 0. 0. 0. 0.]\n",
+      " [0. 1. 0. 0. 0.]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(ns[0,:,:])"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "dd28deae",
+   "id": "041749aa",
    "metadata": {},
    "outputs": [],
    "source": []
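The one-hot hunk above repeats the same zeros-plus-fancy-indexing pattern once per node queue. For reference, a more compact equivalent -- an editorial sketch, assuming integer queue depths in 0..5 as in the sample data, not part of the patch:

    import numpy as np

    def one_hot_state(state):
        # state: (5, 5) int array, rows = queue slots, columns = nodes N1..N5.
        q = np.asarray(state)
        eye = np.eye(q.shape[0] + 1)   # one one-hot row per possible depth 0..5
        # One (5, 6) one-hot matrix per node column, stacked depth-wise
        # into (5, 6, 5) -- the same layout the ohq1..ohq5 + np.dstack block builds.
        return np.dstack([eye[q[:, j]] for j in range(q.shape[1])])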
diff --git a/Network_Routing_Env_1.py b/Network_Routing_Env_1.py
new file mode 100644
index 0000000..3dfd537
--- /dev/null
+++ b/Network_Routing_Env_1.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# ### This is the updated file with one-hot encoding
+
+# In[1]:
+
+
+import gym
+from gym import spaces
+import logging
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import OneHotEncoder
+import os, sys
+
+
+# In[2]:
+
+
+class WanEnv(gym.Env):
+    """Custom Environment that follows the gym interface"""
+    metadata = {'render.modes': ['human']}
+
+    def __init__(self, df):
+        super(WanEnv, self).__init__()
+        # Define action and observation space
+        # They must be gym.spaces objects
+        self.action_space = spaces.MultiDiscrete([2, 3, 3, 3, 3])
+        self.observation_space = spaces.Dict({
+            'N1Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N2Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N3Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N4Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N5Q': spaces.MultiDiscrete([5, 5, 5, 5, 5])
+        })
+
+    def step(self, action):
+        # Execute one time step within the environment
+        reward, ob = self.take_action(action)
+        return ob, reward
+
+    def take_action(self, action):
+        self.episode_over = self.backend.switch_link(action)
+
+        self.ticks += 1
+        tmp = []
+
+        # check if the episode ended with an ERROR, then mark it in 'info'
+        if self.episode_over:
+            logging.info('Episode ended by ERROR')
+            self.info['exit_status'] = 'ERROR'
+
+        # otherwise stop once the maximum number of ticks is reached
+        elif self.ticks == self.MAX_TICKS:
+            logging.info('Max ticks over, ending episode')
+            self.episode_over = True
+            self.info['exit_status'] = 'NORMAL'
+
+    def reset(self):
+        # Reset the state of the environment to an initial state
+        self.N1Q = df.loc[:, ["N1Q"]]
+        self.N2Q = df.loc[:, ["N2Q"]]
+        self.N3Q = df.loc[:, ["N3Q"]]
+        self.N4Q = df.loc[:, ["N4Q"]]
+        self.N5Q = df.loc[:, ["N5Q"]]
+        self.reward = 0
+        self.done = False
+
+
+# In[3]:
+
+
+data = np.array([[4,4,1,2,3],[2,5,1,3,1],[0,0,0,0,4],[0,0,0,0,0],[0,0,0,0,5]])
+action = [2,1,0,3,0]
+df = pd.DataFrame(data, columns=["N1Q","N2Q","N3Q","N4Q","N5Q"])
+print(df)
+print(data)
+print(action)
+
+
+# In[4]:
+
+
+def qcontrol(l, size, filler):  # used after each time step to keep a constant queue size
+    length = len(l)
+    if length > size:
+        return l[:size]
+    elif length < size:
+        return l + [filler] * (size - length)
+    else:
+        return l
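The space declarations in WanEnv.__init__ are the part most likely to trip up a reader, so here is a standalone sanity check -- an editorial sketch assuming the classic `gym` API, not part of the patch. Note that MultiDiscrete([5, 5, 5, 5, 5]) allows values 0..4 per slot; since the sample queues hold depths up to 5, [6] * 5 may be closer to the intent.

    from gym import spaces

    action_space = spaces.MultiDiscrete([2, 3, 3, 3, 3])
    observation_space = spaces.Dict(
        {f"N{i}Q": spaces.MultiDiscrete([6] * 5) for i in range(1, 6)}
    )
    print(action_space.sample())               # e.g. [1 2 0 2 1]
    print(observation_space.sample()["N1Q"])   # five queue slots, each in 0..5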
diff --git a/RL_Network_Routing_agent.ipynb b/RL_Network_Routing_agent.ipynb
new file mode 100644
--- /dev/null
+++ b/RL_Network_Routing_agent.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'Network_Routing_Env1'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;32m     17\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m\u001b[0m\n",
+      "\u001b[0;32m---> 19\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mNetwork_Routing_Env1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'Network_Routing_Env1'"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "# import keras.backend.tensorflow_backend as backend\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten\n",
+    "from keras.callbacks import TensorBoard\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "from tensorflow.keras.optimizers import Adam\n",
+    "from collections import deque\n",
+    "import tensorflow as tf\n",
+    "import time\n",
+    "import random\n",
+    "from tqdm import tqdm\n",
+    "import os\n",
+    "import sys\n",
+    "# from PIL import Image\n",
+    "import cv2\n",
+    "import pdb\n",
+    "\n",
+    "from Network_Routing_Env1 import *\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1bd0bff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca359cdc",
+   "metadata": {},
+   "source": [
+    "### Creating the Agent class for the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e3608684",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class agent_learner:\n",
+    "\n",
+    "    def __init__(self, df, action, gamma=0.99, stddev=0.2, learning_rate=0.0001):\n",
+    "        # initializing the Q networks\n",
+    "        initializer_Q = tf.keras.initializers.GlorotNormal()\n",
+    "        optimizer_Q = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n",
+    "        identity_Q = tf.keras.initializers.Identity()\n",
+    "\n",
+    "        input_Q = tf.keras.layers.Input(shape=(df,), name=\"state\")\n",
+    "        x_Q = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_1\")(input_Q)\n",
+    "        x_Q = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_2\")(x_Q)\n",
+    "        x_Q = layers.Dense(32, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_3\")(x_Q)\n",
+    "        x_Q = layers.Dense(16, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_4\")(x_Q)\n",
+    "\n",
+    "        output_Q = layers.Dense(action, activation=\"relu\", kernel_initializer=initializer_Q, name=\"Q_value\")(x_Q)\n",
+    "        self.Q = keras.Model(inputs=input_Q, outputs=output_Q)\n",
+    "        self.Q.compile(optimizer=optimizer_Q, loss=[\"mse\"])\n",
+    "\n",
+    "        # the target Q-network (the prime one)\n",
+    "        initializer_Q_t = tf.keras.initializers.GlorotNormal()\n",
+    "        optimizer_Q_t = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n",
+    "        identity_Q_t = tf.keras.initializers.Identity()\n",
+    "\n",
+    "        input_Q_t = tf.keras.layers.Input(shape=(df,), name=\"state\")\n",
+    "        x_Q_t = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_1\")(input_Q_t)\n",
+    "        x_Q_t = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_2\")(x_Q_t)\n",
+    "        x_Q_t = layers.Dense(32, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_3\")(x_Q_t)\n",
+    "        x_Q_t = layers.Dense(16, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_4\")(x_Q_t)\n",
+    "\n",
+    "        output_Q_t = layers.Dense(action, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"Q_value\")(x_Q_t)\n",
+    "        self.Q_t = keras.Model(inputs=input_Q_t, outputs=output_Q_t)\n",
+    "        self.Q_t.compile(optimizer=optimizer_Q_t, loss=[\"mse\"])\n",
+    "\n",
+    "        self.gamma = gamma\n",
+    "\n",
+    "    def prepare_learning_materials(self, action, Env):\n",
+    "        '''\n",
+    "        Creating the y vector for learning.\n",
+    "        The y vector should be (but it is not)\n",
+    "        y(s,a) := r(s,a) + (1 - done) * gamma * Q_t(s', argmax_a'(Q(s',a')))\n",
+    "        with Q_t -- the target Q-function.\n",
+    "\n",
+    "        Instead the output is Q(s), where Q is a network that returns the values of all actions:\n",
+    "        Q(s) returns a vector of size action.\n",
+    "        Therefore the returned object is Q(s) with only its a-th element modified to\n",
+    "        r(s,a) + (1 - done) * gamma * Q_t(s', argmax_a'(Q(s',a')))\n",
+    "\n",
+    "        Keyword arguments:\n",
+    "        action -- a list of actions\n",
+    "        Env -- the environment\n",
+    "        returns:\n",
+    "        y vector\n",
+    "        '''\n",
+    "\n",
+    "        debug = False\n",
+    "\n",
+    "        if debug:\n",
+    "            import pdb; pdb.set_trace()\n",
+    "\n",
+    "        nr_samples = len(action)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bedfba6c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000..ce5f38a
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f173440",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
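prepare_learning_materials in the agent notebook above stops after computing nr_samples, so here is one way the y vector its docstring describes could be assembled -- an editorial sketch, not the author's implementation; it assumes batched numpy inputs, one integer action per sample, and the two compiled keras models from agent_learner.__init__:

    import numpy as np

    def build_y(Q, Q_t, states, actions, rewards, next_states, dones, gamma=0.99):
        # Start from Q(s): shape (nr_samples, nr_actions); only one entry per row changes.
        y = Q.predict(states)
        idx = np.arange(len(actions))
        # argmax_a' Q(s', a') chosen by the online network ...
        a_star = np.argmax(Q.predict(next_states), axis=1)
        # ... then scored by the target network Q_t (the double-DQN pattern).
        q_next = Q_t.predict(next_states)[idx, a_star]
        # y(s, a) = r(s, a) + (1 - done) * gamma * Q_t(s', a*)
        y[idx, actions] = rewards + (1.0 - np.asarray(dones, dtype=float)) * gamma * q_next
        return y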