From 9109281774d3fb0796f6b8d053774ec4d3ec3ef0 Mon Sep 17 00:00:00 2001
From: nkorra
Date: Mon, 28 Mar 2022 21:17:22 +0200
Subject: [PATCH 1/2] changes updated

---
 Network_Routing_Env_update.ipynb | 621 +++++++++++++++++++++++++++++++
 1 file changed, 621 insertions(+)
 create mode 100644 Network_Routing_Env_update.ipynb

diff --git a/Network_Routing_Env_update.ipynb b/Network_Routing_Env_update.ipynb
new file mode 100644
index 0000000..bade1d5
--- /dev/null
+++ b/Network_Routing_Env_update.ipynb
@@ -0,0 +1,621 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "bc34563a",
+   "metadata": {},
+   "source": [
+    "## Environment with all functions and one-hot encoding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 305,
+   "id": "b508c735-59ea-4955-b110-f5c10fd88408",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 377,
+   "id": "b8b3144c-1d05-4e33-afb0-edf7d6fdc395",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class NetworkEnv():\n",
+    "    # metadata = {'render.modes': ['human']}\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        super(NetworkEnv, self).__init__()\n",
+    "        self.Node_1Q = np.zeros(10)\n",
+    "        self.Node_2Q = np.zeros(10)\n",
+    "        self.Node_3Q = np.zeros(10)\n",
+    "        self.Node_4Q = np.zeros(10)\n",
+    "        self.Node_5Q = np.zeros(10)\n",
+    "        self.state_observation = np.zeros([10, 5])\n",
+    "        self.action = np.zeros(5)\n",
+    "        self.N1_max_actions = 2\n",
+    "        self.N2_max_actions = 3\n",
+    "        self.N3_max_actions = 3\n",
+    "        self.N4_max_actions = 3\n",
+    "        self.N5_max_actions = 3\n",
+    "        self.done = False\n",
+    "        self.episode_length = 0\n",
+    "\n",
+    "    def reset(self, df):\n",
+    "        # Reset the state of the environment to an initial state\n",
+    "        self.Node_1Q = np.array(df.loc[:, \"N1Q\"])\n",
+    "        self.Node_2Q = np.array(df.loc[:, \"N2Q\"])\n",
+    "        self.Node_3Q = np.array(df.loc[:, \"N3Q\"])\n",
+    "        self.Node_4Q = np.array(df.loc[:, \"N4Q\"])\n",
+    "        self.Node_5Q = np.array(df.loc[:, \"N5Q\"])\n",
+    "        self.state_observation = np.array([self.Node_1Q, self.Node_2Q, self.Node_3Q, self.Node_4Q, self.Node_5Q]).T\n",
+    "        self.reward = 0\n",
+    "        self.done = False\n",
+    "\n",
+    "    def random_action(self):\n",
+    "        raction_1 = np.random.randint(self.N1_max_actions + 1)\n",
+    "        raction_2 = np.random.randint(self.N2_max_actions + 1)\n",
+    "        raction_3 = np.random.randint(self.N3_max_actions + 1)\n",
+    "        raction_4 = np.random.randint(self.N4_max_actions + 1)\n",
+    "        raction_5 = np.random.randint(self.N5_max_actions + 1)\n",
+    "        random_action = np.array([raction_1, raction_2, raction_3, raction_4, raction_5])\n",
+    "\n",
+    "        return random_action\n",
+    "\n",
+    "    def step(self, action):\n",
+    "        # Execute one time step within the environment\n",
+    "        if not self.state_observation.any():  # every queue is empty\n",
+    "            self.done = True\n",
+    "            return self.state_observation, self.reward, self.done, self.episode_length\n",
+    "        elif self.episode_length > 200:\n",
+    "            self.done = True\n",
+    "            return self.state_observation, self.reward, self.done, self.episode_length\n",
+    "\n",
+    "        self.action = action\n",
+    "        self.state_observation, self.reward = self.take_action()\n",
+    "        self.episode_length += 1\n",
+    "        if self.episode_length >= 200:\n",
+    "            self.done = True\n",
+    "\n",
+    "        return self.state_observation, self.reward, self.done, self.episode_length\n",
+    "\n",
+    "    def random_step(self):\n",
+    "        self.action = self.random_action()\n",
+    "        print(self.action)\n",
+    "        self.state_observation, self.reward = self.take_action()\n",
+    "        return self.state_observation, self.reward, self.done  # self.episode_length\n",
+    "\n",
+    "    def qcontrol(self, l, size, filler):  # used after each time step to keep a constant queue size\n",
+    "        length = len(l)\n",
+    "        if length > size:\n",
+    "            return l[:size]\n",
+    "        elif length < size:\n",
+    "            return l + [filler] * (size - length)\n",
+    "        else:\n",
+    "            return l\n",
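A minimal smoke test for the NetworkEnv above -- an editorial sketch, not part of either patch. The rest of this copy of the notebook, including the take_action() method that step() calls, is cut off, so this assumes take_action() is restored and returns the (state, reward) pair that step() unpacks; it reuses the five queue columns that reset() expects.

    import numpy as np
    import pandas as pd

    # Ten queue slots per node, depths 0..5, matching what reset() reads.
    df = pd.DataFrame(np.random.randint(0, 6, size=(10, 5)),
                      columns=["N1Q", "N2Q", "N3Q", "N4Q", "N5Q"])
    env = NetworkEnv()
    env.reset(df)
    done = False
    while not done:
        # step() ends the episode when all queues are empty or after 200 steps
        state, reward, done, t = env.step(env.random_action())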
From: nkorra
Date: Mon, 28 Mar 2022 21:28:01 +0200
Subject: [PATCH 2/2] changes to update

---
 NetRouting                     |   1 +
 Network_Routing_Env_1.ipynb    | 153 ++++++++++++++++++++-------
 Network_Routing_Env_1.py       | 301 ++++++++++++++++++++++++++++
 RL_Network_Routing_agent.ipynb | 171 +++++++++++++++++
 Untitled.ipynb                 |  33 +++++
 5 files changed, 628 insertions(+), 31 deletions(-)
 create mode 160000 NetRouting
 create mode 100644 Network_Routing_Env_1.py
 create mode 100644 RL_Network_Routing_agent.ipynb
 create mode 100644 Untitled.ipynb
diff --git a/NetRouting b/NetRouting
new file mode 160000
index 0000000..f28e5cb
--- /dev/null
+++ b/NetRouting
@@ -0,0 +1 @@
+Subproject commit f28e5cb9a6b13f81aacc88e38e8fea0c7c075f7c
diff --git a/Network_Routing_Env_1.ipynb b/Network_Routing_Env_1.ipynb
index 739136d..a1afe49 100644
--- a/Network_Routing_Env_1.ipynb
+++ b/Network_Routing_Env_1.ipynb
@@ -1,8 +1,16 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "323b62d7",
+   "metadata": {},
+   "source": [
+    "### This is the updated file with one-hot encoding"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 1,
    "id": "b508c735-59ea-4955-b110-f5c10fd88408",
    "metadata": {},
    "outputs": [],
@@ -10,12 +18,14 @@
     "import gym\n",
     "from gym import spaces\n",
     "import pandas as pd\n",
-    "import numpy as np"
+    "import numpy as np\n",
+    "from sklearn.preprocessing import OneHotEncoder\n",
+    "import os, sys"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 2,
    "id": "b8b3144c-1d05-4e33-afb0-edf7d6fdc395",
    "metadata": {},
    "outputs": [],
@@ -78,7 +88,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 3,
    "id": "9fcafdb0-96bb-4d2f-8a81-3cb28df4eabc",
    "metadata": {},
    "outputs": [
@@ -88,21 +98,21 @@
      "text": [
       "   N1Q  N2Q  N3Q  N4Q  N5Q\n",
       "0    4    4    1    2    3\n",
-      "1    2    5    1    5    1\n",
-      "2    0    0    0    0    0\n",
+      "1    2    5    1    3    1\n",
+      "2    0    0    0    0    4\n",
       "3    0    0    0    0    0\n",
-      "4    0    0    0    0    0\n",
+      "4    0    0    0    0    5\n",
       "[[4 4 1 2 3]\n",
-      " [2 5 1 5 1]\n",
-      " [0 0 0 0 0]\n",
+      " [2 5 1 3 1]\n",
+      " [0 0 0 0 4]\n",
       " [0 0 0 0 0]\n",
-      " [0 0 0 0 0]]\n",
+      " [0 0 0 0 5]]\n",
       "[2, 1, 0, 3, 0]\n"
      ]
     }
    ],
    "source": [
-    "data = np.array([[4,4,1,2,3],[2,5,1,5,1],[0,0,0,0,0],[0,0,0,0,0],[0,0,0,0,0]])\n",
+    "data = np.array([[4,4,1,2,3],[2,5,1,3,1],[0,0,0,0,4],[0,0,0,0,0],[0,0,0,0,5]])\n",
     "action = [2,1,0,3,0]\n",
     "df = pd.DataFrame(data, columns=[\"N1Q\",\"N2Q\",\"N3Q\",\"N4Q\",\"N5Q\"])\n",
     "print(df)\n",
     "print(data)\n",
     "print(action)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 96,
+   "execution_count": 4,
    "id": "11906017-26b6-4518-a68e-b4ca28be98fa",
    "metadata": {},
    "outputs": [],
@@ -131,7 +141,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 5,
    "id": "365940b7-97cb-48bd-9d11-df414159a0f2",
    "metadata": {},
    "outputs": [],
@@ -272,13 
+282,44 @@
     "    q4=qcontrol(q4,5,0)\n",
     "    q5=qcontrol(q5,5,0)\n",
     "    new_state = np.array([q1,q2,q3,q4,q5]).T  ## reassembling the observation\n",
+    "\n",
+    "    q1 = np.array(q1)\n",
+    "    ohq1 = np.zeros((q1.shape[0], q1.shape[0]+1))\n",
+    "    ohq1[np.arange(q1.size), q1] = 1\n",
+    "#    print(ohq1)\n",
+    "\n",
+    "    q2 = np.array(q2)\n",
+    "    ohq2 = np.zeros((q2.shape[0], q2.shape[0]+1))\n",
+    "    ohq2[np.arange(q2.size), q2] = 1\n",
+    "#    print(ohq2)\n",
+    "\n",
+    "    q3 = np.array(q3)\n",
+    "    ohq3 = np.zeros((q3.shape[0], q3.shape[0]+1))\n",
+    "    ohq3[np.arange(q3.size), q3] = 1\n",
+    "#    print(ohq3)\n",
+    "\n",
+    "    q4 = np.array(q4)\n",
+    "    ohq4 = np.zeros((q4.shape[0], q4.shape[0]+1))\n",
+    "    ohq4[np.arange(q4.size), q4] = 1\n",
+    "#    print(ohq4)\n",
+    "\n",
+    "    q5 = np.array(q5)\n",
+    "    ohq5 = np.zeros((q5.shape[0], q5.shape[0]+1))\n",
+    "    ohq5[np.arange(q5.size), q5] = 1\n",
+    "\n",
+    "    ns = np.dstack((ohq1, ohq2, ohq3, ohq4, ohq5))\n",
+    "\n",
     "    new_state_pandas = pd.DataFrame(new_state, columns=[\"NQ1\",\"NQ2\",\"NQ3\",\"NQ4\",\"NQ5\"])\n",
-    "    return new_state_pandas, reward\n"
+    "    return new_state_pandas, reward, ns\n"
@@ -288,30 +329,65 @@
      "text": [
       "   N1Q  N2Q  N3Q  N4Q  N5Q\n",
       "0    4    4    1    2    3\n",
-      "1    2    5    1    5    1\n",
-      "2    0    0    0    0    0\n",
+      "1    2    5    1    3    1\n",
+      "2    0    0    0    0    4\n",
       "3    0    0    0    0    0\n",
-      "4    0    0    0    0    0\n",
-      "[2, 1, 0, 3, 0]\n"
+      "4    0    0    0    0    5\n",
+      "[2, 1, 0, 3, 0]\n",
       "   N1Q  N2Q  N3Q  N4Q  N5Q\n",
       "0    4    4    1    2    3\n",
-      "1    2    5    1    5    1\n",
-      "2    0    0    0    0    0\n",
+      "1    2    5    1    3    1\n",
+      "2    0    0    0    0    4\n",
       "3    0    0    0    0    0\n",
-      "4    0    0    0    0    0\n",
-      "[2, 1, 0, 3, 0]\n"
+      "4    0    0    0    0    5\n",
+      "[2, 1, 0, 3, 0]\n",
+      "[[[0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 1. 0. 0.]\n",
+      "  [1. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 1. 1.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 1. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 0. 1. 0.]\n",
+      "  [0. 0. 1. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 1.]\n",
+      "  [0. 0. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 1. 1. 0.]\n",
+      "  [0. 0. 0. 0. 1.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 1. 1. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 1.]\n",
+      "  [0. 0. 0. 0. 0.]]\n",
+      "\n",
+      " [[1. 1. 1. 1. 1.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]\n",
+      "  [0. 0. 0. 0. 0.]]]\n"
      ]
     }
    ],
    "source": [
     "print(df)\n",
     "print(action)\n",
-    "new_state, reward = take_action(df,action)"
+    "new_state, reward, ns = take_action(df,action)\n",
+    "print(ns)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 99,
+   "execution_count": 7,
    "id": "9e97ffdf-ab2c-48bd-bf06-adb8f8f878d4",
    "metadata": {},
    "outputs": [
@@ -320,10 +396,10 @@
      "output_type": "stream",
      "text": [
       "   NQ1  NQ2  NQ3  NQ4  NQ5\n",
-      "0    2    5    1    5    3\n",
+      "0    2    5    1    3    3\n",
       "1    0    0    1    0    4\n",
       "2    0    0    0    0    1\n",
-      "3    0    0    0    0    0\n",
+      "3    0    0    0    0    4\n",
       "4    0    0    0    0    0\n",
       "19\n"
      ]
     }
@@ -336,16 +412,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "e392aa28",
+   "execution_count": 8,
+   "id": "c3237310",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0. 0. 0. 0. 0.]\n",
+      " [0. 0. 1. 0. 0.]\n",
+      " [1. 0. 0. 0. 0.]\n",
+      " [0. 0. 0. 1. 1.]\n",
+      " [0. 0. 0. 0. 0.]\n",
+      " [0. 1. 0. 0. 0.]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(ns[0,:,:])"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "dd28deae",
+   "id": "041749aa",
    "metadata": {},
    "outputs": [],
    "source": []
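The one-hot hunk above repeats the same zeros-plus-fancy-indexing pattern once per node queue. For reference, a more compact equivalent -- an editorial sketch, assuming integer queue depths in 0..5 as in the sample data, not part of the patch:

    import numpy as np

    def one_hot_state(state):
        # state: (5, 5) int array, rows = queue slots, columns = nodes N1..N5.
        q = np.asarray(state)
        eye = np.eye(q.shape[0] + 1)   # one one-hot row per possible depth 0..5
        # One (5, 6) one-hot matrix per node column, stacked depth-wise
        # into (5, 6, 5) -- the same layout the ohq1..ohq5 + np.dstack block builds.
        return np.dstack([eye[q[:, j]] for j in range(q.shape[1])])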
diff --git a/Network_Routing_Env_1.py b/Network_Routing_Env_1.py
new file mode 100644
index 0000000..3dfd537
--- /dev/null
+++ b/Network_Routing_Env_1.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# ### This is the updated file with one-hot encoding
+
+# In[1]:
+
+
+import gym
+from gym import spaces
+import logging
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import OneHotEncoder
+import os, sys
+
+
+# In[2]:
+
+
+class WanEnv(gym.Env):
+    """Custom Environment that follows the gym interface"""
+    metadata = {'render.modes': ['human']}
+
+    def __init__(self, df):
+        super(WanEnv, self).__init__()
+        # Define action and observation space
+        # They must be gym.spaces objects
+        self.action_space = spaces.MultiDiscrete([2, 3, 3, 3, 3])
+        self.observation_space = spaces.Dict({
+            'N1Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N2Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N3Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N4Q': spaces.MultiDiscrete([5, 5, 5, 5, 5]),
+            'N5Q': spaces.MultiDiscrete([5, 5, 5, 5, 5])
+        })
+
+    def step(self, action):
+        # Execute one time step within the environment
+        reward, ob = self.take_action(action)
+        return ob, reward
+
+    def take_action(self, action):
+        self.episode_over = self.backend.switch_link(action)
+
+        self.ticks += 1
+        tmp = []
+
+        # check if the episode ended with an ERROR, then mark it in 'info'
+        if self.episode_over:
+            logging.info('Episode ended by ERROR')
+            self.info['exit_status'] = 'ERROR'
+
+        # otherwise stop once the maximum number of ticks is reached
+        elif self.ticks == self.MAX_TICKS:
+            logging.info('Max ticks over, ending episode')
+            self.episode_over = True
+            self.info['exit_status'] = 'NORMAL'
+
+    def reset(self):
+        # Reset the state of the environment to an initial state
+        self.N1Q = df.loc[:, ["N1Q"]]
+        self.N2Q = df.loc[:, ["N2Q"]]
+        self.N3Q = df.loc[:, ["N3Q"]]
+        self.N4Q = df.loc[:, ["N4Q"]]
+        self.N5Q = df.loc[:, ["N5Q"]]
+        self.reward = 0
+        self.done = False
+
+
+# In[3]:
+
+
+data = np.array([[4,4,1,2,3],[2,5,1,3,1],[0,0,0,0,4],[0,0,0,0,0],[0,0,0,0,5]])
+action = [2,1,0,3,0]
+df = pd.DataFrame(data, columns=["N1Q","N2Q","N3Q","N4Q","N5Q"])
+print(df)
+print(data)
+print(action)
+
+
+# In[4]:
+
+
+def qcontrol(l, size, filler):  # used after each time step to keep a constant queue size
+    length = len(l)
+    if length > size:
+        return l[:size]
+    elif length < size:
+        return l + [filler] * (size - length)
+    else:
+        return l
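The space declarations in WanEnv.__init__ are the part most likely to trip up a reader, so here is a standalone sanity check -- an editorial sketch assuming the classic `gym` API, not part of the patch. Note that MultiDiscrete([5, 5, 5, 5, 5]) allows values 0..4 per slot; since the sample queues hold depths up to 5, [6] * 5 may be closer to the intent.

    from gym import spaces

    action_space = spaces.MultiDiscrete([2, 3, 3, 3, 3])
    observation_space = spaces.Dict(
        {f"N{i}Q": spaces.MultiDiscrete([6] * 5) for i in range(1, 6)}
    )
    print(action_space.sample())               # e.g. [1 2 0 2 1]
    print(observation_space.sample()["N1Q"])   # five queue slots, each in 0..5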
diff --git a/RL_Network_Routing_agent.ipynb b/RL_Network_Routing_agent.ipynb
new file mode 100644
--- /dev/null
+++ b/RL_Network_Routing_agent.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'Network_Routing_Env1'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;32m     17\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m\u001b[0m\n",
+      "\u001b[0;32m---> 19\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mNetwork_Routing_Env1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'Network_Routing_Env1'"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "# import keras.backend.tensorflow_backend as backend\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten\n",
+    "from keras.callbacks import TensorBoard\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "from tensorflow.keras.optimizers import Adam\n",
+    "from collections import deque\n",
+    "import tensorflow as tf\n",
+    "import time\n",
+    "import random\n",
+    "from tqdm import tqdm\n",
+    "import os\n",
+    "import sys\n",
+    "# from PIL import Image\n",
+    "import cv2\n",
+    "import pdb\n",
+    "\n",
+    "from Network_Routing_Env1 import *\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1bd0bff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca359cdc",
+   "metadata": {},
+   "source": [
+    "### Creating the Agent class for the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e3608684",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class agent_learner:\n",
+    "\n",
+    "    def __init__(self, df, action, gamma=0.99, stddev=0.2, learning_rate=0.0001):\n",
+    "        # initializing the Q networks\n",
+    "        initializer_Q = tf.keras.initializers.GlorotNormal()\n",
+    "        optimizer_Q = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n",
+    "        identity_Q = tf.keras.initializers.Identity()\n",
+    "\n",
+    "        input_Q = tf.keras.layers.Input(shape=(df,), name=\"state\")\n",
+    "        x_Q = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_1\")(input_Q)\n",
+    "        x_Q = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_2\")(x_Q)\n",
+    "        x_Q = layers.Dense(32, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_3\")(x_Q)\n",
+    "        x_Q = layers.Dense(16, activation=\"relu\", kernel_initializer=initializer_Q, name=\"relu_dense_Q_4\")(x_Q)\n",
+    "\n",
+    "        output_Q = layers.Dense(action, activation=\"relu\", kernel_initializer=initializer_Q, name=\"Q_value\")(x_Q)\n",
+    "        self.Q = keras.Model(inputs=input_Q, outputs=output_Q)\n",
+    "        self.Q.compile(optimizer=optimizer_Q, loss=[\"mse\"])\n",
+    "\n",
+    "        # the target Q-network (the prime one)\n",
+    "        initializer_Q_t = tf.keras.initializers.GlorotNormal()\n",
+    "        optimizer_Q_t = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n",
+    "        identity_Q_t = tf.keras.initializers.Identity()\n",
+    "\n",
+    "        input_Q_t = tf.keras.layers.Input(shape=(df,), name=\"state\")\n",
+    "        x_Q_t = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_1\")(input_Q_t)\n",
+    "        x_Q_t = layers.Dense(64, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_2\")(x_Q_t)\n",
+    "        x_Q_t = layers.Dense(32, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_3\")(x_Q_t)\n",
+    "        x_Q_t = layers.Dense(16, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"relu_dense_Qt_4\")(x_Q_t)\n",
+    "\n",
+    "        output_Q_t = layers.Dense(action, activation=\"relu\", kernel_initializer=initializer_Q_t, name=\"Q_value\")(x_Q_t)\n",
+    "        self.Q_t = keras.Model(inputs=input_Q_t, outputs=output_Q_t)\n",
+    "        self.Q_t.compile(optimizer=optimizer_Q_t, loss=[\"mse\"])\n",
+    "\n",
+    "        self.gamma = gamma\n",
+    "\n",
+    "    def prepare_learning_materials(self, action, Env):\n",
+    "        '''\n",
+    "        Creating the y vector for learning.\n",
+    "        The y vector should be (but it is not)\n",
+    "        y(s,a) := r(s,a) + (1 - done) * gamma * Q_t(s', argmax_a'(Q(s',a')))\n",
+    "        with Q_t -- the target Q-function.\n",
+    "\n",
+    "        Instead the output is Q(s), where Q is a network that returns the values of all actions:\n",
+    "        Q(s) returns a vector of size action.\n",
+    "        Therefore the returned object is Q(s) with only its a-th element modified to\n",
+    "        r(s,a) + (1 - done) * gamma * Q_t(s', argmax_a'(Q(s',a')))\n",
+    "\n",
+    "        Keyword arguments:\n",
+    "        action -- a list of actions\n",
+    "        Env -- the environment\n",
+    "        returns:\n",
+    "        y vector\n",
+    "        '''\n",
+    "\n",
+    "        debug = False\n",
+    "\n",
+    "        if debug:\n",
+    "            import pdb; pdb.set_trace()\n",
+    "\n",
+    "        nr_samples = len(action)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bedfba6c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000..ce5f38a
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f173440",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
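prepare_learning_materials in the agent notebook above stops after computing nr_samples, so here is one way the y vector its docstring describes could be assembled -- an editorial sketch, not the author's implementation; it assumes batched numpy inputs, one integer action per sample, and the two compiled keras models from agent_learner.__init__:

    import numpy as np

    def build_y(Q, Q_t, states, actions, rewards, next_states, dones, gamma=0.99):
        # Start from Q(s): shape (nr_samples, nr_actions); only one entry per row changes.
        y = Q.predict(states)
        idx = np.arange(len(actions))
        # argmax_a' Q(s', a') chosen by the online network ...
        a_star = np.argmax(Q.predict(next_states), axis=1)
        # ... then scored by the target network Q_t (the double-DQN pattern).
        q_next = Q_t.predict(next_states)[idx, a_star]
        # y(s, a) = r(s, a) + (1 - done) * gamma * Q_t(s', a*)
        y[idx, actions] = rewards + (1.0 - np.asarray(dones, dtype=float)) * gamma * q_next
        return y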